3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
66 #include <net/busy_poll.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
71 #include <linux/crypto.h>
72 #include <linux/scatterlist.h>
/* Forward declarations for handlers defined later in this file, plus the
 * per-address-family operation tables (native IPv6 vs. v4-mapped sockets).
 * NOTE(review): lossy extract — some declaration lines are missing here.
 */
74 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
75 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
76 struct request_sock *req);
78 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 static const struct inet_connection_sock_af_ops ipv6_specific;
82 #ifdef CONFIG_TCP_MD5SIG
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* Early prototype for the MD5 key lookup helper; the full definition
 * appears further down inside CONFIG_TCP_MD5SIG. */
86 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
87 const struct in6_addr *addr)
/* Cache the inbound route on the socket for the receive fast path:
 * remember the skb's dst, the arrival interface index, and the fib6
 * node serial number (used later to cheaply validate the cached dst).
 * NOTE(review): lossy extract — some lines of the original body are
 * missing between the statements shown.
 */
93 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
95 struct dst_entry *dst = skb_dst(skb);
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
100 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
/* Insert the socket into the TCP hash tables unless it is closed.
 * Sockets using the v4-mapped ops table take the first branch
 * (presumably delegating to the IPv4 hash routine — TODO confirm, the
 * body of that branch is missing from this extract); native IPv6
 * sockets are hashed via __inet6_hash().
 */
105 static void tcp_v6_hash(struct sock *sk)
107 if (sk->sk_state != TCP_CLOSE) {
108 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
113 __inet6_hash(sk, NULL);
/* Derive the initial sequence number for a connection from the packet's
 * IPv6 addresses and TCP source port via the secure ISN generator.
 * NOTE(review): one argument line (likely the destination port) is
 * missing from this extract.
 */
118 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
120 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32,
123 tcp_hdr(skb)->source);
/* tcp_v6_connect() - active open (connect()) for a TCPv6 socket.
 *
 * Validates the sockaddr_in6, resolves flow labels and link-local scope
 * ids, special-cases v4-mapped destinations by switching the socket to
 * the mapped ops tables and delegating to tcp_v4_connect(), performs the
 * IPv6 route lookup, binds a local port with inet6_hash_connect(), picks
 * the initial write sequence number, and transmits the SYN.
 *
 * NOTE(review): this extract is lossy — braces, error labels and several
 * statements of the original function are missing; the code below is the
 * visible subset only.
 */
126 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
129 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
130 struct inet_sock *inet = inet_sk(sk);
131 struct inet_connection_sock *icsk = inet_csk(sk);
132 struct ipv6_pinfo *np = inet6_sk(sk);
133 struct tcp_sock *tp = tcp_sk(sk);
134 struct in6_addr *saddr = NULL, *final_p, final;
137 struct dst_entry *dst;
/* Basic sanity checks on the user-supplied address. */
141 if (addr_len < SIN6_LEN_RFC2133)
144 if (usin->sin6_family != AF_INET6)
145 return -EAFNOSUPPORT;
147 memset(&fl6, 0, sizeof(fl6));
/* If the caller supplied a flow label, it must refer to a label this
 * socket has obtained (fl6_sock_lookup); otherwise fail. */
150 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
151 IP6_ECN_flow_init(fl6.flowlabel);
152 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
153 struct ip6_flowlabel *flowlabel;
154 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
155 if (flowlabel == NULL)
157 fl6_sock_release(flowlabel);
162 * connect() to INADDR_ANY means loopback (BSD'ism).
165 if (ipv6_addr_any(&usin->sin6_addr))
166 usin->sin6_addr.s6_addr[15] = 0x1;
168 addr_type = ipv6_addr_type(&usin->sin6_addr);
170 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations require a consistent, non-zero interface. */
173 if (addr_type&IPV6_ADDR_LINKLOCAL) {
174 if (addr_len >= sizeof(struct sockaddr_in6) &&
175 usin->sin6_scope_id) {
176 /* If interface is set while binding, indices
179 if (sk->sk_bound_dev_if &&
180 sk->sk_bound_dev_if != usin->sin6_scope_id)
183 sk->sk_bound_dev_if = usin->sin6_scope_id;
186 /* Connect to link-local address requires an interface */
187 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer invalidates cached timestamps. */
191 if (tp->rx_opt.ts_recent_stamp &&
192 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
193 tp->rx_opt.ts_recent = 0;
194 tp->rx_opt.ts_recent_stamp = 0;
198 sk->sk_v6_daddr = usin->sin6_addr;
199 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: switch to the mapped ops tables and hand off
 * to tcp_v4_connect(); on failure (restored below) revert to the
 * native IPv6 tables. */
205 if (addr_type == IPV6_ADDR_MAPPED) {
206 u32 exthdrlen = icsk->icsk_ext_hdr_len;
207 struct sockaddr_in sin;
209 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
211 if (__ipv6_only_sock(sk))
214 sin.sin_family = AF_INET;
215 sin.sin_port = usin->sin6_port;
216 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
218 icsk->icsk_af_ops = &ipv6_mapped;
219 sk->sk_backlog_rcv = tcp_v4_do_rcv;
220 #ifdef CONFIG_TCP_MD5SIG
221 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
224 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* tcp_v4_connect() failed: restore the native IPv6 ops. */
227 icsk->icsk_ext_hdr_len = exthdrlen;
228 icsk->icsk_af_ops = &ipv6_specific;
229 sk->sk_backlog_rcv = tcp_v6_do_rcv;
230 #ifdef CONFIG_TCP_MD5SIG
231 tp->af_specific = &tcp_sock_ipv6_specific;
/* On success, mirror the chosen IPv4 addresses as v4-mapped IPv6. */
235 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
236 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
237 &sk->sk_v6_rcv_saddr);
243 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
244 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow description and look up the route. */
246 fl6.flowi6_proto = IPPROTO_TCP;
247 fl6.daddr = sk->sk_v6_daddr;
248 fl6.saddr = saddr ? *saddr : np->saddr;
249 fl6.flowi6_oif = sk->sk_bound_dev_if;
250 fl6.flowi6_mark = sk->sk_mark;
251 fl6.fl6_dport = usin->sin6_port;
252 fl6.fl6_sport = inet->inet_sport;
254 final_p = fl6_update_dst(&fl6, np->opt, &final);
256 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
258 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
266 sk->sk_v6_rcv_saddr = *saddr;
269 /* set the source address */
271 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
273 sk->sk_gso_type = SKB_GSO_TCPV6;
274 __ip6_dst_store(sk, dst, NULL, NULL);
/* With tw_recycle enabled, try to recover timestamps from a matching
 * timewait entry for this destination. */
276 rt = (struct rt6_info *) dst;
277 if (tcp_death_row.sysctl_tw_recycle &&
278 !tp->rx_opt.ts_recent_stamp &&
279 ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
280 tcp_fetch_timewait_stamp(sk, dst);
282 icsk->icsk_ext_hdr_len = 0;
284 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
287 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
289 inet->inet_dport = usin->sin6_port;
291 tcp_set_state(sk, TCP_SYN_SENT);
292 err = inet6_hash_connect(&tcp_death_row, sk);
/* Choose the ISN unless already set (e.g. by TCP repair mode). */
296 if (!tp->write_seq && likely(!tp->repair))
297 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
298 sk->sk_v6_daddr.s6_addr32,
302 err = tcp_connect(sk);
/* Error path: undo the partial connection state. */
309 tcp_set_state(sk, TCP_CLOSE);
312 inet->inet_dport = 0;
313 sk->sk_route_caps = 0;
/* React to a PMTU decrease (stored in tp->mtu_info by tcp_v6_err):
 * ignore LISTEN/CLOSE sockets, update the cached route's PMTU, and if
 * our MSS estimate is now too large, resync the MSS and retransmit.
 */
317 static void tcp_v6_mtu_reduced(struct sock *sk)
319 struct dst_entry *dst;
321 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
324 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
328 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
329 tcp_sync_mss(sk, dst_mtu(dst));
330 tcp_simple_retransmit(sk);
/* tcp_v6_err() - ICMPv6 error handler for TCP.
 *
 * Locates the socket the quoted TCP header belongs to, validates the
 * quoted sequence number against the send window, then dispatches on
 * the ICMP type: NDISC_REDIRECT updates the route, PKT_TOOBIG triggers
 * PMTU handling (deferred if the socket is user-locked), and other
 * errors are converted and delivered to the socket / request sock.
 *
 * NOTE(review): lossy extract — labels, braces and several statements
 * of the original function are missing.
 */
334 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
335 u8 type, u8 code, int offset, __be32 info)
337 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
338 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
339 struct ipv6_pinfo *np;
343 struct request_sock *fastopen;
345 struct net *net = dev_net(skb->dev);
347 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
348 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
/* No socket found: count the ICMP error and bail out. */
351 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
356 if (sk->sk_state == TCP_TIME_WAIT) {
357 inet_twsk_put(inet_twsk(sk));
/* ICMPs racing with user-space lock holders are dropped (except
 * PKT_TOOBIG, which can be deferred). */
362 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
363 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
365 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount check guards against spoofed ICMP from far away. */
368 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
369 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
374 seq = ntohl(th->seq);
375 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
376 fastopen = tp->fastopen_rsk;
377 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
378 if (sk->sk_state != TCP_LISTEN &&
379 !between(seq, snd_una, tp->snd_nxt)) {
380 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
386 if (type == NDISC_REDIRECT) {
387 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
390 dst->ops->redirect(dst, sk, skb);
394 if (type == ICMPV6_PKT_TOOBIG) {
395 /* We are not interested in TCP_LISTEN and open_requests
396 * (SYN-ACKs send out by Linux are always <576bytes so
397 * they should go through unfragmented).
399 if (sk->sk_state == TCP_LISTEN)
402 if (!ip6_sk_accept_pmtu(sk))
405 tp->mtu_info = ntohl(info);
406 if (!sock_owned_by_user(sk))
407 tcp_v6_mtu_reduced(sk);
/* Socket is user-locked: defer the PMTU update to release_sock(). */
408 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
414 icmpv6_err_convert(type, code, &err);
416 /* Might be for an request_sock */
417 switch (sk->sk_state) {
418 struct request_sock *req, **prev;
420 if (sock_owned_by_user(sk))
423 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
424 &hdr->saddr, inet6_iif(skb));
428 /* ICMPs are not backlogged, hence we cannot get
429 * an established socket here.
431 WARN_ON(req->sk != NULL);
433 if (seq != tcp_rsk(req)->snt_isn) {
434 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
/* Valid error for a pending request: drop the half-open request. */
438 inet_csk_reqsk_queue_drop(sk, req, prev);
439 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
444 /* Only in fast or simultaneous open. If a fast open socket is
445 * is already accepted it is treated as a connected one below.
447 if (fastopen && fastopen->sk == NULL)
450 if (!sock_owned_by_user(sk)) {
452 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
456 sk->sk_err_soft = err;
460 if (!sock_owned_by_user(sk) && np->recverr) {
462 sk->sk_error_report(sk);
464 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending request sock: grab a route
 * if the caller did not supply one, construct the SYN-ACK skb, checksum
 * it over the request's addresses, and send via ip6_xmit(); records the
 * first SYN-ACK transmit timestamp on success.
 * NOTE(review): lossy extract — some lines are missing.
 */
472 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
474 struct request_sock *req,
476 struct tcp_fastopen_cookie *foc)
478 struct inet_request_sock *ireq = inet_rsk(req);
479 struct ipv6_pinfo *np = inet6_sk(sk);
480 struct flowi6 *fl6 = &fl->u.ip6;
484 /* First, grab a route. */
485 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
488 skb = tcp_make_synack(sk, dst, req, foc);
491 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
492 &ireq->ir_v6_rmt_addr);
494 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
495 if (np->repflow && (ireq->pktopts != NULL))
496 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
498 skb_set_queue_mapping(skb, queue_mapping);
499 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
500 err = net_xmit_eval(err);
501 if (!tcp_rsk(req)->snt_synack && !err)
502 tcp_rsk(req)->snt_synack = tcp_time_stamp;
/* Free the cloned SYN packet options held by a request sock. */
510 static void tcp_v6_reqsk_destructor(struct request_sock *req)
512 kfree_skb(inet_rsk(req)->pktopts);
515 #ifdef CONFIG_TCP_MD5SIG
/* Look up the TCP-MD5 key configured for the given IPv6 peer address. */
516 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
517 const struct in6_addr *addr)
519 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* MD5 key lookup keyed by a connected socket's peer address. */
522 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
523 struct sock *addr_sk)
525 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* MD5 key lookup keyed by a request sock's remote address. */
528 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
529 struct request_sock *req)
531 return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
/* setsockopt(TCP_MD5SIG) handler: copy the key spec from user space and
 * add or delete the key.  A zero key length means delete.  v4-mapped
 * addresses are stored under AF_INET using the embedded IPv4 address
 * (s6_addr32[3]); everything else under AF_INET6.
 * NOTE(review): lossy extract — validation/return lines are missing.
 */
534 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
537 struct tcp_md5sig cmd;
538 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
540 if (optlen < sizeof(cmd))
543 if (copy_from_user(&cmd, optval, sizeof(cmd)))
546 if (sin6->sin6_family != AF_INET6)
549 if (!cmd.tcpm_keylen) {
550 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
551 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
553 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
557 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
560 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
561 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
562 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
564 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
565 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCPv6 pseudo-header (addresses, protocol, length — RFC 2460
 * layout) into the per-CPU MD5 hash descriptor.
 * NOTE(review): the address-copy lines are missing from this extract.
 */
568 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
569 const struct in6_addr *daddr,
570 const struct in6_addr *saddr, int nbytes)
572 struct tcp6_pseudohdr *bp;
573 struct scatterlist sg;
575 bp = &hp->md5_blk.ip6;
576 /* 1. TCP pseudo-header (RFC2460) */
579 bp->protocol = cpu_to_be32(IPPROTO_TCP);
580 bp->len = cpu_to_be32(nbytes);
582 sg_init_one(&sg, bp, sizeof(*bp));
583 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/* Compute the TCP-MD5 signature over pseudo-header + TCP header + key,
 * writing the 16-byte digest to md5_hash.  On any crypto failure the
 * digest is zeroed (clear_hash paths) so callers see a mismatch rather
 * than stale data.
 */
586 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
587 const struct in6_addr *daddr, struct in6_addr *saddr,
588 const struct tcphdr *th)
590 struct tcp_md5sig_pool *hp;
591 struct hash_desc *desc;
593 hp = tcp_get_md5sig_pool();
595 goto clear_hash_noput;
596 desc = &hp->md5_desc;
598 if (crypto_hash_init(desc))
600 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
602 if (tcp_md5_hash_header(hp, th))
604 if (tcp_md5_hash_key(hp, key))
606 if (crypto_hash_final(desc, md5_hash))
609 tcp_put_md5sig_pool();
/* Error path: release the pool and return an all-zero digest. */
613 tcp_put_md5sig_pool();
615 memset(md5_hash, 0, 16);
/* Compute the TCP-MD5 signature over a whole segment (pseudo-header,
 * TCP header, payload, key).  The address pair is taken from the full
 * socket, the request sock, or the skb's IPv6 header, whichever is
 * available.  On crypto failure the digest is zeroed.
 * NOTE(review): lossy extract — the if/else scaffolding selecting the
 * address source is partially missing.
 */
619 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
620 const struct sock *sk,
621 const struct request_sock *req,
622 const struct sk_buff *skb)
624 const struct in6_addr *saddr, *daddr;
625 struct tcp_md5sig_pool *hp;
626 struct hash_desc *desc;
627 const struct tcphdr *th = tcp_hdr(skb);
630 saddr = &inet6_sk(sk)->saddr;
631 daddr = &sk->sk_v6_daddr;
633 saddr = &inet_rsk(req)->ir_v6_loc_addr;
634 daddr = &inet_rsk(req)->ir_v6_rmt_addr;
636 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
637 saddr = &ip6h->saddr;
638 daddr = &ip6h->daddr;
641 hp = tcp_get_md5sig_pool();
643 goto clear_hash_noput;
644 desc = &hp->md5_desc;
646 if (crypto_hash_init(desc))
649 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
651 if (tcp_md5_hash_header(hp, th))
653 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
655 if (tcp_md5_hash_key(hp, key))
657 if (crypto_hash_final(desc, md5_hash))
660 tcp_put_md5sig_pool();
/* Error path: release the pool and return an all-zero digest. */
664 tcp_put_md5sig_pool();
666 memset(md5_hash, 0, 16);
/* Validate the MD5 option on an inbound segment against the key (if
 * any) configured for the sender: reject when a key is configured but
 * the option is absent, when the option is present without a key, or
 * when the recomputed digest does not match.
 */
670 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
672 const __u8 *hash_location = NULL;
673 struct tcp_md5sig_key *hash_expected;
674 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
675 const struct tcphdr *th = tcp_hdr(skb);
679 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
680 hash_location = tcp_parse_md5sig_option(th);
682 /* We've parsed the options - do we have a hash? */
683 if (!hash_expected && !hash_location)
686 if (hash_expected && !hash_location) {
687 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
691 if (!hash_expected && hash_location) {
692 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
696 /* check the signature */
697 genhash = tcp_v6_md5_hash_skb(newhash,
/* Ratelimited log on failure — useful for spotting misconfigured or
 * attacked peers without flooding the log. */
701 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
702 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
703 genhash ? "failed" : "mismatch",
704 &ip6h->saddr, ntohs(th->source),
705 &ip6h->daddr, ntohs(th->dest));
/* Initialize the IPv6-specific fields of a freshly allocated request
 * sock from the incoming SYN: addresses, interface index (forced to the
 * arrival interface for link-local peers on unbound sockets), and a
 * cloned copy of the SYN skb when the listener wants packet options.
 */
712 static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
715 struct inet_request_sock *ireq = inet_rsk(req);
716 struct ipv6_pinfo *np = inet6_sk(sk);
718 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
719 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
721 ireq->ir_iif = sk->sk_bound_dev_if;
723 /* So that link locals have meaning */
724 if (!sk->sk_bound_dev_if &&
725 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
726 ireq->ir_iif = inet6_iif(skb);
/* Hold a reference on the SYN skb if any rx option needs it later. */
728 if (!TCP_SKB_CB(skb)->when &&
729 (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
730 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
731 np->rxopt.bits.rxohlim || np->repflow)) {
732 atomic_inc(&skb->users);
/* route_req callback: resolve the route for a request sock's reply. */
737 static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
738 const struct request_sock *req,
743 return inet6_csk_route_req(sk, &fl->u.ip6, req);
/* Generic request-sock operations for TCPv6 (allocation size plus the
 * SYN-ACK retransmit / ACK / reset / destructor callbacks). */
746 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
748 .obj_size = sizeof(struct tcp6_request_sock),
749 .rtx_syn_ack = tcp_rtx_synack,
750 .send_ack = tcp_v6_reqsk_send_ack,
751 .destructor = tcp_v6_reqsk_destructor,
752 .send_reset = tcp_v6_send_reset,
753 .syn_ack_timeout = tcp_syn_ack_timeout,
/* TCP-specific request-sock callbacks for IPv6: MD5 hooks (when
 * configured), request init, syncookie ISN, routing, ISN generation and
 * SYN-ACK transmission. */
756 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
757 #ifdef CONFIG_TCP_MD5SIG
758 .md5_lookup = tcp_v6_reqsk_md5_lookup,
759 .calc_md5_hash = tcp_v6_md5_hash_skb,
761 .init_req = tcp_v6_init_req,
762 #ifdef CONFIG_SYN_COOKIES
763 .cookie_init_seq = cookie_v6_init_sequence,
765 .route_req = tcp_v6_route_req,
766 .init_seq = tcp_v6_init_sequence,
767 .send_synack = tcp_v6_send_synack,
/* tcp_v6_send_response() - build and send a stand-alone TCP control
 * segment (RST when rst != 0, otherwise a bare ACK) in reply to skb,
 * using the per-netns control socket rather than any connection socket.
 * Optionally appends timestamp and MD5 options, swaps the address/port
 * pair from the received packet, routes the reply and transmits it.
 * NOTE(review): lossy extract — some allocation-failure and option
 * scaffolding lines are missing.
 */
770 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
771 u32 tsval, u32 tsecr, int oif,
772 struct tcp_md5sig_key *key, int rst, u8 tclass,
775 const struct tcphdr *th = tcp_hdr(skb);
777 struct sk_buff *buff;
779 struct net *net = dev_net(skb_dst(skb)->dev);
780 struct sock *ctl_sk = net->ipv6.tcp_sk;
781 unsigned int tot_len = sizeof(struct tcphdr);
782 struct dst_entry *dst;
/* Grow the header for each option we will emit. */
786 tot_len += TCPOLEN_TSTAMP_ALIGNED;
787 #ifdef CONFIG_TCP_MD5SIG
789 tot_len += TCPOLEN_MD5SIG_ALIGNED;
792 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
797 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
799 t1 = (struct tcphdr *) skb_push(buff, tot_len);
800 skb_reset_transport_header(buff);
802 /* Swap the send and the receive. */
803 memset(t1, 0, sizeof(*t1));
804 t1->dest = th->source;
805 t1->source = th->dest;
806 t1->doff = tot_len / 4;
807 t1->seq = htonl(seq);
808 t1->ack_seq = htonl(ack);
809 t1->ack = !rst || !th->ack;
811 t1->window = htons(win);
813 topt = (__be32 *)(t1 + 1);
/* Timestamp option (NOP NOP TIMESTAMP). */
816 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
817 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
818 *topt++ = htonl(tsval);
819 *topt++ = htonl(tsecr);
822 #ifdef CONFIG_TCP_MD5SIG
/* MD5 option: sign with the reply's (swapped) address pair. */
824 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
825 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
826 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
827 &ipv6_hdr(skb)->saddr,
828 &ipv6_hdr(skb)->daddr, t1);
/* Route the reply back to the sender of skb. */
832 memset(&fl6, 0, sizeof(fl6));
833 fl6.daddr = ipv6_hdr(skb)->saddr;
834 fl6.saddr = ipv6_hdr(skb)->daddr;
835 fl6.flowlabel = label;
837 buff->ip_summed = CHECKSUM_PARTIAL;
840 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
842 fl6.flowi6_proto = IPPROTO_TCP;
843 if (rt6_need_strict(&fl6.daddr) && !oif)
844 fl6.flowi6_oif = inet6_iif(skb);
846 fl6.flowi6_oif = oif;
847 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
848 fl6.fl6_dport = t1->dest;
849 fl6.fl6_sport = t1->source;
850 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
852 /* Pass a socket to ip6_dst_lookup either it is for RST
853 * Underlying function will use this to retrieve the network
856 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
858 skb_dst_set(buff, dst);
859 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
860 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
862 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to skb.  When there is no socket but the segment
 * carries an MD5 option, look up a listener by the segment's ports to
 * find the key and verify the signature before emitting the RST (so we
 * never send signed RSTs for segments we cannot authenticate).  The
 * sequence/ack numbers follow the usual RST rules (echo ack_seq, or ack
 * the received data).
 * NOTE(review): lossy extract — rcu lock/release and cleanup lines are
 * missing.
 */
869 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
871 const struct tcphdr *th = tcp_hdr(skb);
872 u32 seq = 0, ack_seq = 0;
873 struct tcp_md5sig_key *key = NULL;
874 #ifdef CONFIG_TCP_MD5SIG
875 const __u8 *hash_location = NULL;
876 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
877 unsigned char newhash[16];
879 struct sock *sk1 = NULL;
/* Never RST in reply to a non-unicast destination. */
886 if (!ipv6_unicast_destination(skb))
889 #ifdef CONFIG_TCP_MD5SIG
890 hash_location = tcp_parse_md5sig_option(th);
891 if (!sk && hash_location) {
893 * active side is lost. Try to find listening socket through
894 * source port, and then find md5 key through listening socket.
895 * we are not loose security here:
896 * Incoming packet is checked with md5 hash with finding key,
897 * no RST generated if md5 hash doesn't match.
899 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
900 &tcp_hashinfo, &ipv6h->saddr,
901 th->source, &ipv6h->daddr,
902 ntohs(th->source), inet6_iif(skb));
907 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
911 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
912 if (genhash || memcmp(hash_location, newhash, 16) != 0)
915 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
/* RST sequence numbers: echo the peer's ack, or ack what was sent. */
920 seq = ntohl(th->ack_seq);
922 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
925 oif = sk ? sk->sk_bound_dev_if : 0;
926 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
928 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst == 0: send a bare
 * ACK (used for timewait and request-sock acknowledgements). */
937 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
938 u32 win, u32 tsval, u32 tsecr, int oif,
939 struct tcp_md5sig_key *key, u8 tclass,
942 tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
/* ACK a segment that matched a TIME-WAIT socket, using the state saved
 * in the timewait sock (snd_nxt/rcv_nxt, scaled window, timestamps,
 * MD5 key, traffic class and flow label). */
946 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
948 struct inet_timewait_sock *tw = inet_twsk(sk);
949 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
951 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
952 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
953 tcp_time_stamp + tcptw->tw_ts_offset,
954 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
955 tw->tw_tclass, (tw->tw_flowlabel << 12));
/* ACK on behalf of a pending request sock.  The sequence number differs
 * for regular SYN-RECV (listener) versus Fast Open, per the comment
 * below. */
960 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
961 struct request_sock *req)
963 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
964 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
966 tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
967 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
968 tcp_rsk(req)->rcv_nxt,
969 req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
970 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
/* For a segment arriving on a listener: match it against pending
 * request socks (tcp_check_req), then against established sockets
 * (a packet may race ahead of accept), and finally fall back to
 * syncookie validation when enabled.
 * NOTE(review): lossy extract — some return statements are missing.
 */
975 static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
977 struct request_sock *req, **prev;
978 const struct tcphdr *th = tcp_hdr(skb);
981 /* Find possible connection requests. */
982 req = inet6_csk_search_req(sk, &prev, th->source,
983 &ipv6_hdr(skb)->saddr,
984 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
986 return tcp_check_req(sk, skb, req, prev, false);
988 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
989 &ipv6_hdr(skb)->saddr, th->source,
990 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
993 if (nsk->sk_state != TCP_TIME_WAIT) {
997 inet_twsk_put(inet_twsk(nsk));
1001 #ifdef CONFIG_SYN_COOKIES
1003 sk = cookie_v6_check(sk, skb);
1008 /* FIXME: this is substantially similar to the ipv4 code.
1009 * Can some kind of merge be done? -- erics
/*
 * tcp_v6_conn_request() - passive open: handle an incoming SYN on a
 * listening socket.  Delegates v4-mapped traffic to tcp_v4_conn_request,
 * applies syncookie / accept-queue pressure policy, allocates and fills
 * a request sock, optionally validates the peer against cached
 * timestamps (tw_recycle) or drops unproven peers under backlog
 * pressure, sends the SYN-ACK (possibly with a Fast Open payload), and
 * queues the request unless a syncookie was used.
 * NOTE(review): lossy extract — error labels and several statements are
 * missing.
 */
1011 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1013 struct tcp_options_received tmp_opt;
1014 struct request_sock *req;
1015 struct inet_request_sock *ireq;
1016 struct tcp_sock *tp = tcp_sk(sk);
1017 __u32 isn = TCP_SKB_CB(skb)->when;
1018 struct dst_entry *dst = NULL;
1019 struct tcp_fastopen_cookie foc = { .len = -1 };
1020 bool want_cookie = false, fastopen;
1022 const struct tcp_request_sock_ops *af_ops;
1025 if (skb->protocol == htons(ETH_P_IP))
1026 return tcp_v4_conn_request(sk, skb);
1028 if (!ipv6_unicast_destination(skb))
/* SYN-flood pressure: decide whether to answer with a syncookie. */
1031 if ((sysctl_tcp_syncookies == 2 ||
1032 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
1033 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1038 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1039 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1043 req = inet_reqsk_alloc(&tcp6_request_sock_ops);
1047 af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
/* Parse the SYN's TCP options; cookies cannot carry most options. */
1049 tcp_clear_options(&tmp_opt);
1050 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1051 tmp_opt.user_mss = tp->rx_opt.user_mss;
1052 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1054 if (want_cookie && !tmp_opt.saw_tstamp)
1055 tcp_clear_options(&tmp_opt);
1057 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1058 tcp_openreq_init(req, &tmp_opt, skb, sk);
1060 ireq = inet_rsk(req);
1061 af_ops->init_req(req, sk, skb);
1063 if (security_inet_conn_request(sk, skb, req))
1064 goto drop_and_release;
1066 if (!want_cookie || tmp_opt.tstamp_ok)
1067 TCP_ECN_create_request(req, skb, sock_net(sk));
1070 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
1071 req->cookie_ts = tmp_opt.tstamp_ok;
1073 /* VJ's idea. We save last timestamp seen
1074 * from the destination in peer table, when entering
1075 * state TIME-WAIT, and check against it before
1076 * accepting new connection request.
1078 * If "isn" is not zero, this request hit alive
1079 * timewait bucket, so that all the necessary checks
1080 * are made in the function processing timewait state.
1082 if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
1083 dst = af_ops->route_req(sk, (struct flowi *)&fl6, req,
1085 if (dst && !tcp_peer_is_proven(req, dst, true)) {
1086 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1087 goto drop_and_release;
1090 /* Kill the following clause, if you dislike this way. */
1091 else if (!sysctl_tcp_syncookies &&
1092 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1093 (sysctl_max_syn_backlog >> 2)) &&
1094 !tcp_peer_is_proven(req, dst, false)) {
1095 /* Without syncookies last quarter of
1096 * backlog is filled with destinations,
1097 * proven to be alive.
1098 * It means that we continue to communicate
1099 * to destinations, already remembered
1100 * to the moment of synflood.
1102 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1103 &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
1104 goto drop_and_release;
1107 isn = af_ops->init_seq(skb);
1111 dst = af_ops->route_req(sk, (struct flowi *)&fl6, req, NULL);
1116 tcp_rsk(req)->snt_isn = isn;
1117 tcp_openreq_init_rwin(req, sk, dst);
1118 fastopen = !want_cookie &&
1119 tcp_try_fastopen(sk, skb, req, &foc, dst);
1120 err = af_ops->send_synack(sk, dst, (struct flowi *)&fl6, req,
1121 skb_get_queue_mapping(skb), &foc);
1123 if (err || want_cookie)
/* Non-cookie path: keep the request queued awaiting the final ACK. */
1126 tcp_rsk(req)->listener = NULL;
1127 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1136 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1137 return 0; /* don't send reset */
/*
 * tcp_v6_syn_recv_sock() - create the child socket once the 3-way
 * handshake completes.
 *
 * Two paths: (1) v4-mapped traffic delegates to tcp_v4_syn_recv_sock()
 * and then retrofits the child with IPv6 bookkeeping and the mapped ops
 * tables; (2) native IPv6 clones the listener via
 * tcp_create_openreq_child(), copies addresses/options from the request
 * sock, duplicates the listener's IPv6 options, syncs the MSS from the
 * route, copies any MD5 key, and hashes the child into the tables.
 *
 * NOTE(review): lossy extract — braces, failure labels and several
 * statements of the original function are missing.
 */
1140 static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1141 struct request_sock *req,
1142 struct dst_entry *dst)
1144 struct inet_request_sock *ireq;
1145 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1146 struct tcp6_sock *newtcp6sk;
1147 struct inet_sock *newinet;
1148 struct tcp_sock *newtp;
1150 #ifdef CONFIG_TCP_MD5SIG
1151 struct tcp_md5sig_key *key;
/* --- v4-mapped path: let IPv4 build the child, then retrofit. --- */
1155 if (skb->protocol == htons(ETH_P_IP)) {
1160 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1165 newtcp6sk = (struct tcp6_sock *)newsk;
1166 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1168 newinet = inet_sk(newsk);
1169 newnp = inet6_sk(newsk);
1170 newtp = tcp_sk(newsk);
1172 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1174 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
1176 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1178 newsk->sk_v6_rcv_saddr = newnp->saddr;
1180 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1181 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1182 #ifdef CONFIG_TCP_MD5SIG
1183 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1186 newnp->ipv6_ac_list = NULL;
1187 newnp->ipv6_fl_list = NULL;
1188 newnp->pktoptions = NULL;
1190 newnp->mcast_oif = inet6_iif(skb);
1191 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1192 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1194 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1197 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1198 * here, tcp_create_openreq_child now does this for us, see the comment in
1199 * that function for the gory details. -acme
1202 /* It is tricky place. Until this moment IPv4 tcp
1203 worked with IPv6 icsk.icsk_af_ops.
1206 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path. --- */
1211 ireq = inet_rsk(req);
1213 if (sk_acceptq_is_full(sk))
1217 dst = inet6_csk_route_req(sk, &fl6, req);
1222 newsk = tcp_create_openreq_child(sk, req, skb);
1227 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1228 * count here, tcp_create_openreq_child now does this for us, see the
1229 * comment in that function for the gory details. -acme
1232 newsk->sk_gso_type = SKB_GSO_TCPV6;
1233 __ip6_dst_store(newsk, dst, NULL, NULL);
1234 inet6_sk_rx_dst_set(newsk, skb);
1236 newtcp6sk = (struct tcp6_sock *)newsk;
1237 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1239 newtp = tcp_sk(newsk);
1240 newinet = inet_sk(newsk);
1241 newnp = inet6_sk(newsk);
1243 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1245 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1246 newnp->saddr = ireq->ir_v6_loc_addr;
1247 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1248 newsk->sk_bound_dev_if = ireq->ir_iif;
1250 /* Now IPv6 options...
1252 First: no IPv4 options.
1254 newinet->inet_opt = NULL;
1255 newnp->ipv6_ac_list = NULL;
1256 newnp->ipv6_fl_list = NULL;
1259 newnp->rxopt.all = np->rxopt.all;
1261 /* Clone pktoptions received with SYN */
1262 newnp->pktoptions = NULL;
1263 if (ireq->pktopts != NULL) {
1264 newnp->pktoptions = skb_clone(ireq->pktopts,
1265 sk_gfp_atomic(sk, GFP_ATOMIC));
1266 consume_skb(ireq->pktopts);
1267 ireq->pktopts = NULL;
1268 if (newnp->pktoptions)
1269 skb_set_owner_r(newnp->pktoptions, newsk);
1272 newnp->mcast_oif = inet6_iif(skb);
1273 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1274 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1276 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1278 /* Clone native IPv6 options from listening socket (if any)
1280 Yes, keeping reference count would be much more clever,
1281 but we make one more one thing there: reattach optmem
1285 newnp->opt = ipv6_dup_options(newsk, np->opt);
1287 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1289 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1290 newnp->opt->opt_flen);
/* MSS/advmss from the route, clamped by any user-set MSS. */
1292 tcp_sync_mss(newsk, dst_mtu(dst));
1293 newtp->advmss = dst_metric_advmss(dst);
1294 if (tcp_sk(sk)->rx_opt.user_mss &&
1295 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1296 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1298 tcp_initialize_rcv_mss(newsk);
1300 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1301 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1303 #ifdef CONFIG_TCP_MD5SIG
1304 /* Copy over the MD5 key from the original socket */
1305 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1307 /* We're using one, so create a matching key
1308 * on the newsk structure. If we fail to get
1309 * memory, then we end up not copying the key
1312 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1313 AF_INET6, key->key, key->keylen,
1314 sk_gfp_atomic(sk, GFP_ATOMIC));
1318 if (__inet_inherit_port(sk, newsk) < 0) {
1319 inet_csk_prepare_forced_close(newsk);
1323 __inet6_hash(newsk, NULL);
/* Failure paths: account listener overflows/drops. */
1328 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1332 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1336 /* The socket must have it's spinlock held when we get
1339 * We have a potential double-lock case here, so even when
1340 * doing backlog processing we use the BH locking scheme.
1341 * This is because we cannot sleep with the original spinlock
/* tcp_v6_do_rcv - backlog/established receive handler for an IPv6 TCP socket.
 *
 * Called with the socket spinlock held (see the BH-locking comment above).
 * Dispatches v4-mapped packets to tcp_v4_do_rcv(), runs either the
 * ESTABLISHED fast path or the full TCP state machine, and finally latches
 * received IPv6 packet options (IPV6_PKTOPTIONS) from the last in-order
 * segment onto np->pktoptions.
 *
 * NOTE(review): this excerpt elides several original lines (braces, error
 * labels such as reset/discard/csum_err); comments describe only visible code.
 */
1344 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1346 	struct ipv6_pinfo *np = inet6_sk(sk);
1347 	struct tcp_sock *tp;
1348 	struct sk_buff *opt_skb = NULL;
1350 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1351 	   goes to IPv4 receive handler and backlogged.
1352 	   From backlog it always goes here. Kerboom...
1353 	   Fortunately, tcp_rcv_established and rcv_established
1354 	   handle them correctly, but it is not case with
1355 	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1358 	if (skb->protocol == htons(ETH_P_IP))
/* v4-mapped traffic: hand the whole segment to the IPv4 receive path. */
1359 		return tcp_v4_do_rcv(sk, skb);
1361 #ifdef CONFIG_TCP_MD5SIG
/* Drop segments whose TCP-MD5 signature does not validate (elided branch). */
1362 	if (tcp_v6_inbound_md5_hash(sk, skb))
1366 	if (sk_filter(sk, skb))
1370 	 *	socket locking is here for SMP purposes as backlog rcv
1371 	 *	is currently called with bh processing disabled.
1374 	/* Do Stevens' IPV6_PKTOPTIONS.
1376 	   Yes, guys, it is the only place in our code, where we
1377 	   may make it not affecting IPv4.
1378 	   The rest of code is protocol independent,
1379 	   and I do not like idea to uglify IPv4.
1381 	   Actually, all the idea behind IPV6_PKTOPTIONS
1382 	   looks not very well thought. For now we latch
1383 	   options, received in the last packet, enqueued
1384 	   by tcp. Feel free to propose better solution.
/* Clone the skb so packet options survive after the original is consumed.
 * Allocation failure just means no options are latched (guard elided). */
1388 		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1390 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1391 		struct dst_entry *dst = sk->sk_rx_dst;
1393 		sock_rps_save_rxhash(sk, skb);
/* Invalidate the cached rx route if the ingress device or routing
 * cookie no longer matches (dst-release line elided). */
1395 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1396 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1398 				sk->sk_rx_dst = NULL;
1402 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1404 		goto ipv6_pktoptions;
/* Slow path: validate header length and checksum before state processing. */
1408 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1411 	if (sk->sk_state == TCP_LISTEN) {
1412 		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1417 		 * Queue it on the new socket if the new socket is active,
1418 		 * otherwise we just shortcircuit this and continue with
1422 			sock_rps_save_rxhash(nsk, skb);
1423 			if (tcp_child_process(sk, nsk, skb))
1426 				__kfree_skb(opt_skb);
1430 		sock_rps_save_rxhash(sk, skb);
1432 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1435 	goto ipv6_pktoptions;
/* reset/discard/csum_err labels (elided): send RST, count errors, free skb. */
1439 	tcp_v6_send_reset(sk, skb);
1442 		__kfree_skb(opt_skb);
1446 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1447 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1452 	/* Do you ask, what is it?
1454 	   1. skb was enqueued by tcp.
1455 	   2. skb is added to tail of read queue, rather than out of order.
1456 	   3. socket is not in passive state.
1457 	   4. Finally, it really contains options, which user wants to receive.
/* Latch options only from the in-order tail segment of an active socket. */
1460 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1461 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1462 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1463 			np->mcast_oif = inet6_iif(opt_skb);
1464 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1465 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1466 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1467 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1469 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1470 		if (ipv6_opt_accepted(sk, opt_skb)) {
1471 			skb_set_owner_r(opt_skb, sk);
/* xchg() swaps in the new pktoptions and returns the old one for freeing. */
1472 			opt_skb = xchg(&np->pktoptions, opt_skb);
1474 			__kfree_skb(opt_skb);
1475 			opt_skb = xchg(&np->pktoptions, NULL);
/* tcp_v6_rcv - top-level IPv6 TCP packet input handler (softirq context).
 *
 * Validates the TCP header and checksum, fills in the per-skb control
 * block, looks up the owning socket, and either processes the segment
 * directly (socket unlocked), prequeues it, or appends it to the socket
 * backlog.  TIME_WAIT sockets are handled via tcp_timewait_state_process().
 *
 * NOTE(review): many original lines (labels such as no_tcp_socket /
 * do_time_wait / bad_packet, closing braces) are elided in this excerpt.
 */
1483 static int tcp_v6_rcv(struct sk_buff *skb)
1485 	const struct tcphdr *th;
1486 	const struct ipv6hdr *hdr;
1489 	struct net *net = dev_net(skb->dev);
/* Only process packets addressed to this host. */
1491 	if (skb->pkt_type != PACKET_HOST)
1495 	 *	Count it even if it's bad.
1497 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
/* Ensure the basic and full (doff-sized) TCP header are in linear data. */
1499 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1504 	if (th->doff < sizeof(struct tcphdr)/4)
1506 	if (!pskb_may_pull(skb, th->doff*4))
1509 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Populate TCP_SKB_CB with host-order sequence numbers and metadata. */
1513 	hdr = ipv6_hdr(skb);
1514 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1515 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1516 				    skb->len - th->doff*4);
1517 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1518 	TCP_SKB_CB(skb)->when = 0;
1519 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1520 	TCP_SKB_CB(skb)->sacked = 0;
1522 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1527 	if (sk->sk_state == TCP_TIME_WAIT)
/* IPV6_MINHOPCOUNT: drop segments that arrived with too few hops left. */
1530 	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1531 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1532 		goto discard_and_relse;
1535 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1536 		goto discard_and_relse;
1538 	if (sk_filter(sk, skb))
1539 		goto discard_and_relse;
1541 	sk_mark_napi_id(sk, skb);
1544 	bh_lock_sock_nested(sk);
1546 	if (!sock_owned_by_user(sk)) {
1547 #ifdef CONFIG_NET_DMA
1548 		struct tcp_sock *tp = tcp_sk(sk);
1549 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1550 			tp->ucopy.dma_chan = net_dma_find_channel();
1551 		if (tp->ucopy.dma_chan)
1552 			ret = tcp_v6_do_rcv(sk, skb);
/* No DMA channel: try the prequeue first, else process inline. */
1556 			if (!tcp_prequeue(sk, skb))
1557 				ret = tcp_v6_do_rcv(sk, skb);
/* Socket owned by user: queue to backlog, bounded by rcvbuf + sndbuf. */
1559 	} else if (unlikely(sk_add_backlog(sk, skb,
1560 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1562 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1563 		goto discard_and_relse;
1568 	return ret ? -1 : 0;
/* no_tcp_socket path (label elided): policy check, then RST if valid. */
1571 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1574 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1576 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1578 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1580 		tcp_v6_send_reset(NULL, skb);
/* do_time_wait path (label elided): re-validate, then run the TW machine. */
1592 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1593 		inet_twsk_put(inet_twsk(sk));
1597 	if (skb->len < (th->doff<<2)) {
1598 		inet_twsk_put(inet_twsk(sk));
1601 	if (tcp_checksum_complete(skb)) {
1602 		inet_twsk_put(inet_twsk(sk));
1606 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN (case label elided): a new SYN may revive the connection on
 * a matching listener; if found, drop the TW socket and restart processing. */
1611 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1612 					    &ipv6_hdr(skb)->saddr, th->source,
1613 					    &ipv6_hdr(skb)->daddr,
1614 					    ntohs(th->dest), inet6_iif(skb));
1616 			struct inet_timewait_sock *tw = inet_twsk(sk);
1617 			inet_twsk_deschedule(tw, &tcp_death_row);
1622 		/* Fall through to ACK */
1625 		tcp_v6_timewait_ack(sk, skb);
1629 	case TCP_TW_SUCCESS:
/* tcp_v6_early_demux - early socket demultiplex before IPv6 routing.
 *
 * Looks up an established socket for the incoming segment; on a hit,
 * attaches the socket to the skb (sock_edemux destructor) and, if the
 * socket's cached rx dst is still valid for this ingress device, sets it
 * on the skb so the routing lookup can be skipped.
 *
 * NOTE(review): some lines (th assignment, sk NULL check, closing braces)
 * are elided in this excerpt.
 */
1635 static void tcp_v6_early_demux(struct sk_buff *skb)
1637 	const struct ipv6hdr *hdr;
1638 	const struct tcphdr *th;
1641 	if (skb->pkt_type != PACKET_HOST)
1644 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1647 	hdr = ipv6_hdr(skb);
1650 	if (th->doff < sizeof(struct tcphdr) / 4)
/* Established-table-only lookup: listeners/TW are left for full input. */
1653 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1654 					&hdr->saddr, th->source,
1655 					&hdr->daddr, ntohs(th->dest),
1659 		skb->destructor = sock_edemux;
1660 		if (sk->sk_state != TCP_TIME_WAIT) {
1661 			struct dst_entry *dst = sk->sk_rx_dst;
/* Re-validate the cached dst against the socket's rx routing cookie. */
1664 				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1666 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1667 				skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket operations for IPv6 TCP (size, uniqueness, destructor). */
1672 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1673 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1674 	.twsk_unique	= tcp_twsk_unique,
1675 	.twsk_destructor = tcp_twsk_destructor,
/* Address-family-specific connection-socket operations for native IPv6
 * TCP sockets (transmit, header rebuild, connection setup, sockopts). */
1678 static const struct inet_connection_sock_af_ops ipv6_specific = {
1679 	.queue_xmit	   = inet6_csk_xmit,
1680 	.send_check	   = tcp_v6_send_check,
1681 	.rebuild_header	   = inet6_sk_rebuild_header,
1682 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1683 	.conn_request	   = tcp_v6_conn_request,
1684 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1685 	.net_header_len	   = sizeof(struct ipv6hdr),
1686 	.net_frag_header_len = sizeof(struct frag_hdr),
1687 	.setsockopt	   = ipv6_setsockopt,
1688 	.getsockopt	   = ipv6_getsockopt,
1689 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1690 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1691 	.bind_conflict	   = inet6_csk_bind_conflict,
1692 #ifdef CONFIG_COMPAT
1693 	.compat_setsockopt = compat_ipv6_setsockopt,
1694 	.compat_getsockopt = compat_ipv6_getsockopt,
1698 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1699 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1700 	.md5_lookup	=	tcp_v6_md5_lookup,
1701 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1702 	.md5_parse	=	tcp_v6_parse_md5_keys,
/* AF ops for v4-mapped sockets: IPv4 transmit path (ip_queue_xmit,
 * tcp_v4_send_check) behind the INET6 socket API. */
1707 *	TCP over IPv4 via INET6 API
1709 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1710 	.queue_xmit	   = ip_queue_xmit,
1711 	.send_check	   = tcp_v4_send_check,
1712 	.rebuild_header	   = inet_sk_rebuild_header,
1713 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1714 	.conn_request	   = tcp_v6_conn_request,
1715 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1716 	.net_header_len	   = sizeof(struct iphdr),
1717 	.setsockopt	   = ipv6_setsockopt,
1718 	.getsockopt	   = ipv6_getsockopt,
1719 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1720 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1721 	.bind_conflict	   = inet6_csk_bind_conflict,
1722 #ifdef CONFIG_COMPAT
1723 	.compat_setsockopt = compat_ipv6_setsockopt,
1724 	.compat_getsockopt = compat_ipv6_getsockopt,
1728 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 ops for v4-mapped sockets: IPv4 lookup/hash, IPv6 key parsing. */
1729 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1730 	.md5_lookup	=	tcp_v4_md5_lookup,
1731 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1732 	.md5_parse	=	tcp_v6_parse_md5_keys,
1736 /* NOTE: A lot of things set to zero explicitly by call to
1737 * sk_alloc() so need not be done here.
/* tcp_v6_init_sock - per-socket init: install the IPv6 AF ops (and the
 * IPv6 MD5 ops when enabled).  Remaining init lines are elided here;
 * presumably tcp_init_sock() is called as well — confirm against the
 * full source. */
1739 static int tcp_v6_init_sock(struct sock *sk)
1741 	struct inet_connection_sock *icsk = inet_csk(sk);
1745 	icsk->icsk_af_ops = &ipv6_specific;
1747 #ifdef CONFIG_TCP_MD5SIG
1748 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* tcp_v6_destroy_sock - tear down TCP state, then IPv6 socket state. */
1754 static void tcp_v6_destroy_sock(struct sock *sk)
1756 	tcp_v4_destroy_sock(sk);
1757 	inet6_destroy_sock(sk);
1760 #ifdef CONFIG_PROC_FS
1761 /* Proc filesystem TCPv6 sock list dumping. */
/* get_openreq6 - format one SYN_RECV request socket as a /proc/net/tcp6
 * row (index, addresses, ports, timer, uid; zeros for fields that do not
 * apply to open requests).  The seq_printf call itself and some argument
 * lines are elided in this excerpt. */
1762 static void get_openreq6(struct seq_file *seq,
1763 			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
/* Remaining time-to-die of the request, in jiffies (may go negative). */
1765 	int ttd = req->expires - jiffies;
1766 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1767 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1773 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1774 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1776 		   src->s6_addr32[0], src->s6_addr32[1],
1777 		   src->s6_addr32[2], src->s6_addr32[3],
1778 		   inet_rsk(req)->ir_num,
1779 		   dest->s6_addr32[0], dest->s6_addr32[1],
1780 		   dest->s6_addr32[2], dest->s6_addr32[3],
1781 		   ntohs(inet_rsk(req)->ir_rmt_port),
1783 		   0, 0, /* could print option size, but that is af dependent. */
1784 		   1,   /* timers active (only the expire timer) */
1785 		   jiffies_to_clock_t(ttd),
1787 		   from_kuid_munged(seq_user_ns(seq), uid),
1788 		   0,  /* non standard timer */
1789 		   0, /* open_requests have no inode */
/* get_tcp6_sock - format one full TCP socket as a /proc/net/tcp6 row:
 * addresses/ports, state, queue sizes, pending timer, uid, refcount,
 * RTO/ATO, and either listener max fastopen qlen or ssthresh.
 * Some lines (timer_active assignments, seq_printf open) are elided. */
1793 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1795 	const struct in6_addr *dest, *src;
1798 	unsigned long timer_expires;
1799 	const struct inet_sock *inet = inet_sk(sp);
1800 	const struct tcp_sock *tp = tcp_sk(sp);
1801 	const struct inet_connection_sock *icsk = inet_csk(sp);
1802 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
1804 	dest  = &sp->sk_v6_daddr;
1805 	src   = &sp->sk_v6_rcv_saddr;
1806 	destp = ntohs(inet->inet_dport);
1807 	srcp  = ntohs(inet->inet_sport);
/* Pick whichever timer is pending: retransmit, zero-window probe, or
 * keepalive (sk_timer); otherwise report "now" (elided else branch). */
1809 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1811 		timer_expires	= icsk->icsk_timeout;
1812 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1814 		timer_expires	= icsk->icsk_timeout;
1815 	} else if (timer_pending(&sp->sk_timer)) {
1817 		timer_expires	= sp->sk_timer.expires;
1820 		timer_expires = jiffies;
1824 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1825 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1827 		   src->s6_addr32[0], src->s6_addr32[1],
1828 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1829 		   dest->s6_addr32[0], dest->s6_addr32[1],
1830 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx_queue = unacked bytes; rx_queue = backlog for listeners, else unread. */
1832 		   tp->write_seq-tp->snd_una,
1833 		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1835 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1836 		   icsk->icsk_retransmits,
1837 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1838 		   icsk->icsk_probes_out,
1840 		   atomic_read(&sp->sk_refcnt), sp,
1841 		   jiffies_to_clock_t(icsk->icsk_rto),
1842 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1843 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1845 		   sp->sk_state == TCP_LISTEN ?
1846 			(fastopenq ? fastopenq->max_qlen : 0) :
1847 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* get_timewait6_sock - format one TIME_WAIT socket as a /proc/net/tcp6
 * row; most live-socket fields are reported as zero. */
1851 static void get_timewait6_sock(struct seq_file *seq,
1852 			       struct inet_timewait_sock *tw, int i)
1854 	const struct in6_addr *dest, *src;
/* Remaining TIME_WAIT lifetime relative to the TW timestamp clock. */
1856 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1858 	dest = &tw->tw_v6_daddr;
1859 	src  = &tw->tw_v6_rcv_saddr;
1860 	destp = ntohs(tw->tw_dport);
1861 	srcp  = ntohs(tw->tw_sport);
1864 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1865 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1867 		   src->s6_addr32[0], src->s6_addr32[1],
1868 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1869 		   dest->s6_addr32[0], dest->s6_addr32[1],
1870 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1871 		   tw->tw_substate, 0, 0,
1872 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1873 		   atomic_read(&tw->tw_refcnt), tw);
/* tcp6_seq_show - seq_file ->show() for /proc/net/tcp6: print the header
 * row for SEQ_START_TOKEN, otherwise dispatch on iterator state to the
 * appropriate per-socket formatter. */
1876 static int tcp6_seq_show(struct seq_file *seq, void *v)
1878 	struct tcp_iter_state *st;
1879 	struct sock *sk = v;
1881 	if (v == SEQ_START_TOKEN) {
1886 			 "st tx_queue rx_queue tr tm->when retrnsmt"
1887 			 "   uid  timeout inode\n");
1892 	switch (st->state) {
1893 	case TCP_SEQ_STATE_LISTENING:
1894 	case TCP_SEQ_STATE_ESTABLISHED:
/* TIME_WAIT sockets share the established table; format them separately. */
1895 		if (sk->sk_state == TCP_TIME_WAIT)
1896 			get_timewait6_sock(seq, v, st->num);
1898 			get_tcp6_sock(seq, v, st->num);
1900 	case TCP_SEQ_STATE_OPENREQ:
1901 		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
/* file_operations backing the /proc/net/tcp6 seq_file. */
1908 static const struct file_operations tcp6_afinfo_seq_fops = {
1909 	.owner   = THIS_MODULE,
1910 	.open    = tcp_seq_open,
1912 	.llseek  = seq_lseek,
1913 	.release = seq_release_net
/* Registration descriptor tying tcp6_seq_show to the shared TCP proc code. */
1916 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1919 	.seq_fops	= &tcp6_afinfo_seq_fops,
1921 		.show		= tcp6_seq_show,
/* tcp6_proc_init - register /proc/net/tcp6 in this network namespace. */
1925 int __net_init tcp6_proc_init(struct net *net)
1927 	return tcp_proc_register(net, &tcp6_seq_afinfo);
/* tcp6_proc_exit - unregister /proc/net/tcp6 from this network namespace. */
1930 void tcp6_proc_exit(struct net *net)
1932 	tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* tcp_v6_clear_sk - zero the socket for reuse while preserving the pinet6
 * pointer, which concurrent RCU lookups may still dereference: clear up to
 * pinet6, skip it, then clear the remainder. */
1936 static void tcp_v6_clear_sk(struct sock *sk, int size)
1938 	struct inet_sock *inet = inet_sk(sk);
1940 	/* we do not want to clear pinet6 field, because of RCU lookups */
1941 	sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
1943 	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
1944 	memset(&inet->pinet6 + 1, 0, size);
/* proto descriptor for IPv6 TCP sockets: wires the generic TCP entry
 * points together with the IPv6-specific connect/init/destroy/backlog
 * handlers, sysctl limits, and slab/timewait/request-sock metadata. */
1947 struct proto tcpv6_prot = {
1949 	.owner			= THIS_MODULE,
1951 	.connect		= tcp_v6_connect,
1952 	.disconnect		= tcp_disconnect,
1953 	.accept			= inet_csk_accept,
1955 	.init			= tcp_v6_init_sock,
1956 	.destroy		= tcp_v6_destroy_sock,
1957 	.shutdown		= tcp_shutdown,
1958 	.setsockopt		= tcp_setsockopt,
1959 	.getsockopt		= tcp_getsockopt,
1960 	.recvmsg		= tcp_recvmsg,
1961 	.sendmsg		= tcp_sendmsg,
1962 	.sendpage		= tcp_sendpage,
1963 	.backlog_rcv		= tcp_v6_do_rcv,
1964 	.release_cb		= tcp_release_cb,
1965 	.mtu_reduced		= tcp_v6_mtu_reduced,
1966 	.hash			= tcp_v6_hash,
1967 	.unhash			= inet_unhash,
1968 	.get_port		= inet_csk_get_port,
1969 	.enter_memory_pressure	= tcp_enter_memory_pressure,
1970 	.stream_memory_free	= tcp_stream_memory_free,
1971 	.sockets_allocated	= &tcp_sockets_allocated,
1972 	.memory_allocated	= &tcp_memory_allocated,
1973 	.memory_pressure	= &tcp_memory_pressure,
1974 	.orphan_count		= &tcp_orphan_count,
1975 	.sysctl_mem		= sysctl_tcp_mem,
1976 	.sysctl_wmem		= sysctl_tcp_wmem,
1977 	.sysctl_rmem		= sysctl_tcp_rmem,
1978 	.max_header		= MAX_TCP_HEADER,
1979 	.obj_size		= sizeof(struct tcp6_sock),
/* SLAB_DESTROY_BY_RCU pairs with the RCU-aware clearing in tcp_v6_clear_sk. */
1980 	.slab_flags		= SLAB_DESTROY_BY_RCU,
1981 	.twsk_prot		= &tcp6_timewait_sock_ops,
1982 	.rsk_prot		= &tcp6_request_sock_ops,
1983 	.h.hashinfo		= &tcp_hashinfo,
1984 	.no_autobind		= true,
1985 #ifdef CONFIG_COMPAT
1986 	.compat_setsockopt	= compat_tcp_setsockopt,
1987 	.compat_getsockopt	= compat_tcp_getsockopt,
1989 #ifdef CONFIG_MEMCG_KMEM
1990 	.proto_cgroup		= tcp_proto_cgroup,
1992 	.clear_sk		= tcp_v6_clear_sk,
/* inet6 protocol hook for IPPROTO_TCP: input, early demux, ICMPv6 errors. */
1995 static const struct inet6_protocol tcpv6_protocol = {
1996 	.early_demux	=	tcp_v6_early_demux,
1997 	.handler	=	tcp_v6_rcv,
1998 	.err_handler	=	tcp_v6_err,
1999 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* protosw entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot. */
2002 static struct inet_protosw tcpv6_protosw = {
2003 	.type		=	SOCK_STREAM,
2004 	.protocol	=	IPPROTO_TCP,
2005 	.prot		=	&tcpv6_prot,
2006 	.ops		=	&inet6_stream_ops,
2007 	.flags		=	INET_PROTOSW_PERMANENT |
/* tcpv6_net_init - create the per-namespace control socket used for
 * sending RSTs/ACKs on behalf of no-socket segments. */
2011 static int __net_init tcpv6_net_init(struct net *net)
2013 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2014 				    SOCK_RAW, IPPROTO_TCP, net);
/* tcpv6_net_exit - destroy the per-namespace control socket. */
2017 static void __net_exit tcpv6_net_exit(struct net *net)
2019 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* tcpv6_net_exit_batch - purge IPv6 TIME_WAIT sockets for the namespaces
 * being torn down (batched across the whole exit list). */
2022 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2024 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* Per-network-namespace lifecycle hooks for IPv6 TCP. */
2027 static struct pernet_operations tcpv6_net_ops = {
2028 	.init	    = tcpv6_net_init,
2029 	.exit	    = tcpv6_net_exit,
2030 	.exit_batch = tcpv6_net_exit_batch,
/* tcpv6_init - module init: register the inet6 protocol handler, the
 * protosw entry, and the pernet ops, unwinding in reverse order on
 * failure (goto-cleanup; some labels/returns are elided here). */
2033 int __init tcpv6_init(void)
2037 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2041 	/* register inet6 protocol */
2042 	ret = inet6_register_protosw(&tcpv6_protosw);
2044 		goto out_tcpv6_protocol;
2046 	ret = register_pernet_subsys(&tcpv6_net_ops);
2048 		goto out_tcpv6_protosw;
/* Error unwind labels (elided): undo registrations in reverse order. */
2053 	inet6_unregister_protosw(&tcpv6_protosw);
2055 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2059 void tcpv6_exit(void)
2061 unregister_pernet_subsys(&tcpv6_net_ops);
2062 inet6_unregister_protosw(&tcpv6_protosw);
2063 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);