3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
46 #include <linux/indirect_call_wrapper.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/inet6_connection_sock.h>
53 #include <net/transp_v6.h>
54 #include <net/addrconf.h>
55 #include <net/ip6_route.h>
56 #include <net/ip6_checksum.h>
57 #include <net/inet_ecn.h>
58 #include <net/protocol.h>
61 #include <net/dsfield.h>
62 #include <net/timewait_sock.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/busy_poll.h>
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
70 #include <crypto/hash.h>
71 #include <linux/scatterlist.h>
73 #include <trace/events/tcp.h>
/* Forward declarations for handlers referenced before their definitions,
 * plus the per-address-family operation tables selected at connect time.
 */
75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
/* af_ops: ipv6_specific for native IPv6, ipv6_mapped for v4-mapped sockets. */
81 static const struct inet_connection_sock_af_ops ipv6_mapped;
82 static const struct inet_connection_sock_af_ops ipv6_specific;
83 #ifdef CONFIG_TCP_MD5SIG
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* NOTE(review): this extraction appears to have dropped the #else/#endif and
 * stub body that normally follow; kept byte-identical — verify against the
 * full file.
 */
87 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
88 const struct in6_addr *addr)
94 /* Helper returning the inet6 address from a given tcp socket.
95 * It can be used in TCP stack instead of inet6_sk(sk).
96 * This avoids a dereference and allow compiler optimizations.
97 * It is a specialized version of inet6_sk_generic().
/* The ipv6_pinfo sits at a fixed offset at the tail of struct tcp6_sock,
 * so the pointer can be computed arithmetically from @sk alone.
 */
99 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
101 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
103 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
/* Cache the received skb's route on the socket for the RX fast path. */
106 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
108 struct dst_entry *dst = skb_dst(skb);
/* Only cache the dst if we can safely take a reference on it. */
110 if (dst && dst_hold_safe(dst)) {
111 const struct rt6_info *rt = (const struct rt6_info *)dst;
/* Record incoming ifindex and route cookie so the cached dst can be
 * revalidated later (see the dst->ops->check() call in tcp_v6_do_rcv()).
 */
114 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
115 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Initial sequence number for an incoming connection, derived from the
 * packet's address/port 4-tuple via the secure-sequence generator.
 * NOTE(review): a line seems missing between 122 and 124 in this extraction
 * (likely the destination-port argument); kept byte-identical.
 */
119 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
121 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
122 ipv6_hdr(skb)->saddr.s6_addr32,
124 tcp_hdr(skb)->source);
/* Per-connection timestamp offset, keyed on the address pair so that
 * TCP timestamps do not leak a global clock across connections.
 */
127 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
129 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
130 ipv6_hdr(skb)->saddr.s6_addr32);
/* Run the cgroup-BPF INET6_CONNECT hook before the real connect() work. */
133 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
136 /* This check is replicated from tcp_v6_connect() and intended to
137 * prevent BPF program called below from accessing bytes that are out
138 * of the bound specified by user in addr_len.
/* Address shorter than RFC2133 minimum: reject (error-return line not
 * visible in this extraction).
 */
140 if (addr_len < SIN6_LEN_RFC2133)
143 sock_owned_by_me(sk);
145 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active open (connect()) for an IPv6 TCP socket: validate the address,
 * resolve flow label / scope, handle v4-mapped destinations by delegating
 * to tcp_v4_connect(), look up a route, pick source address/port and send
 * the SYN via tcp_connect().
 */
148 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
151 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
152 struct inet_sock *inet = inet_sk(sk);
153 struct inet_connection_sock *icsk = inet_csk(sk);
154 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
155 struct tcp_sock *tp = tcp_sk(sk);
156 struct in6_addr *saddr = NULL, *final_p, final;
157 struct ipv6_txoptions *opt;
159 struct dst_entry *dst;
162 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
164 if (addr_len < SIN6_LEN_RFC2133)
167 if (usin->sin6_family != AF_INET6)
168 return -EAFNOSUPPORT;
170 memset(&fl6, 0, sizeof(fl6));
/* Honor a caller-supplied flow label; look it up if one is set. */
173 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
174 IP6_ECN_flow_init(fl6.flowlabel);
175 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
176 struct ip6_flowlabel *flowlabel;
177 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
180 fl6_sock_release(flowlabel);
185 * connect() to INADDR_ANY means loopback (BSD'ism).
/* Map the wildcard address to the loopback matching the socket's
 * address family (v4-mapped vs native IPv6).
 */
188 if (ipv6_addr_any(&usin->sin6_addr)) {
189 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
190 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
193 usin->sin6_addr = in6addr_loopback;
196 addr_type = ipv6_addr_type(&usin->sin6_addr);
198 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a scope: either sin6_scope_id or an
 * already-bound device (RFC 4007 scoped addressing).
 */
201 if (addr_type&IPV6_ADDR_LINKLOCAL) {
202 if (addr_len >= sizeof(struct sockaddr_in6) &&
203 usin->sin6_scope_id) {
204 /* If interface is set while binding, indices
207 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
210 sk->sk_bound_dev_if = usin->sin6_scope_id;
213 /* Connect to link-local address requires an interface */
214 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer: stale PAWS state must be reset. */
218 if (tp->rx_opt.ts_recent_stamp &&
219 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
220 tp->rx_opt.ts_recent = 0;
221 tp->rx_opt.ts_recent_stamp = 0;
225 sk->sk_v6_daddr = usin->sin6_addr;
226 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: switch ops to the mapped variants and hand the
 * connect over to the IPv4 path; restore IPv6 ops on failure below.
 */
232 if (addr_type & IPV6_ADDR_MAPPED) {
233 u32 exthdrlen = icsk->icsk_ext_hdr_len;
234 struct sockaddr_in sin;
236 if (__ipv6_only_sock(sk))
239 sin.sin_family = AF_INET;
240 sin.sin_port = usin->sin6_port;
241 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
243 icsk->icsk_af_ops = &ipv6_mapped;
244 sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
249 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* Failure path: undo the switch to mapped operations. */
252 icsk->icsk_ext_hdr_len = exthdrlen;
253 icsk->icsk_af_ops = &ipv6_specific;
254 sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256 tp->af_specific = &tcp_sock_ipv6_specific;
260 np->saddr = sk->sk_v6_rcv_saddr;
265 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow description and resolve a route for it. */
268 fl6.flowi6_proto = IPPROTO_TCP;
269 fl6.daddr = sk->sk_v6_daddr;
270 fl6.saddr = saddr ? *saddr : np->saddr;
271 fl6.flowi6_oif = sk->sk_bound_dev_if;
272 fl6.flowi6_mark = sk->sk_mark;
273 fl6.fl6_dport = usin->sin6_port;
274 fl6.fl6_sport = inet->inet_sport;
275 fl6.flowi6_uid = sk->sk_uid;
277 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
278 final_p = fl6_update_dst(&fl6, opt, &final);
280 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
282 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
290 sk->sk_v6_rcv_saddr = *saddr;
293 /* set the source address */
295 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 sk->sk_gso_type = SKB_GSO_TCPV6;
298 ip6_dst_store(sk, dst, NULL, NULL);
/* Account extension-header space in the MSS clamp. */
300 icsk->icsk_ext_hdr_len = 0;
302 icsk->icsk_ext_hdr_len = opt->opt_flen +
305 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 inet->inet_dport = usin->sin6_port;
/* Pick an ephemeral port and hash the socket before sending the SYN. */
309 tcp_set_state(sk, TCP_SYN_SENT);
310 err = inet6_hash_connect(tcp_death_row, sk);
316 if (likely(!tp->repair)) {
318 tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
319 sk->sk_v6_daddr.s6_addr32,
322 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
324 sk->sk_v6_daddr.s6_addr32);
/* TCP Fast Open may defer the actual SYN until the first sendmsg(). */
327 if (tcp_fastopen_defer_connect(sk, &err))
332 err = tcp_connect(sk);
/* Common failure path: tear the half-set-up connection back down. */
339 tcp_set_state(sk, TCP_CLOSE);
341 inet->inet_dport = 0;
342 sk->sk_route_caps = 0;
/* React to a Path MTU decrease: update the cached route's PMTU and
 * retransmit with the smaller MSS if our cookie is now too large.
 */
346 static void tcp_v6_mtu_reduced(struct sock *sk)
348 struct dst_entry *dst;
/* Nothing to do for sockets with no established path. */
350 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
357 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
358 tcp_sync_mss(sk, dst_mtu(dst));
359 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket the error refers to and
 * dispatch by error type (redirect, packet-too-big, hard errors).
 */
363 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
364 u8 type, u8 code, int offset, __be32 info)
366 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
367 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
368 struct net *net = dev_net(skb->dev);
369 struct request_sock *fastopen;
370 struct ipv6_pinfo *np;
/* Find the established (or minisock) socket matching the embedded header. */
377 sk = __inet6_lookup_established(net, &tcp_hashinfo,
378 &hdr->daddr, th->dest,
379 &hdr->saddr, ntohs(th->source),
380 skb->dev->ifindex, inet6_sdif(skb));
383 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
388 if (sk->sk_state == TCP_TIME_WAIT) {
389 inet_twsk_put(inet_twsk(sk));
392 seq = ntohl(th->seq);
393 fatal = icmpv6_err_convert(type, code, &err);
/* Request sockets are handled by the generic helper. */
394 if (sk->sk_state == TCP_NEW_SYN_RECV) {
395 tcp_req_err(sk, seq, fatal);
/* If the socket is owned by user context we can only act on PMTU now. */
400 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
401 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
403 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount filter: drop errors from too-distant sources. */
406 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
407 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
412 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
413 fastopen = tp->fastopen_rsk;
414 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore errors whose quoted sequence is outside the send window. */
415 if (sk->sk_state != TCP_LISTEN &&
416 !between(seq, snd_una, tp->snd_nxt)) {
417 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
421 np = tcp_inet6_sk(sk);
/* NDISC redirect: let the cached route update itself. */
423 if (type == NDISC_REDIRECT) {
424 if (!sock_owned_by_user(sk)) {
425 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428 dst->ops->redirect(dst, sk, skb);
433 if (type == ICMPV6_PKT_TOOBIG) {
434 /* We are not interested in TCP_LISTEN and open_requests
435 * (SYN-ACKs send out by Linux are always <576bytes so
436 * they should go through unfragmented).
438 if (sk->sk_state == TCP_LISTEN)
441 if (!ip6_sk_accept_pmtu(sk))
/* Record the new MTU; defer the PMTU update if socket is locked. */
444 tp->mtu_info = ntohl(info);
445 if (!sock_owned_by_user(sk))
446 tcp_v6_mtu_reduced(sk);
447 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
454 /* Might be for an request_sock */
455 switch (sk->sk_state) {
458 /* Only in fast or simultaneous open. If a fast open socket is
459 * is already accepted it is treated as a connected one below.
461 if (fastopen && !fastopen->sk)
/* Deliver the error to the socket now, or softly if it is locked. */
464 if (!sock_owned_by_user(sk)) {
466 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
470 sk->sk_err_soft = err;
474 if (!sock_owned_by_user(sk) && np->recverr) {
476 sk->sk_error_report(sk);
478 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req, routing it if no dst was given.
 * Returns a net_xmit_* code from ip6_xmit().
 */
487 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
489 struct request_sock *req,
490 struct tcp_fastopen_cookie *foc,
491 enum tcp_synack_type synack_type)
493 struct inet_request_sock *ireq = inet_rsk(req);
494 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
495 struct ipv6_txoptions *opt;
496 struct flowi6 *fl6 = &fl->u.ip6;
500 /* First, grab a route. */
501 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
502 IPPROTO_TCP)) == NULL)
505 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
508 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
509 &ireq->ir_v6_rmt_addr);
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
511 fl6->daddr = ireq->ir_v6_rmt_addr;
512 if (np->repflow && ireq->pktopts)
513 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Prefer per-request IPv6 options, falling back to the socket's. */
516 opt = ireq->ipv6_opt;
518 opt = rcu_dereference(np->opt);
519 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
521 err = net_xmit_eval(err);
/* Free per-request IPv6 state (cloned txoptions and latched pktoptions). */
529 static void tcp_v6_reqsk_destructor(struct request_sock *req)
531 kfree(inet_rsk(req)->ipv6_opt);
532 kfree_skb(inet_rsk(req)->pktopts);
535 #ifdef CONFIG_TCP_MD5SIG
/* Look up a configured TCP-MD5 key (RFC 2385) for an IPv6 peer address. */
536 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
537 const struct in6_addr *addr)
539 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* af_ops wrapper: key lookup by peer socket rather than raw address. */
542 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
543 const struct sock *addr_sk)
545 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* setsockopt(TCP_MD5SIG[_EXT]) handler: validate the user request and
 * add or delete an MD5 key, using AF_INET keys for v4-mapped addresses.
 */
548 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
549 char __user *optval, int optlen)
551 struct tcp_md5sig cmd;
552 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
555 if (optlen < sizeof(cmd))
558 if (copy_from_user(&cmd, optval, sizeof(cmd)))
561 if (sin6->sin6_family != AF_INET6)
/* TCP_MD5SIG_EXT may carry a prefix length for subnet-scoped keys;
 * otherwise default to a host-wide key (/32 for v4-mapped, /128 else).
 */
564 if (optname == TCP_MD5SIG_EXT &&
565 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
566 prefixlen = cmd.tcpm_prefixlen;
567 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
571 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Zero key length means delete the existing key. */
574 if (!cmd.tcpm_keylen) {
575 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
576 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
578 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
579 AF_INET6, prefixlen);
582 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
/* v4-mapped peers store their key under AF_INET (the low 32 bits). */
585 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
586 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
587 AF_INET, prefixlen, cmd.tcpm_key,
588 cmd.tcpm_keylen, GFP_KERNEL);
590 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
591 AF_INET6, prefixlen, cmd.tcpm_key,
592 cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCP/IPv6 pseudo-header plus the TCP header into the MD5 hash
 * state held in @hp. Returns the crypto_ahash_update() result.
 */
595 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
596 const struct in6_addr *daddr,
597 const struct in6_addr *saddr,
598 const struct tcphdr *th, int nbytes)
600 struct tcp6_pseudohdr *bp;
601 struct scatterlist sg;
605 /* 1. TCP pseudo-header (RFC2460) */
608 bp->protocol = cpu_to_be32(IPPROTO_TCP);
609 bp->len = cpu_to_be32(nbytes);
/* Copy the TCP header right after the pseudo-header so both are hashed
 * with a single scatterlist entry.
 */
611 _th = (struct tcphdr *)(bp + 1);
612 memcpy(_th, th, sizeof(*th));
615 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
616 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
617 sizeof(*bp) + sizeof(*th));
618 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over headers + key only (no payload); used
 * for generated control segments. On any failure the output is zeroed.
 */
621 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
622 const struct in6_addr *daddr, struct in6_addr *saddr,
623 const struct tcphdr *th)
625 struct tcp_md5sig_pool *hp;
626 struct ahash_request *req;
/* Grab the per-CPU md5 pool; bail out without a put if unavailable. */
628 hp = tcp_get_md5sig_pool();
630 goto clear_hash_noput;
633 if (crypto_ahash_init(req))
635 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
637 if (tcp_md5_hash_key(hp, key))
639 ahash_request_set_crypt(req, NULL, md5_hash, 0);
640 if (crypto_ahash_final(req))
643 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest so callers never
 * see a partially-written hash.
 */
647 tcp_put_md5sig_pool();
649 memset(md5_hash, 0, 16);
/* Compute the MD5 signature for a full segment (headers, payload, key).
 * Addresses come from the socket when available, else from the skb.
 * On any failure the output is zeroed.
 */
653 static int tcp_v6_md5_hash_skb(char *md5_hash,
654 const struct tcp_md5sig_key *key,
655 const struct sock *sk,
656 const struct sk_buff *skb)
658 const struct in6_addr *saddr, *daddr;
659 struct tcp_md5sig_pool *hp;
660 struct ahash_request *req;
661 const struct tcphdr *th = tcp_hdr(skb);
663 if (sk) { /* valid for establish/request sockets */
664 saddr = &sk->sk_v6_rcv_saddr;
665 daddr = &sk->sk_v6_daddr;
667 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
668 saddr = &ip6h->saddr;
669 daddr = &ip6h->daddr;
672 hp = tcp_get_md5sig_pool();
674 goto clear_hash_noput;
677 if (crypto_ahash_init(req))
/* Hash pseudo-header + TCP header, then the payload, then the key. */
680 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
682 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
684 if (tcp_md5_hash_key(hp, key))
686 ahash_request_set_crypt(req, NULL, md5_hash, 0);
687 if (crypto_ahash_final(req))
690 tcp_put_md5sig_pool();
/* Error path: release pool, zero digest. */
694 tcp_put_md5sig_pool();
696 memset(md5_hash, 0, 16);
/* Validate the MD5 option on an inbound segment against the key (if any)
 * configured for the peer. Both "key expected but absent" and "unexpected
 * key present" are failures, per RFC 2385 semantics.
 */
702 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
703 const struct sk_buff *skb)
705 #ifdef CONFIG_TCP_MD5SIG
706 const __u8 *hash_location = NULL;
707 struct tcp_md5sig_key *hash_expected;
708 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
709 const struct tcphdr *th = tcp_hdr(skb);
713 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
714 hash_location = tcp_parse_md5sig_option(th);
716 /* We've parsed the options - do we have a hash? */
717 if (!hash_expected && !hash_location)
720 if (hash_expected && !hash_location) {
721 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
725 if (!hash_expected && hash_location) {
726 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
730 /* check the signature */
731 genhash = tcp_v6_md5_hash_skb(newhash,
735 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
736 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
737 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
738 genhash ? "failed" : "mismatch",
739 &ip6h->saddr, ntohs(th->source),
740 &ip6h->daddr, ntohs(th->dest));
/* Initialize the IPv6 parts of a freshly-minted request sock from the
 * incoming SYN: addresses, inbound interface, and latched pktoptions.
 */
747 static void tcp_v6_init_req(struct request_sock *req,
748 const struct sock *sk_listener,
751 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
752 struct inet_request_sock *ireq = inet_rsk(req);
753 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
755 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
756 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
758 /* So that link locals have meaning */
759 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
760 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
761 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep a reference to the SYN skb when the listener wants any of the
 * per-packet ancillary data (IPV6_PKTOPTIONS and friends).
 */
763 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
764 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
765 np->rxopt.bits.rxinfo ||
766 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
767 np->rxopt.bits.rxohlim || np->repflow)) {
768 refcount_inc(&skb->users);
/* route_req callback: resolve a route for answering this request sock. */
773 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
775 const struct request_sock *req)
777 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* Generic request-sock operations for IPv6 TCP (SYN-RCV minisockets). */
780 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
782 .obj_size = sizeof(struct tcp6_request_sock),
783 .rtx_syn_ack = tcp_rtx_synack,
784 .send_ack = tcp_v6_reqsk_send_ack,
785 .destructor = tcp_v6_reqsk_destructor,
786 .send_reset = tcp_v6_send_reset,
787 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific request-sock hooks wired into the generic TCP SYN path. */
790 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
/* Clamp MSS so segments fit the IPv6 minimum MTU (1280) unfragmented. */
791 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
792 sizeof(struct ipv6hdr),
793 #ifdef CONFIG_TCP_MD5SIG
794 .req_md5_lookup = tcp_v6_md5_lookup,
795 .calc_md5_hash = tcp_v6_md5_hash_skb,
797 .init_req = tcp_v6_init_req,
798 #ifdef CONFIG_SYN_COOKIES
799 .cookie_init_seq = cookie_v6_init_sequence,
801 .route_req = tcp_v6_route_req,
802 .init_seq = tcp_v6_init_seq,
803 .init_ts_off = tcp_v6_init_ts_off,
804 .send_synack = tcp_v6_send_synack,
/* Build and send a bare control segment (RST when @rst, else ACK) in
 * reply to @skb, using the per-netns control socket. Optionally carries
 * timestamps and an MD5 signature.
 */
807 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
808 u32 ack, u32 win, u32 tsval, u32 tsecr,
809 int oif, struct tcp_md5sig_key *key, int rst,
810 u8 tclass, __be32 label)
812 const struct tcphdr *th = tcp_hdr(skb);
814 struct sk_buff *buff;
816 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
817 struct sock *ctl_sk = net->ipv6.tcp_sk;
818 unsigned int tot_len = sizeof(struct tcphdr);
819 struct dst_entry *dst;
/* Size the header for optional timestamp and MD5 options. */
824 tot_len += TCPOLEN_TSTAMP_ALIGNED;
825 #ifdef CONFIG_TCP_MD5SIG
827 tot_len += TCPOLEN_MD5SIG_ALIGNED;
830 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
835 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
837 t1 = skb_push(buff, tot_len);
838 skb_reset_transport_header(buff);
840 /* Swap the send and the receive. */
841 memset(t1, 0, sizeof(*t1));
842 t1->dest = th->source;
843 t1->source = th->dest;
844 t1->doff = tot_len / 4;
845 t1->seq = htonl(seq);
846 t1->ack_seq = htonl(ack);
847 t1->ack = !rst || !th->ack;
849 t1->window = htons(win);
/* Emit the TCP options directly after the fixed header. */
851 topt = (__be32 *)(t1 + 1);
854 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
855 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
856 *topt++ = htonl(tsval);
857 *topt++ = htonl(tsecr);
860 #ifdef CONFIG_TCP_MD5SIG
862 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
863 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
864 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
865 &ipv6_hdr(skb)->saddr,
866 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: mirror the incoming packet's addresses. */
870 memset(&fl6, 0, sizeof(fl6));
871 fl6.daddr = ipv6_hdr(skb)->saddr;
872 fl6.saddr = ipv6_hdr(skb)->daddr;
873 fl6.flowlabel = label;
875 buff->ip_summed = CHECKSUM_PARTIAL;
878 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
880 fl6.flowi6_proto = IPPROTO_TCP;
/* Link-local destinations need an explicit output interface. */
881 if (rt6_need_strict(&fl6.daddr) && !oif)
882 fl6.flowi6_oif = tcp_v6_iif(skb);
884 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
887 fl6.flowi6_oif = oif;
/* Propagate the socket (or timewait) mark to the reply. */
891 mark = (sk->sk_state == TCP_TIME_WAIT) ?
892 inet_twsk(sk)->tw_mark : sk->sk_mark;
893 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
894 fl6.fl6_dport = t1->dest;
895 fl6.fl6_sport = t1->source;
896 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
897 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
899 /* Pass a socket to ip6_dst_lookup either it is for RST
900 * Underlying function will use this to retrieve the network
903 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
905 skb_dst_set(buff, dst);
906 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
907 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
909 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb. If MD5 is configured we must sign the RST,
 * possibly finding the key via a listener lookup when @sk is unavailable.
 */
916 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
918 const struct tcphdr *th = tcp_hdr(skb);
919 u32 seq = 0, ack_seq = 0;
920 struct tcp_md5sig_key *key = NULL;
921 #ifdef CONFIG_TCP_MD5SIG
922 const __u8 *hash_location = NULL;
923 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
924 unsigned char newhash[16];
926 struct sock *sk1 = NULL;
933 /* If sk not NULL, it means we did a successful lookup and incoming
934 * route had to be correct. prequeue might have dropped our dst.
936 if (!sk && !ipv6_unicast_destination(skb))
939 #ifdef CONFIG_TCP_MD5SIG
941 hash_location = tcp_parse_md5sig_option(th);
942 if (sk && sk_fullsock(sk)) {
943 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
944 } else if (hash_location) {
946 * active side is lost. Try to find listening socket through
947 * source port, and then find md5 key through listening socket.
948 * we are not loose security here:
949 * Incoming packet is checked with md5 hash with finding key,
950 * no RST generated if md5 hash doesn't match.
952 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
953 &tcp_hashinfo, NULL, 0,
955 th->source, &ipv6h->daddr,
957 tcp_v6_iif_l3_slave(skb),
962 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the inbound segment's signature before replying with RST. */
966 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
967 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RST sequence numbers per RFC 793: echo the peer's ACK if present,
 * else ACK everything the offending segment covered.
 */
973 seq = ntohl(th->ack_seq);
975 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
979 oif = sk->sk_bound_dev_if;
981 trace_tcp_send_reset(sk, skb);
984 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
986 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper: send a pure ACK (rst=0) via tcp_v6_send_response(). */
992 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
993 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
994 struct tcp_md5sig_key *key, u8 tclass,
997 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* ACK on behalf of a TIME-WAIT socket, using the state preserved in the
 * timewait minisock (window, timestamps, MD5 key, flow label).
 */
1001 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1003 struct inet_timewait_sock *tw = inet_twsk(sk);
1004 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1006 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1007 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1008 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1009 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1010 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request sock (SYN-RCV / Fast Open), picking the
 * correct sequence base depending on whether @sk is the listener.
 */
1015 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1016 struct request_sock *req)
1018 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1019 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1022 * The window field (SEG.WND) of every outgoing segment, with the
1023 * exception of <SYN> segments, MUST be right-shifted by
1024 * Rcv.Wind.Shift bits:
1026 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1027 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1028 tcp_rsk(req)->rcv_nxt,
1029 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1030 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1031 req->ts_recent, sk->sk_bound_dev_if,
1032 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
/* SYN-cookie validation hook; only active when CONFIG_SYN_COOKIES is on. */
1037 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1039 #ifdef CONFIG_SYN_COOKIES
1040 const struct tcphdr *th = tcp_hdr(skb);
1043 sk = cookie_v6_check(sk, skb);
/* Passive open: handle an incoming SYN, delegating v4-mapped traffic to
 * the IPv4 handler and refusing non-unicast destinations.
 */
1048 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1050 if (skb->protocol == htons(ETH_P_IP))
1051 return tcp_v4_conn_request(sk, skb);
1053 if (!ipv6_unicast_destination(skb))
1056 return tcp_conn_request(&tcp6_request_sock_ops,
1057 &tcp_request_sock_ipv6_ops, sk, skb);
1061 return 0; /* don't send reset */
/* Undo tcp_v6_fill_cb(): move the IPv6 control-block data back to IP6CB. */
1064 static void tcp_v6_restore_cb(struct sk_buff *skb)
1066 /* We need to move header back to the beginning if xfrm6_policy_check()
1067 * and tcp_v6_fill_cb() are going to be called again.
1068 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1070 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1071 sizeof(struct inet6_skb_parm));
/* Create the child socket for a completing 3-way handshake. Handles the
 * v4-mapped case by delegating to tcp_v4_syn_recv_sock() and then fixing
 * up the IPv6 view of the new socket; otherwise builds a native IPv6
 * child, copies options/keys from the listener and hashes it in.
 */
1074 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1075 struct request_sock *req,
1076 struct dst_entry *dst,
1077 struct request_sock *req_unhash,
1080 struct inet_request_sock *ireq;
1081 struct ipv6_pinfo *newnp;
1082 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1083 struct ipv6_txoptions *opt;
1084 struct inet_sock *newinet;
1085 struct tcp_sock *newtp;
1087 #ifdef CONFIG_TCP_MD5SIG
1088 struct tcp_md5sig_key *key;
/* --- v4-mapped path: the SYN arrived over IPv4. --- */
1092 if (skb->protocol == htons(ETH_P_IP)) {
1097 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1098 req_unhash, own_req);
1103 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1105 newinet = inet_sk(newsk);
1106 newnp = tcp_inet6_sk(newsk);
1107 newtp = tcp_sk(newsk);
1109 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1111 newnp->saddr = newsk->sk_v6_rcv_saddr;
/* The child talks IPv4 on the wire: use the mapped op tables. */
1113 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1114 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1115 #ifdef CONFIG_TCP_MD5SIG
1116 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
/* Fields memcpy'd from the listener that must not be shared. */
1119 newnp->ipv6_mc_list = NULL;
1120 newnp->ipv6_ac_list = NULL;
1121 newnp->ipv6_fl_list = NULL;
1122 newnp->pktoptions = NULL;
1124 newnp->mcast_oif = inet_iif(skb);
1125 newnp->mcast_hops = ip_hdr(skb)->ttl;
1126 newnp->rcv_flowinfo = 0;
1128 newnp->flow_label = 0;
1131 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1132 * here, tcp_create_openreq_child now does this for us, see the comment in
1133 * that function for the gory details. -acme
1136 /* It is tricky place. Until this moment IPv4 tcp
1137 worked with IPv6 icsk.icsk_af_ops.
1140 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- Native IPv6 path. --- */
1145 ireq = inet_rsk(req);
1147 if (sk_acceptq_is_full(sk))
1151 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1156 newsk = tcp_create_openreq_child(sk, req, skb);
1161 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1162 * count here, tcp_create_openreq_child now does this for us, see the
1163 * comment in that function for the gory details. -acme
1166 newsk->sk_gso_type = SKB_GSO_TCPV6;
1167 ip6_dst_store(newsk, dst, NULL, NULL);
1168 inet6_sk_rx_dst_set(newsk, skb);
1170 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1172 newtp = tcp_sk(newsk);
1173 newinet = inet_sk(newsk);
1174 newnp = tcp_inet6_sk(newsk);
1176 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1178 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1179 newnp->saddr = ireq->ir_v6_loc_addr;
1180 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1181 newsk->sk_bound_dev_if = ireq->ir_iif;
1183 /* Now IPv6 options...
1185 First: no IPv4 options.
1187 newinet->inet_opt = NULL;
1188 newnp->ipv6_mc_list = NULL;
1189 newnp->ipv6_ac_list = NULL;
1190 newnp->ipv6_fl_list = NULL;
1193 newnp->rxopt.all = np->rxopt.all;
1195 newnp->pktoptions = NULL;
1197 newnp->mcast_oif = tcp_v6_iif(skb);
1198 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1199 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1201 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1203 /* Clone native IPv6 options from listening socket (if any)
1205 Yes, keeping reference count would be much more clever,
1206 but we make one more one thing there: reattach optmem
1209 opt = ireq->ipv6_opt;
1211 opt = rcu_dereference(np->opt);
1213 opt = ipv6_dup_options(newsk, opt);
1214 RCU_INIT_POINTER(newnp->opt, opt);
1216 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1218 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
/* Congestion control + MSS setup for the child. */
1221 tcp_ca_openreq_child(newsk, dst);
1223 tcp_sync_mss(newsk, dst_mtu(dst));
1224 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1226 tcp_initialize_rcv_mss(newsk);
1228 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1229 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1231 #ifdef CONFIG_TCP_MD5SIG
1232 /* Copy over the MD5 key from the original socket */
1233 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1235 /* We're using one, so create a matching key
1236 * on the newsk structure. If we fail to get
1237 * memory, then we end up not copying the key
1240 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1241 AF_INET6, 128, key->key, key->keylen,
1242 sk_gfp_mask(sk, GFP_ATOMIC));
1246 if (__inet_inherit_port(sk, newsk) < 0) {
1247 inet_csk_prepare_forced_close(newsk);
1251 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1253 tcp_move_syn(newtp, req);
1255 /* Clone pktoptions received with SYN, if we own the req */
1256 if (ireq->pktopts) {
1257 newnp->pktoptions = skb_clone(ireq->pktopts,
1258 sk_gfp_mask(sk, GFP_ATOMIC));
1259 consume_skb(ireq->pktopts);
1260 ireq->pktopts = NULL;
1261 if (newnp->pktoptions) {
1262 tcp_v6_restore_cb(newnp->pktoptions);
1263 skb_set_owner_r(newnp->pktoptions, newsk);
1271 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1279 /* The socket must have it's spinlock held when we get
1280 * here, unless it is a TCP_LISTEN socket.
1282 * We have a potential double-lock case here, so even when
1283 * doing backlog processing we use the BH locking scheme.
1284 * This is because we cannot sleep with the original spinlock
/* Per-socket receive path: fast path for ESTABLISHED, listener/cookie
 * handling, and the IPV6_PKTOPTIONS latching described inline below.
 */
1287 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1289 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1290 struct sk_buff *opt_skb = NULL;
1291 struct tcp_sock *tp;
1293 /* Imagine: socket is IPv6. IPv4 packet arrives,
1294 goes to IPv4 receive handler and backlogged.
1295 From backlog it always goes here. Kerboom...
1296 Fortunately, tcp_rcv_established and rcv_established
1297 handle them correctly, but it is not case with
1298 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1301 if (skb->protocol == htons(ETH_P_IP))
1302 return tcp_v4_do_rcv(sk, skb);
1305 * socket locking is here for SMP purposes as backlog rcv
1306 * is currently called with bh processing disabled.
1309 /* Do Stevens' IPV6_PKTOPTIONS.
1311 Yes, guys, it is the only place in our code, where we
1312 may make it not affecting IPv4.
1313 The rest of code is protocol independent,
1314 and I do not like idea to uglify IPv4.
1316 Actually, all the idea behind IPV6_PKTOPTIONS
1317 looks not very well thought. For now we latch
1318 options, received in the last packet, enqueued
1319 by tcp. Feel free to propose better solution.
1323 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
/* ESTABLISHED fast path: revalidate the cached RX dst, then hand the
 * segment straight to tcp_rcv_established().
 */
1325 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1326 struct dst_entry *dst = sk->sk_rx_dst;
1328 sock_rps_save_rxhash(sk, skb);
1329 sk_mark_napi_id(sk, skb);
1331 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1332 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1334 sk->sk_rx_dst = NULL;
1338 tcp_rcv_established(sk, skb);
1340 goto ipv6_pktoptions;
1344 if (tcp_checksum_complete(skb))
/* Listener: may return a child socket (incl. via SYN cookies). */
1347 if (sk->sk_state == TCP_LISTEN) {
1348 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1354 if (tcp_child_process(sk, nsk, skb))
1357 __kfree_skb(opt_skb);
1361 sock_rps_save_rxhash(sk, skb);
1363 if (tcp_rcv_state_process(sk, skb))
1366 goto ipv6_pktoptions;
/* Error path: reset the peer and drop the segment. */
1370 tcp_v6_send_reset(sk, skb);
1373 __kfree_skb(opt_skb);
1377 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1378 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1383 /* Do you ask, what is it?
1385 1. skb was enqueued by tcp.
1386 2. skb is added to tail of read queue, rather than out of order.
1387 3. socket is not in passive state.
1388 4. Finally, it really contains options, which user wants to receive.
1391 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1392 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1393 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1394 np->mcast_oif = tcp_v6_iif(opt_skb)
1395 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1396 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1397 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1398 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1400 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
/* Latch the options skb on the socket; free any one it replaces. */
1401 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1402 skb_set_owner_r(opt_skb, sk);
1403 tcp_v6_restore_cb(opt_skb);
1404 opt_skb = xchg(&np->pktoptions, opt_skb);
1406 __kfree_skb(opt_skb);
1407 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB() from the TCP/IPv6 headers before the skb enters TCP
 * proper, overlaying the space previously occupied by IP6CB().
 */
1415 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1416 const struct tcphdr *th)
1418 /* This is tricky: we move IP6CB at its correct location into
1419 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1420 * _decode_session6() uses IP6CB().
1421 * barrier() makes sure compiler won't play aliasing games.
1423 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1424 sizeof(struct inet6_skb_parm));
1427 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence number each, plus the payload. */
1428 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1429 skb->len - th->doff*4);
1430 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1431 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1432 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1433 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1434 TCP_SKB_CB(skb)->sacked = 0;
/* True if either a software or a hardware receive timestamp is present. */
1435 TCP_SKB_CB(skb)->has_rxtstamp =
1436 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/* Protocol entry point for inbound IPv6 TCP segments (tcpv6_protocol.handler). */
1439 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1441 struct sk_buff *skb_to_free;
1442 int sdif = inet6_sdif(skb);
1443 const struct tcphdr *th;
1444 const struct ipv6hdr *hdr;
1448 struct net *net = dev_net(skb->dev);
1450 if (skb->pkt_type != PACKET_HOST)
1454 * Count it even if it's bad.
1456 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Header validation: pull the fixed header, sanity-check doff, pull the full
 * header including options, then initialize checksum state.
 */
1458 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1461 th = (const struct tcphdr *)skb->data;
1463 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1465 if (!pskb_may_pull(skb, th->doff*4))
1468 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read header pointers: pskb_may_pull() may have reallocated skb->data. */
1471 th = (const struct tcphdr *)skb->data;
1472 hdr = ipv6_hdr(skb);
1475 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1476 th->source, th->dest, inet6_iif(skb), sdif,
1482 if (sk->sk_state == TCP_TIME_WAIT)
/* NEW_SYN_RECV: the segment matched a pending request_sock; validate and
 * process it in the context of the listener.
 */
1485 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1486 struct request_sock *req = inet_reqsk(sk);
1487 bool req_stolen = false;
1490 sk = req->rsk_listener;
1491 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1492 sk_drops_add(sk, skb);
1496 if (tcp_checksum_complete(skb)) {
1500 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1501 inet_csk_reqsk_queue_drop_and_put(sk, req);
1507 if (!tcp_filter(sk, skb)) {
1508 th = (const struct tcphdr *)skb->data;
1509 hdr = ipv6_hdr(skb);
1510 tcp_v6_fill_cb(skb, hdr, th);
1511 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1516 /* Another cpu got exclusive access to req
1517 * and created a full blown socket.
1518 * Try to feed this packet to this socket
1519 * instead of discarding it.
1521 tcp_v6_restore_cb(skb);
1525 goto discard_and_relse;
1529 tcp_v6_restore_cb(skb);
1530 } else if (tcp_child_process(sk, nsk, skb)) {
1531 tcp_v6_send_reset(nsk, skb);
1532 goto discard_and_relse;
/* Full-socket path: hop-limit floor, XFRM policy, MD5 and socket filter
 * checks, then fill the control block.
 */
1538 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1539 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1540 goto discard_and_relse;
1543 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1544 goto discard_and_relse;
1546 if (tcp_v6_inbound_md5_hash(sk, skb))
1547 goto discard_and_relse;
1549 if (tcp_filter(sk, skb))
1550 goto discard_and_relse;
1551 th = (const struct tcphdr *)skb->data;
1552 hdr = ipv6_hdr(skb);
1553 tcp_v6_fill_cb(skb, hdr, th);
1557 if (sk->sk_state == TCP_LISTEN) {
1558 ret = tcp_v6_do_rcv(sk, skb);
1559 goto put_and_return;
1562 sk_incoming_cpu_update(sk);
/* Deliver under the socket spinlock; if the socket is owned by a user
 * context, queue to the backlog instead.
 */
1564 bh_lock_sock_nested(sk);
1565 tcp_segs_in(tcp_sk(sk), skb);
1567 if (!sock_owned_by_user(sk)) {
1568 skb_to_free = sk->sk_rx_skb_cache;
1569 sk->sk_rx_skb_cache = NULL;
1570 ret = tcp_v6_do_rcv(sk, skb);
1572 if (tcp_add_backlog(sk, skb))
1573 goto discard_and_relse;
1578 __kfree_skb(skb_to_free);
1582 return ret ? -1 : 0;
/* No-socket path: policy check, checksum verify, count errors, send RST.
 * NOTE(review): the no_tcp_socket/discard_it labels appear elided here.
 */
1585 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1588 tcp_v6_fill_cb(skb, hdr, th);
1590 if (tcp_checksum_complete(skb)) {
1592 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1594 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1596 tcp_v6_send_reset(NULL, skb);
1604 sk_drops_add(sk, skb);
/* TIME_WAIT handling: revalidate the segment, then act on the verdict from
 * tcp_timewait_state_process() (SYN to a new listener, ACK, RST, or drop).
 */
1610 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1611 inet_twsk_put(inet_twsk(sk));
1615 tcp_v6_fill_cb(skb, hdr, th);
1617 if (tcp_checksum_complete(skb)) {
1618 inet_twsk_put(inet_twsk(sk));
1622 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1627 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1628 skb, __tcp_hdrlen(th),
1629 &ipv6_hdr(skb)->saddr, th->source,
1630 &ipv6_hdr(skb)->daddr,
1632 tcp_v6_iif_l3_slave(skb),
/* A listener accepts the SYN: retire the TIME_WAIT socket and retry. */
1635 struct inet_timewait_sock *tw = inet_twsk(sk);
1636 inet_twsk_deschedule_put(tw);
1638 tcp_v6_restore_cb(skb);
1646 tcp_v6_timewait_ack(sk, skb);
1649 tcp_v6_send_reset(sk, skb);
1650 inet_twsk_deschedule_put(inet_twsk(sk));
1652 case TCP_TW_SUCCESS:
/* Early demux (tcpv6_protocol.early_demux): look up an established socket
 * before routing so the skb can reuse the socket's cached rx dst.
 */
1658 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1660 const struct ipv6hdr *hdr;
1661 const struct tcphdr *th;
1664 if (skb->pkt_type != PACKET_HOST)
1667 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1670 hdr = ipv6_hdr(skb);
1673 if (th->doff < sizeof(struct tcphdr) / 4)
1676 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1677 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1678 &hdr->saddr, th->source,
1679 &hdr->daddr, ntohs(th->dest),
1680 inet6_iif(skb), inet6_sdif(skb));
/* sock_edemux drops the socket reference taken by the lookup above. */
1683 skb->destructor = sock_edemux;
1684 if (sk_fullsock(sk)) {
1685 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1688 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
/* Attach the cached dst (no refcount) only if the input interface matches. */
1690 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1691 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket operations; referenced by tcpv6_prot.twsk_prot below. */
1696 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1697 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1698 .twsk_unique = tcp_twsk_unique,
1699 .twsk_destructor = tcp_twsk_destructor,
/* AF-specific connection ops for native IPv6 TCP sockets; installed on the
 * socket by tcp_v6_init_sock().
 */
1702 static const struct inet_connection_sock_af_ops ipv6_specific = {
1703 .queue_xmit = inet6_csk_xmit,
1704 .send_check = tcp_v6_send_check,
1705 .rebuild_header = inet6_sk_rebuild_header,
1706 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1707 .conn_request = tcp_v6_conn_request,
1708 .syn_recv_sock = tcp_v6_syn_recv_sock,
1709 .net_header_len = sizeof(struct ipv6hdr),
1710 .net_frag_header_len = sizeof(struct frag_hdr),
1711 .setsockopt = ipv6_setsockopt,
1712 .getsockopt = ipv6_getsockopt,
1713 .addr2sockaddr = inet6_csk_addr2sockaddr,
1714 .sockaddr_len = sizeof(struct sockaddr_in6),
1715 #ifdef CONFIG_COMPAT
1716 .compat_setsockopt = compat_ipv6_setsockopt,
1717 .compat_getsockopt = compat_ipv6_getsockopt,
1719 .mtu_reduced = tcp_v6_mtu_reduced,
1722 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 signature helpers for native IPv6 sockets. */
1723 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1724 .md5_lookup = tcp_v6_md5_lookup,
1725 .calc_md5_hash = tcp_v6_md5_hash_skb,
1726 .md5_parse = tcp_v6_parse_md5_keys,
1731 * TCP over IPv4 via INET6 API
/* AF ops for v4-mapped IPv6 sockets: IPv4 transmit/header paths behind the
 * INET6 sockaddr API (note net_header_len is sizeof(struct iphdr)).
 */
1733 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1734 .queue_xmit = ip_queue_xmit,
1735 .send_check = tcp_v4_send_check,
1736 .rebuild_header = inet_sk_rebuild_header,
1737 .sk_rx_dst_set = inet_sk_rx_dst_set,
1738 .conn_request = tcp_v6_conn_request,
1739 .syn_recv_sock = tcp_v6_syn_recv_sock,
1740 .net_header_len = sizeof(struct iphdr),
1741 .setsockopt = ipv6_setsockopt,
1742 .getsockopt = ipv6_getsockopt,
1743 .addr2sockaddr = inet6_csk_addr2sockaddr,
1744 .sockaddr_len = sizeof(struct sockaddr_in6),
1745 #ifdef CONFIG_COMPAT
1746 .compat_setsockopt = compat_ipv6_setsockopt,
1747 .compat_getsockopt = compat_ipv6_getsockopt,
1749 .mtu_reduced = tcp_v4_mtu_reduced,
1752 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 helpers for v4-mapped sockets: IPv4 hashing, IPv6 key parsing. */
1753 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1754 .md5_lookup = tcp_v4_md5_lookup,
1755 .calc_md5_hash = tcp_v4_md5_hash_skb,
1756 .md5_parse = tcp_v6_parse_md5_keys,
1760 /* NOTE: A lot of things set to zero explicitly by call to
1761 * sk_alloc() so need not be done here.
/* Per-socket init (tcpv6_prot.init): install the IPv6 af_ops and, when
 * configured, the IPv6 MD5 ops.
 */
1763 static int tcp_v6_init_sock(struct sock *sk)
1765 struct inet_connection_sock *icsk = inet_csk(sk);
1769 icsk->icsk_af_ops = &ipv6_specific;
1771 #ifdef CONFIG_TCP_MD5SIG
1772 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Socket teardown: common TCP cleanup first, then IPv6-specific state. */
1778 static void tcp_v6_destroy_sock(struct sock *sk)
1780 tcp_v4_destroy_sock(sk);
1781 inet6_destroy_sock(sk);
1784 #ifdef CONFIG_PROC_FS
1785 /* Proc filesystem TCPv6 sock list dumping. */
/* Format one SYN_RECV request_sock as a /proc/net/tcp6 row (slot index i). */
1786 static void get_openreq6(struct seq_file *seq,
1787 const struct request_sock *req, int i)
/* Remaining lifetime of the request's expire timer, in jiffies. */
1789 long ttd = req->rsk_timer.expires - jiffies;
1790 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1791 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1797 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1798 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1800 src->s6_addr32[0], src->s6_addr32[1],
1801 src->s6_addr32[2], src->s6_addr32[3],
1802 inet_rsk(req)->ir_num,
1803 dest->s6_addr32[0], dest->s6_addr32[1],
1804 dest->s6_addr32[2], dest->s6_addr32[3],
1805 ntohs(inet_rsk(req)->ir_rmt_port),
1807 0, 0, /* could print option size, but that is af dependent. */
1808 1, /* timers active (only the expire timer) */
1809 jiffies_to_clock_t(ttd),
/* UID is reported from the listener's credentials, munged for seq's userns. */
1811 from_kuid_munged(seq_user_ns(seq),
1812 sock_i_uid(req->rsk_listener)),
1813 0, /* non standard timer */
1814 0, /* open_requests have no inode */
/* Format one full TCP socket as a /proc/net/tcp6 row (slot index i). */
1818 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1820 const struct in6_addr *dest, *src;
1823 unsigned long timer_expires;
1824 const struct inet_sock *inet = inet_sk(sp);
1825 const struct tcp_sock *tp = tcp_sk(sp);
1826 const struct inet_connection_sock *icsk = inet_csk(sp);
1827 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1831 dest = &sp->sk_v6_daddr;
1832 src = &sp->sk_v6_rcv_saddr;
1833 destp = ntohs(inet->inet_dport);
1834 srcp = ntohs(inet->inet_sport);
/* Pick whichever timer is pending so the "tm->when" column is meaningful:
 * retransmit/probe timers first, then the keepalive (sk_timer), else now.
 */
1836 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1837 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1838 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1840 timer_expires = icsk->icsk_timeout;
1841 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1843 timer_expires = icsk->icsk_timeout;
1844 } else if (timer_pending(&sp->sk_timer)) {
1846 timer_expires = sp->sk_timer.expires;
1849 timer_expires = jiffies;
1852 state = inet_sk_state_load(sp);
/* For listeners the rx_queue column shows the accept backlog instead. */
1853 if (state == TCP_LISTEN)
1854 rx_queue = sp->sk_ack_backlog;
1856 /* Because we don't lock the socket,
1857 * we might find a transient negative value.
1859 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1862 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1863 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1865 src->s6_addr32[0], src->s6_addr32[1],
1866 src->s6_addr32[2], src->s6_addr32[3], srcp,
1867 dest->s6_addr32[0], dest->s6_addr32[1],
1868 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1870 tp->write_seq - tp->snd_una,
1873 jiffies_delta_to_clock_t(timer_expires - jiffies),
1874 icsk->icsk_retransmits,
1875 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1876 icsk->icsk_probes_out,
1878 refcount_read(&sp->sk_refcnt), sp,
1879 jiffies_to_clock_t(icsk->icsk_rto),
1880 jiffies_to_clock_t(icsk->icsk_ack.ato),
1881 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
/* Last column: fastopen max_qlen for listeners, else ssthresh (-1 while in
 * initial slow start).
 */
1883 state == TCP_LISTEN ?
1884 fastopenq->max_qlen :
1885 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Format one TIME_WAIT socket as a /proc/net/tcp6 row (slot index i). */
1889 static void get_timewait6_sock(struct seq_file *seq,
1890 struct inet_timewait_sock *tw, int i)
/* Remaining TIME_WAIT timer lifetime, in jiffies. */
1892 long delta = tw->tw_timer.expires - jiffies;
1893 const struct in6_addr *dest, *src;
1896 dest = &tw->tw_v6_daddr;
1897 src = &tw->tw_v6_rcv_saddr;
1898 destp = ntohs(tw->tw_dport);
1899 srcp = ntohs(tw->tw_sport);
1902 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1903 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1905 src->s6_addr32[0], src->s6_addr32[1],
1906 src->s6_addr32[2], src->s6_addr32[3], srcp,
1907 dest->s6_addr32[0], dest->s6_addr32[1],
1908 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* Queue sizes, uid, inode etc. are fixed zeros; timer type is always 3. */
1909 tw->tw_substate, 0, 0,
1910 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1911 refcount_read(&tw->tw_refcnt), tw);
/* seq_file .show: print the header row for SEQ_START_TOKEN, otherwise
 * dispatch on socket state to the matching formatter above.
 */
1914 static int tcp6_seq_show(struct seq_file *seq, void *v)
1916 struct tcp_iter_state *st;
1917 struct sock *sk = v;
1919 if (v == SEQ_START_TOKEN) {
1924 "st tx_queue rx_queue tr tm->when retrnsmt"
1925 " uid timeout inode\n");
1930 if (sk->sk_state == TCP_TIME_WAIT)
1931 get_timewait6_sock(seq, v, st->num);
1932 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1933 get_openreq6(seq, v, st->num);
1935 get_tcp6_sock(seq, v, st->num);
/* seq_file iterator ops for /proc/net/tcp6; iteration helpers are the
 * family-generic tcp_seq_* routines.
 */
1940 static const struct seq_operations tcp6_seq_ops = {
1941 .show = tcp6_seq_show,
1942 .start = tcp_seq_start,
1943 .next = tcp_seq_next,
1944 .stop = tcp_seq_stop,
/* Address-family info handed to proc_create_net_data() in tcp6_proc_init(). */
1947 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Create the per-netns /proc/net/tcp6 entry (mode 0444, read-only). */
1951 int __net_init tcp6_proc_init(struct net *net)
1953 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1954 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Remove the per-netns /proc/net/tcp6 entry created by tcp6_proc_init(). */
1959 void tcp6_proc_exit(struct net *net)
1961 remove_proc_entry("tcp6", net->proc_net);
/* Top-level proto definition for IPv6 TCP: wires this file's handlers
 * (connect, init/destroy, backlog_rcv) to the family-generic TCP core.
 */
1965 struct proto tcpv6_prot = {
1967 .owner = THIS_MODULE,
1969 .pre_connect = tcp_v6_pre_connect,
1970 .connect = tcp_v6_connect,
1971 .disconnect = tcp_disconnect,
1972 .accept = inet_csk_accept,
1974 .init = tcp_v6_init_sock,
1975 .destroy = tcp_v6_destroy_sock,
1976 .shutdown = tcp_shutdown,
1977 .setsockopt = tcp_setsockopt,
1978 .getsockopt = tcp_getsockopt,
1979 .keepalive = tcp_set_keepalive,
1980 .recvmsg = tcp_recvmsg,
1981 .sendmsg = tcp_sendmsg,
1982 .sendpage = tcp_sendpage,
1983 .backlog_rcv = tcp_v6_do_rcv,
1984 .release_cb = tcp_release_cb,
1986 .unhash = inet_unhash,
1987 .get_port = inet_csk_get_port,
1988 .enter_memory_pressure = tcp_enter_memory_pressure,
1989 .leave_memory_pressure = tcp_leave_memory_pressure,
1990 .stream_memory_free = tcp_stream_memory_free,
1991 .sockets_allocated = &tcp_sockets_allocated,
1992 .memory_allocated = &tcp_memory_allocated,
1993 .memory_pressure = &tcp_memory_pressure,
1994 .orphan_count = &tcp_orphan_count,
1995 .sysctl_mem = sysctl_tcp_mem,
1996 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
1997 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
1998 .max_header = MAX_TCP_HEADER,
1999 .obj_size = sizeof(struct tcp6_sock),
2000 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2001 .twsk_prot = &tcp6_timewait_sock_ops,
2002 .rsk_prot = &tcp6_request_sock_ops,
2003 .h.hashinfo = &tcp_hashinfo,
2004 .no_autobind = true,
2005 #ifdef CONFIG_COMPAT
2006 .compat_setsockopt = compat_tcp_setsockopt,
2007 .compat_getsockopt = compat_tcp_getsockopt,
2009 .diag_destroy = tcp_abort,
2012 /* thinking of making this const? Don't.
2013 * early_demux can change based on sysctl.
/* inet6 protocol registration for IPPROTO_TCP: receive, error and early
 * demux entry points.
 */
2015 static struct inet6_protocol tcpv6_protocol = {
2016 .early_demux = tcp_v6_early_demux,
2017 .early_demux_handler = tcp_v6_early_demux,
2018 .handler = tcp_v6_rcv,
2019 .err_handler = tcp_v6_err,
2020 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Protocol switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot and
 * the generic inet6 stream socket ops.
 */
2023 static struct inet_protosw tcpv6_protosw = {
2024 .type = SOCK_STREAM,
2025 .protocol = IPPROTO_TCP,
2026 .prot = &tcpv6_prot,
2027 .ops = &inet6_stream_ops,
2028 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the raw IPv6 TCP control socket stored in
 * net->ipv6.tcp_sk (presumably used by the RST/ACK transmit paths — the
 * callers are outside this view).
 */
2032 static int __net_init tcpv6_net_init(struct net *net)
2034 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2035 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: destroy the control socket created in tcpv6_net_init(). */
2038 static void __net_exit tcpv6_net_exit(struct net *net)
2040 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batch exit: purge IPv6 TIME_WAIT sockets for all exiting netns at once. */
2043 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2045 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle hooks registered in tcpv6_init(). */
2048 static struct pernet_operations tcpv6_net_ops = {
2049 .init = tcpv6_net_init,
2050 .exit = tcpv6_net_exit,
2051 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register the protocol handler, the protosw entry and the
 * pernet ops, unwinding in reverse order on failure.
 */
2054 int __init tcpv6_init(void)
2058 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2062 /* register inet6 protocol */
2063 ret = inet6_register_protosw(&tcpv6_protosw);
2065 goto out_tcpv6_protocol;
2067 ret = register_pernet_subsys(&tcpv6_net_ops);
2069 goto out_tcpv6_protosw;
/* Error unwind: undo registrations in reverse order. */
2074 inet6_unregister_protosw(&tcpv6_protosw);
2076 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Module exit: tear down in the reverse order of tcpv6_init(). */
2080 void tcpv6_exit(void)
2082 unregister_pernet_subsys(&tcpv6_net_ops);
2083 inet6_unregister_protosw(&tcpv6_protosw);
2084 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);