1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
/* tcp_inet6_sk(): fast accessor for the ipv6_pinfo embedded in tcp6_sock.
 * NOTE(review): this extract has dropped lines throughout (the closing of
 * the comment below, braces, labels); code is kept byte-identical.
 */
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97 struct tcp6_sock, tcp)->inet6)
/* Cache the skb's input route on the socket, together with the ingress
 * ifindex and the rt6 cookie used later to validate the cached dst.
 */
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
101 struct dst_entry *dst = skb_dst(skb);
/* Only cache if we can take a reference on the dst. */
103 if (dst && dst_hold_safe(dst)) {
104 const struct rt6_info *rt = (const struct rt6_info *)dst;
106 rcu_assign_pointer(sk->sk_rx_dst, dst);
107 sk->sk_rx_dst_ifindex = skb->skb_iif;
108 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number from the packet's IPv6 addresses and
 * TCP ports. NOTE(review): one argument line (destination port) appears to
 * have been dropped from this extract.
 */
112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
114 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115 ipv6_hdr(skb)->saddr.s6_addr32,
117 tcp_hdr(skb)->source);
/* Per-connection timestamp offset, keyed on the address pair. */
120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
122 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123 ipv6_hdr(skb)->saddr.s6_addr32);
/* Pre-connect hook: validate addr_len, then let a BPF cgroup program
 * inspect/rewrite the destination before tcp_v6_connect() runs.
 */
126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
129 /* This check is replicated from tcp_v6_connect() and intended to
130 * prevent BPF program called below from accessing bytes that are out
131 * of the bound specified by user in addr_len.
133 if (addr_len < SIN6_LEN_RFC2133)
136 sock_owned_by_me(sk);
138 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
/* Active open for an IPv6 (or v4-mapped) TCP socket: validate the address,
 * pick a route and source address, bind, choose the ISN and send the SYN.
 * NOTE(review): many lines (return statements, goto labels, closing braces)
 * have been dropped from this extract; code below is kept byte-identical.
 */
141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
144 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145 struct inet_connection_sock *icsk = inet_csk(sk);
146 struct in6_addr *saddr = NULL, *final_p, final;
147 struct inet_timewait_death_row *tcp_death_row;
148 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149 struct inet_sock *inet = inet_sk(sk);
150 struct tcp_sock *tp = tcp_sk(sk);
151 struct net *net = sock_net(sk);
152 struct ipv6_txoptions *opt;
153 struct dst_entry *dst;
158 if (addr_len < SIN6_LEN_RFC2133)
161 if (usin->sin6_family != AF_INET6)
162 return -EAFNOSUPPORT;
164 memset(&fl6, 0, sizeof(fl6));
/* Honour a user-supplied flow label if IPV6_FLOWINFO_SEND is set. */
166 if (inet6_test_bit(SNDFLOW, sk)) {
167 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
168 IP6_ECN_flow_init(fl6.flowlabel);
169 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
170 struct ip6_flowlabel *flowlabel;
171 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172 if (IS_ERR(flowlabel))
174 fl6_sock_release(flowlabel);
179 * connect() to INADDR_ANY means loopback (BSD'ism).
182 if (ipv6_addr_any(&usin->sin6_addr)) {
183 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
187 usin->sin6_addr = in6addr_loopback;
190 addr_type = ipv6_addr_type(&usin->sin6_addr);
192 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a scope id / bound device. */
195 if (addr_type&IPV6_ADDR_LINKLOCAL) {
196 if (addr_len >= sizeof(struct sockaddr_in6) &&
197 usin->sin6_scope_id) {
198 /* If interface is set while binding, indices
201 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
204 sk->sk_bound_dev_if = usin->sin6_scope_id;
207 /* Connect to link-local address requires an interface */
208 if (!sk->sk_bound_dev_if)
/* Reset stale TS state when reconnecting to a different peer. */
212 if (tp->rx_opt.ts_recent_stamp &&
213 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214 tp->rx_opt.ts_recent = 0;
215 tp->rx_opt.ts_recent_stamp = 0;
216 WRITE_ONCE(tp->write_seq, 0);
219 sk->sk_v6_daddr = usin->sin6_addr;
220 np->flow_label = fl6.flowlabel;
/* V4-mapped destination: hand off to tcp_v4_connect() with v4 ops,
 * undoing the op swap if the v4 connect fails.
 */
226 if (addr_type & IPV6_ADDR_MAPPED) {
227 u32 exthdrlen = icsk->icsk_ext_hdr_len;
228 struct sockaddr_in sin;
230 if (ipv6_only_sock(sk))
233 sin.sin_family = AF_INET;
234 sin.sin_port = usin->sin6_port;
235 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
237 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
240 mptcpv6_handle_mapped(sk, true);
241 sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 icsk->icsk_ext_hdr_len = exthdrlen;
250 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
253 mptcpv6_handle_mapped(sk, false);
254 sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256 tp->af_specific = &tcp_sock_ipv6_specific;
260 np->saddr = sk->sk_v6_rcv_saddr;
265 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow description and look up the output route. */
268 fl6.flowi6_proto = IPPROTO_TCP;
269 fl6.daddr = sk->sk_v6_daddr;
270 fl6.saddr = saddr ? *saddr : np->saddr;
271 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272 fl6.flowi6_oif = sk->sk_bound_dev_if;
273 fl6.flowi6_mark = sk->sk_mark;
274 fl6.fl6_dport = usin->sin6_port;
275 fl6.fl6_sport = inet->inet_sport;
276 fl6.flowi6_uid = sk->sk_uid;
278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 final_p = fl6_update_dst(&fl6, opt, &final);
281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
289 tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
290 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
295 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
300 /* set the source address */
302 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
304 sk->sk_gso_type = SKB_GSO_TCPV6;
305 ip6_dst_store(sk, dst, NULL, NULL);
307 icsk->icsk_ext_hdr_len = 0;
309 icsk->icsk_ext_hdr_len = opt->opt_flen +
312 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
314 inet->inet_dport = usin->sin6_port;
316 tcp_set_state(sk, TCP_SYN_SENT);
317 err = inet6_hash_connect(tcp_death_row, sk);
/* Choose secure ISN / TS offset unless the socket is being repaired. */
323 if (likely(!tp->repair)) {
325 WRITE_ONCE(tp->write_seq,
326 secure_tcpv6_seq(np->saddr.s6_addr32,
327 sk->sk_v6_daddr.s6_addr32,
330 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
331 sk->sk_v6_daddr.s6_addr32);
334 if (tcp_fastopen_defer_connect(sk, &err))
339 err = tcp_connect(sk);
/* Failure path: unwind state so the socket can be reused. */
346 tcp_set_state(sk, TCP_CLOSE);
347 inet_bhash2_reset_saddr(sk);
349 inet->inet_dport = 0;
350 sk->sk_route_caps = 0;
/* React to a deferred PMTU reduction: re-resolve the route with the new
 * MTU and retransmit if our cached MSS is now too large.
 */
354 static void tcp_v6_mtu_reduced(struct sock *sk)
356 struct dst_entry *dst;
359 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
362 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
364 /* Drop requests trying to increase our current mss.
365 * Check done in __ip6_rt_update_pmtu() is too late.
367 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
370 dst = inet6_csk_update_pmtu(sk, mtu);
374 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
375 tcp_sync_mss(sk, dst_mtu(dst));
376 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket the error refers to and
 * act on it (redirect, PMTU, connection error / RTO revert).
 * NOTE(review): lines are missing from this extract (labels, braces,
 * several statements); code below is kept byte-identical.
 */
380 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
381 u8 type, u8 code, int offset, __be32 info)
383 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
384 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
385 struct net *net = dev_net(skb->dev);
386 struct request_sock *fastopen;
387 struct ipv6_pinfo *np;
/* Find the established (or timewait/new-syn-recv) socket for the
 * embedded header's 4-tuple.
 */
394 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
395 &hdr->daddr, th->dest,
396 &hdr->saddr, ntohs(th->source),
397 skb->dev->ifindex, inet6_sdif(skb));
400 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
405 if (sk->sk_state == TCP_TIME_WAIT) {
406 inet_twsk_put(inet_twsk(sk));
409 seq = ntohl(th->seq);
410 fatal = icmpv6_err_convert(type, code, &err);
411 if (sk->sk_state == TCP_NEW_SYN_RECV) {
412 tcp_req_err(sk, seq, fatal);
/* Under user lock we can only defer; PKT_TOOBIG is handled below. */
417 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
418 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
420 if (sk->sk_state == TCP_CLOSE)
423 if (static_branch_unlikely(&ip6_min_hopcount)) {
424 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
425 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
426 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
432 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
433 fastopen = rcu_dereference(tp->fastopen_rsk);
434 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore errors whose quoted seq is outside our send window. */
435 if (sk->sk_state != TCP_LISTEN &&
436 !between(seq, snd_una, tp->snd_nxt)) {
437 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
441 np = tcp_inet6_sk(sk);
443 if (type == NDISC_REDIRECT) {
444 if (!sock_owned_by_user(sk)) {
445 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
448 dst->ops->redirect(dst, sk, skb);
453 if (type == ICMPV6_PKT_TOOBIG) {
454 u32 mtu = ntohl(info);
456 /* We are not interested in TCP_LISTEN and open_requests
457 * (SYN-ACKs send out by Linux are always <576bytes so
458 * they should go through unfragmented).
460 if (sk->sk_state == TCP_LISTEN)
463 if (!ip6_sk_accept_pmtu(sk))
466 if (mtu < IPV6_MIN_MTU)
469 WRITE_ONCE(tp->mtu_info, mtu);
/* Apply now if unlocked, otherwise defer to release_sock(). */
471 if (!sock_owned_by_user(sk))
472 tcp_v6_mtu_reduced(sk);
473 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
480 /* Might be for an request_sock */
481 switch (sk->sk_state) {
484 /* Only in fast or simultaneous open. If a fast open socket is
485 * already accepted it is treated as a connected one below.
487 if (fastopen && !fastopen->sk)
490 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
492 if (!sock_owned_by_user(sk)) {
493 WRITE_ONCE(sk->sk_err, err);
494 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
498 WRITE_ONCE(sk->sk_err_soft, err);
504 /* check if this ICMP message allows revert of backoff.
507 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508 code == ICMPV6_NOROUTE)
509 tcp_ld_RTO_revert(sk, seq);
512 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513 WRITE_ONCE(sk->sk_err, err);
516 WRITE_ONCE(sk->sk_err_soft, err);
/* Build and transmit a SYN-ACK for the given request socket, routing it if
 * no dst was supplied and reflecting ToS/flowlabel per sysctl/socket opts.
 */
525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
527 struct request_sock *req,
528 struct tcp_fastopen_cookie *foc,
529 enum tcp_synack_type synack_type,
530 struct sk_buff *syn_skb)
532 struct inet_request_sock *ireq = inet_rsk(req);
533 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534 struct ipv6_txoptions *opt;
535 struct flowi6 *fl6 = &fl->u.ip6;
540 /* First, grab a route. */
541 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
542 IPPROTO_TCP)) == NULL)
545 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
548 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
549 &ireq->ir_v6_rmt_addr);
551 fl6->daddr = ireq->ir_v6_rmt_addr;
552 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
553 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Reflect the SYN's ToS (minus ECN bits) when the sysctl asks for it. */
555 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
556 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
557 (np->tclass & INET_ECN_MASK) :
560 if (!INET_ECN_is_capable(tclass) &&
561 tcp_bpf_ca_needs_ecn((struct sock *)req))
562 tclass |= INET_ECN_ECT_0;
565 opt = ireq->ipv6_opt;
567 opt = rcu_dereference(np->opt);
568 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
569 opt, tclass, READ_ONCE(sk->sk_priority));
571 err = net_xmit_eval(err);
/* Free per-request IPv6 resources (cloned txoptions and queued pktopts). */
579 static void tcp_v6_reqsk_destructor(struct request_sock *req)
581 kfree(inet_rsk(req)->ipv6_opt);
582 consume_skb(inet_rsk(req)->pktopts);
585 #ifdef CONFIG_TCP_MD5SIG
/* Look up an MD5 key for an IPv6 peer address, scoped by L3 index. */
586 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
587 const struct in6_addr *addr,
590 return tcp_md5_do_lookup(sk, l3index,
591 (union tcp_md5_addr *)addr, AF_INET6);
/* MD5 key lookup keyed on the peer socket's daddr and bound device. */
594 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
595 const struct sock *addr_sk)
599 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
600 addr_sk->sk_bound_dev_if);
601 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the user's
 * struct tcp_md5sig, resolve prefix/ifindex scoping, then add or delete
 * the key (v4-mapped addresses are stored as AF_INET keys).
 */
605 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
606 sockptr_t optval, int optlen)
608 struct tcp_md5sig cmd;
609 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
614 if (optlen < sizeof(cmd))
617 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
620 if (sin6->sin6_family != AF_INET6)
623 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
/* Optional address-prefix scoping (TCP_MD5SIG_EXT only). */
625 if (optname == TCP_MD5SIG_EXT &&
626 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
627 prefixlen = cmd.tcpm_prefixlen;
628 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
632 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Optional device scoping: the ifindex must name an L3 master. */
635 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
636 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
637 struct net_device *dev;
640 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
641 if (dev && netif_is_l3_master(dev))
642 l3index = dev->ifindex;
645 /* ok to reference set/not set outside of rcu;
646 * right now device MUST be an L3 master
648 if (!dev || !l3index)
/* Zero key length means delete. */
652 if (!cmd.tcpm_keylen) {
653 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
654 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
657 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658 AF_INET6, prefixlen, l3index, flags);
661 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
664 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
665 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
666 AF_INET, prefixlen, l3index, flags,
667 cmd.tcpm_key, cmd.tcpm_keylen);
669 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
670 AF_INET6, prefixlen, l3index, flags,
671 cmd.tcpm_key, cmd.tcpm_keylen);
/* Feed the IPv6 pseudo-header plus a checksum-zeroed copy of the TCP
 * header into the MD5 hash in one scatterlist update.
 */
674 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
675 const struct in6_addr *daddr,
676 const struct in6_addr *saddr,
677 const struct tcphdr *th, int nbytes)
679 struct tcp6_pseudohdr *bp;
680 struct scatterlist sg;
684 /* 1. TCP pseudo-header (RFC2460) */
687 bp->protocol = cpu_to_be32(IPPROTO_TCP);
688 bp->len = cpu_to_be32(nbytes);
690 _th = (struct tcphdr *)(bp + 1);
691 memcpy(_th, th, sizeof(*th));
694 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
695 ahash_request_set_crypt(hp->req, &sg, NULL,
696 sizeof(*bp) + sizeof(*th));
697 return crypto_ahash_update(hp->req);
/* Compute the MD5 signature over headers + key only (no payload), used
 * when signing locally generated control segments. On any failure the
 * output digest is zeroed so callers never use stale bytes.
 */
700 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
701 const struct in6_addr *daddr, struct in6_addr *saddr,
702 const struct tcphdr *th)
704 struct tcp_sigpool hp;
706 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
707 goto clear_hash_nostart;
709 if (crypto_ahash_init(hp.req))
711 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
713 if (tcp_md5_hash_key(&hp, key))
715 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
716 if (crypto_ahash_final(hp.req))
719 tcp_sigpool_end(&hp);
723 tcp_sigpool_end(&hp);
725 memset(md5_hash, 0, 16);
/* Compute the MD5 signature over headers + skb payload + key. Addresses
 * come from the socket when available, else from the packet itself.
 * Digest is zeroed on failure.
 */
729 static int tcp_v6_md5_hash_skb(char *md5_hash,
730 const struct tcp_md5sig_key *key,
731 const struct sock *sk,
732 const struct sk_buff *skb)
734 const struct tcphdr *th = tcp_hdr(skb);
735 const struct in6_addr *saddr, *daddr;
736 struct tcp_sigpool hp;
738 if (sk) { /* valid for establish/request sockets */
739 saddr = &sk->sk_v6_rcv_saddr;
740 daddr = &sk->sk_v6_daddr;
742 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
743 saddr = &ip6h->saddr;
744 daddr = &ip6h->daddr;
747 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
748 goto clear_hash_nostart;
750 if (crypto_ahash_init(hp.req))
753 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
755 if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
757 if (tcp_md5_hash_key(&hp, key))
759 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
760 if (crypto_ahash_final(hp.req))
763 tcp_sigpool_end(&hp);
767 tcp_sigpool_end(&hp);
769 memset(md5_hash, 0, 16);
/* Fill the IPv6 fields of a freshly minted request sock from the SYN, and
 * keep a reference to the SYN skb when the listener wants its IPv6 cmsg
 * options later.
 */
775 static void tcp_v6_init_req(struct request_sock *req,
776 const struct sock *sk_listener,
779 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
780 struct inet_request_sock *ireq = inet_rsk(req);
781 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
783 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
784 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
786 /* So that link locals have meaning */
787 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
788 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
789 ireq->ir_iif = tcp_v6_iif(skb);
/* Hold the SYN skb if any rx option the listener enabled needs it. */
791 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
792 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
793 np->rxopt.bits.rxinfo ||
794 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
795 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
796 refcount_inc(&skb->users);
/* route_req callback: init the request from the SYN, run LSM checks, then
 * resolve the route for the SYN-ACK.
 */
801 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
804 struct request_sock *req)
806 tcp_v6_init_req(req, sk, skb);
808 if (security_inet_conn_request(sk, skb, req))
811 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* request_sock_ops for IPv6 TCP: size, retransmit, ack/reset and cleanup
 * callbacks used while a connection lives in the SYN queue.
 */
814 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
816 .obj_size = sizeof(struct tcp6_request_sock),
817 .rtx_syn_ack = tcp_rtx_synack,
818 .send_ack = tcp_v6_reqsk_send_ack,
819 .destructor = tcp_v6_reqsk_destructor,
820 .send_reset = tcp_v6_send_reset,
821 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific request-sock operations (MSS clamp, MD5, syncookies, ISN,
 * routing and SYN-ACK transmission) for the IPv6 side.
 */
824 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
825 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
826 sizeof(struct ipv6hdr),
827 #ifdef CONFIG_TCP_MD5SIG
828 .req_md5_lookup = tcp_v6_md5_lookup,
829 .calc_md5_hash = tcp_v6_md5_hash_skb,
831 #ifdef CONFIG_SYN_COOKIES
832 .cookie_init_seq = cookie_v6_init_sequence,
834 .route_req = tcp_v6_route_req,
835 .init_seq = tcp_v6_init_seq,
836 .init_ts_off = tcp_v6_init_ts_off,
837 .send_synack = tcp_v6_send_synack,
/* Build and send a bare control segment (RST or ACK) in reply to @skb,
 * using the per-netns control socket. Handles optional timestamps, MD5
 * signature and MPTCP reset option. NOTE(review): lines are missing from
 * this extract (braces, some declarations); code is kept byte-identical.
 */
840 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
841 u32 ack, u32 win, u32 tsval, u32 tsecr,
842 int oif, struct tcp_md5sig_key *key, int rst,
843 u8 tclass, __be32 label, u32 priority, u32 txhash)
845 const struct tcphdr *th = tcp_hdr(skb);
847 struct sk_buff *buff;
849 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
850 struct sock *ctl_sk = net->ipv6.tcp_sk;
851 unsigned int tot_len = sizeof(struct tcphdr);
852 __be32 mrst = 0, *topt;
853 struct dst_entry *dst;
/* Account option space before allocating the header. */
857 tot_len += TCPOLEN_TSTAMP_ALIGNED;
858 #ifdef CONFIG_TCP_MD5SIG
860 tot_len += TCPOLEN_MD5SIG_ALIGNED;
865 mrst = mptcp_reset_option(skb);
868 tot_len += sizeof(__be32);
872 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
876 skb_reserve(buff, MAX_TCP_HEADER);
878 t1 = skb_push(buff, tot_len);
879 skb_reset_transport_header(buff);
881 /* Swap the send and the receive. */
882 memset(t1, 0, sizeof(*t1));
883 t1->dest = th->source;
884 t1->source = th->dest;
885 t1->doff = tot_len / 4;
886 t1->seq = htonl(seq);
887 t1->ack_seq = htonl(ack);
888 t1->ack = !rst || !th->ack;
890 t1->window = htons(win);
892 topt = (__be32 *)(t1 + 1);
895 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
896 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
897 *topt++ = htonl(tsval);
898 *topt++ = htonl(tsecr);
904 #ifdef CONFIG_TCP_MD5SIG
906 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
907 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
908 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
909 &ipv6_hdr(skb)->saddr,
910 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: swap src/dst of the packet we are answering. */
914 memset(&fl6, 0, sizeof(fl6));
915 fl6.daddr = ipv6_hdr(skb)->saddr;
916 fl6.saddr = ipv6_hdr(skb)->daddr;
917 fl6.flowlabel = label;
919 buff->ip_summed = CHECKSUM_PARTIAL;
921 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
923 fl6.flowi6_proto = IPPROTO_TCP;
924 if (rt6_need_strict(&fl6.daddr) && !oif)
925 fl6.flowi6_oif = tcp_v6_iif(skb);
927 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
930 fl6.flowi6_oif = oif;
934 if (sk->sk_state == TCP_TIME_WAIT)
935 mark = inet_twsk(sk)->tw_mark;
937 mark = READ_ONCE(sk->sk_mark);
938 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
941 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
942 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
944 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
945 fl6.fl6_dport = t1->dest;
946 fl6.fl6_sport = t1->source;
947 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
948 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
950 /* Pass a socket to ip6_dst_lookup either it is for RST
951 * Underlying function will use this to retrieve the network
954 if (sk && sk->sk_state != TCP_TIME_WAIT)
955 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
957 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
959 skb_dst_set(buff, dst);
960 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
961 tclass & ~INET_ECN_MASK, priority);
962 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
964 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb. When no full socket is known, an MD5-signed
 * incoming segment forces a listener lookup so the RST can be signed with
 * the matching key (or suppressed if the hash doesn't verify).
 */
971 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
973 const struct tcphdr *th = tcp_hdr(skb);
974 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
975 u32 seq = 0, ack_seq = 0;
976 struct tcp_md5sig_key *key = NULL;
977 #ifdef CONFIG_TCP_MD5SIG
978 const __u8 *hash_location = NULL;
979 unsigned char newhash[16];
981 struct sock *sk1 = NULL;
992 /* If sk not NULL, it means we did a successful lookup and incoming
993 * route had to be correct. prequeue might have dropped our dst.
995 if (!sk && !ipv6_unicast_destination(skb))
998 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
999 #ifdef CONFIG_TCP_MD5SIG
1001 hash_location = tcp_parse_md5sig_option(th);
1002 if (sk && sk_fullsock(sk)) {
1005 /* sdif set, means packet ingressed via a device
1006 * in an L3 domain and inet_iif is set to it.
1008 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1009 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1010 } else if (hash_location) {
1011 int dif = tcp_v6_iif_l3_slave(skb);
1012 int sdif = tcp_v6_sdif(skb);
1016 * active side is lost. Try to find listening socket through
1017 * source port, and then find md5 key through listening socket.
1018 * we are not loose security here:
1019 * Incoming packet is checked with md5 hash with finding key,
1020 * no RST generated if md5 hash doesn't match.
1022 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1023 NULL, 0, &ipv6h->saddr, th->source,
1024 &ipv6h->daddr, ntohs(th->source),
1029 /* sdif set, means packet ingressed via a device
1030 * in an L3 domain and dif is set to it.
1032 l3index = tcp_v6_sdif(skb) ? dif : 0;
1034 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
/* Suppress the RST if the incoming segment's MD5 doesn't verify. */
1038 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1039 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RFC 793 rules: echo the ack as seq, or ack the received data. */
1045 seq = ntohl(th->ack_seq);
1047 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1051 oif = sk->sk_bound_dev_if;
1052 if (sk_fullsock(sk)) {
1053 trace_tcp_send_reset(sk, skb);
1054 if (inet6_test_bit(REPFLOW, sk))
1055 label = ip6_flowlabel(ipv6h);
1056 priority = READ_ONCE(sk->sk_priority);
1057 txhash = sk->sk_txhash;
1059 if (sk->sk_state == TCP_TIME_WAIT) {
1060 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1061 priority = inet_twsk(sk)->tw_priority;
1062 txhash = inet_twsk(sk)->tw_txhash;
1065 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1066 label = ip6_flowlabel(ipv6h);
1069 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1070 ipv6_get_dsfield(ipv6h), label, priority, txhash);
1072 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst=0 (pure ACK). */
1078 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1079 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1080 struct tcp_md5sig_key *key, u8 tclass,
1081 __be32 label, u32 priority, u32 txhash)
1083 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1084 tclass, label, priority, txhash);
/* ACK a segment received for a TIME_WAIT socket, using the timewait
 * sock's saved sequence/window/timestamp state.
 */
1087 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1089 struct inet_timewait_sock *tw = inet_twsk(sk);
1090 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1092 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1093 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1094 tcp_tw_tsval(tcptw),
1095 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1096 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
/* ACK on behalf of a request sock (SYN_RECV / TFO), picking the right
 * sequence number depending on whether the parent is still listening.
 */
1102 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1103 struct request_sock *req)
1107 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1109 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1110 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1113 * The window field (SEG.WND) of every outgoing segment, with the
1114 * exception of <SYN> segments, MUST be right-shifted by
1115 * Rcv.Wind.Shift bits:
1117 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1118 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1119 tcp_rsk(req)->rcv_nxt,
1120 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1121 tcp_rsk_tsval(tcp_rsk(req)),
1122 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1123 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1124 ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1125 READ_ONCE(sk->sk_priority),
1126 READ_ONCE(tcp_rsk(req)->txhash));
/* Validate a SYN cookie ACK (CONFIG_SYN_COOKIES). NOTE(review): the rest
 * of this function's body was dropped from this extract.
 */
1130 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1132 #ifdef CONFIG_SYN_COOKIES
1133 const struct tcphdr *th = tcp_hdr(skb);
1136 sk = cookie_v6_check(sk, skb);
/* BPF helper path: produce a syncookie ISN and MSS for the given SYN
 * headers, marking the listener's syn queue as overflowed.
 */
1141 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1142 struct tcphdr *th, u32 *cookie)
1145 #ifdef CONFIG_SYN_COOKIES
1146 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1147 &tcp_request_sock_ipv6_ops, sk, th);
1149 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1150 tcp_synq_overflow(sk);
/* Handle an incoming SYN: divert v4 frames to the IPv4 path, reject
 * non-unicast and v4-mapped sources, then run the generic request code.
 */
1156 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1158 if (skb->protocol == htons(ETH_P_IP))
1159 return tcp_v4_conn_request(sk, skb);
1161 if (!ipv6_unicast_destination(skb))
1164 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1165 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1169 return tcp_conn_request(&tcp6_request_sock_ops,
1170 &tcp_request_sock_ipv6_ops, sk, skb);
1174 return 0; /* don't send reset */
/* Restore IP6CB from the TCP control block before re-running input paths. */
1177 static void tcp_v6_restore_cb(struct sk_buff *skb)
1179 /* We need to move header back to the beginning if xfrm6_policy_check()
1180 * and tcp_v6_fill_cb() are going to be called again.
1181 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1183 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1184 sizeof(struct inet6_skb_parm));
/* Create the child socket once the 3WHS completes: either the v4-mapped
 * path (delegating to tcp_v4_syn_recv_sock and re-wiring v6 ops) or the
 * native IPv6 path (route, openreq child, options, MD5 key copy, hashing).
 * NOTE(review): many lines (labels, braces, some statements) are missing
 * from this extract; code below is kept byte-identical.
 */
1187 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1188 struct request_sock *req,
1189 struct dst_entry *dst,
1190 struct request_sock *req_unhash,
1193 struct inet_request_sock *ireq;
1194 struct ipv6_pinfo *newnp;
1195 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1196 struct ipv6_txoptions *opt;
1197 struct inet_sock *newinet;
1198 bool found_dup_sk = false;
1199 struct tcp_sock *newtp;
1201 #ifdef CONFIG_TCP_MD5SIG
1202 struct tcp_md5sig_key *key;
/* v4-mapped case: let the IPv4 code build the child, then point its
 * af_ops/backlog at the mapped variants.
 */
1207 if (skb->protocol == htons(ETH_P_IP)) {
1212 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1213 req_unhash, own_req);
1218 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1220 newnp = tcp_inet6_sk(newsk);
1221 newtp = tcp_sk(newsk);
1223 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1225 newnp->saddr = newsk->sk_v6_rcv_saddr;
1227 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1228 if (sk_is_mptcp(newsk))
1229 mptcpv6_handle_mapped(newsk, true);
1230 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1231 #ifdef CONFIG_TCP_MD5SIG
1232 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1235 newnp->ipv6_mc_list = NULL;
1236 newnp->ipv6_ac_list = NULL;
1237 newnp->ipv6_fl_list = NULL;
1238 newnp->pktoptions = NULL;
1240 newnp->mcast_oif = inet_iif(skb);
1241 newnp->mcast_hops = ip_hdr(skb)->ttl;
1242 newnp->rcv_flowinfo = 0;
1243 if (inet6_test_bit(REPFLOW, sk))
1244 newnp->flow_label = 0;
1247 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1248 * here, tcp_create_openreq_child now does this for us, see the comment in
1249 * that function for the gory details. -acme
1252 /* It is tricky place. Until this moment IPv4 tcp
1253 worked with IPv6 icsk.icsk_af_ops.
1256 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* Native IPv6 path starts here. */
1261 ireq = inet_rsk(req);
1263 if (sk_acceptq_is_full(sk))
1267 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1272 newsk = tcp_create_openreq_child(sk, req, skb);
1277 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1278 * count here, tcp_create_openreq_child now does this for us, see the
1279 * comment in that function for the gory details. -acme
1282 newsk->sk_gso_type = SKB_GSO_TCPV6;
1283 ip6_dst_store(newsk, dst, NULL, NULL);
1284 inet6_sk_rx_dst_set(newsk, skb);
1286 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1288 newtp = tcp_sk(newsk);
1289 newinet = inet_sk(newsk);
1290 newnp = tcp_inet6_sk(newsk);
1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1294 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1295 newnp->saddr = ireq->ir_v6_loc_addr;
1296 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1297 newsk->sk_bound_dev_if = ireq->ir_iif;
1299 /* Now IPv6 options...
1301 First: no IPv4 options.
1303 newinet->inet_opt = NULL;
1304 newnp->ipv6_mc_list = NULL;
1305 newnp->ipv6_ac_list = NULL;
1306 newnp->ipv6_fl_list = NULL;
1309 newnp->rxopt.all = np->rxopt.all;
1311 newnp->pktoptions = NULL;
1313 newnp->mcast_oif = tcp_v6_iif(skb);
1314 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1315 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1316 if (inet6_test_bit(REPFLOW, sk))
1317 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1319 /* Set ToS of the new socket based upon the value of incoming SYN.
1320 * ECT bits are set later in tcp_init_transfer().
1322 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1323 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1325 /* Clone native IPv6 options from listening socket (if any)
1327 Yes, keeping reference count would be much more clever,
1328 but we make one more one thing there: reattach optmem
1331 opt = ireq->ipv6_opt;
1333 opt = rcu_dereference(np->opt);
1335 opt = ipv6_dup_options(newsk, opt);
1336 RCU_INIT_POINTER(newnp->opt, opt);
1338 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1340 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1343 tcp_ca_openreq_child(newsk, dst);
1345 tcp_sync_mss(newsk, dst_mtu(dst));
1346 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1348 tcp_initialize_rcv_mss(newsk);
1350 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1351 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1353 #ifdef CONFIG_TCP_MD5SIG
1354 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1356 /* Copy over the MD5 key from the original socket */
1357 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1359 const union tcp_md5_addr *addr;
1361 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1362 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1363 inet_csk_prepare_forced_close(newsk);
/* Inherit the listener's bound port and insert into ehash. */
1370 if (__inet_inherit_port(sk, newsk) < 0) {
1371 inet_csk_prepare_forced_close(newsk);
1375 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1378 tcp_move_syn(newtp, req);
1380 /* Clone pktoptions received with SYN, if we own the req */
1381 if (ireq->pktopts) {
1382 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1383 consume_skb(ireq->pktopts);
1384 ireq->pktopts = NULL;
1385 if (newnp->pktoptions)
1386 tcp_v6_restore_cb(newnp->pktoptions);
1389 if (!req_unhash && found_dup_sk) {
1390 /* This code path should only be executed in the
1391 * syncookie case only
1393 bh_unlock_sock(newsk);
1402 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1410 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
/*
 * tcp_v6_do_rcv() — per-socket receive handler for IPv6 TCP.
 * Dispatches an skb against the socket's state: IPv4-mapped packets are
 * rerouted to tcp_v4_do_rcv(); ESTABLISHED sockets take the fast path via
 * tcp_rcv_established(); LISTEN sockets go through the syncookie check;
 * everything else goes through tcp_rcv_state_process(). On the way out it
 * latches IPV6_PKTOPTIONS data (cloned into opt_skb) onto np->pktoptions.
 *
 * NOTE(review): this excerpt is missing source lines — the embedded kernel
 * line numbers jump (e.g. 1474-1477, 1481-1483, 1507-1509), so braces,
 * labels (reset/discard/csum_error/ipv6_pktoptions) and some statements are
 * elided. Comments only added here; code left byte-identical.
 */
1412 /* The socket must have it's spinlock held when we get
1413 * here, unless it is a TCP_LISTEN socket.
1415 * We have a potential double-lock case here, so even when
1416 * doing backlog processing we use the BH locking scheme.
1417 * This is because we cannot sleep with the original spinlock
1420 INDIRECT_CALLABLE_SCOPE
1421 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1423 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1424 struct sk_buff *opt_skb = NULL;
1425 enum skb_drop_reason reason;
1426 struct tcp_sock *tp;
1428 /* Imagine: socket is IPv6. IPv4 packet arrives,
1429 goes to IPv4 receive handler and backlogged.
1430 From backlog it always goes here. Kerboom...
1431 Fortunately, tcp_rcv_established and rcv_established
1432 handle them correctly, but it is not case with
1433 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1436 if (skb->protocol == htons(ETH_P_IP))
1437 return tcp_v4_do_rcv(sk, skb);
1440 * socket locking is here for SMP purposes as backlog rcv
1441 * is currently called with bh processing disabled.
1444 /* Do Stevens' IPV6_PKTOPTIONS.
1446 Yes, guys, it is the only place in our code, where we
1447 may make it not affecting IPv4.
1448 The rest of code is protocol independent,
1449 and I do not like idea to uglify IPv4.
1451 Actually, all the idea behind IPV6_PKTOPTIONS
1452 looks not very well thought. For now we latch
1453 options, received in the last packet, enqueued
1454 by tcp. Feel free to propose better solution.
1458 opt_skb = skb_clone_and_charge_r(skb, sk);
1460 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1461 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1462 struct dst_entry *dst;
1464 dst = rcu_dereference_protected(sk->sk_rx_dst,
1465 lockdep_sock_is_held(sk));
1467 sock_rps_save_rxhash(sk, skb);
1468 sk_mark_napi_id(sk, skb);
/* Invalidate the cached rx dst if the ingress ifindex changed or the
 * dst failed its (possibly indirect) ->check() revalidation.
 */
1470 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1471 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1472 dst, sk->sk_rx_dst_cookie) == NULL) {
1473 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1478 tcp_rcv_established(sk, skb);
1480 goto ipv6_pktoptions;
1484 if (tcp_checksum_complete(skb))
1487 if (sk->sk_state == TCP_LISTEN) {
1488 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1494 if (tcp_child_process(sk, nsk, skb))
1497 __kfree_skb(opt_skb);
1501 sock_rps_save_rxhash(sk, skb);
1503 if (tcp_rcv_state_process(sk, skb))
1506 goto ipv6_pktoptions;
/* NOTE(review): reset/discard error paths below — their labels were lost
 * in this excerpt (line-number gaps around 1507-1516).
 */
1510 tcp_v6_send_reset(sk, skb);
1513 __kfree_skb(opt_skb);
1514 kfree_skb_reason(skb, reason);
1517 reason = SKB_DROP_REASON_TCP_CSUM;
1518 trace_tcp_bad_csum(skb);
1519 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1520 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1525 /* Do you ask, what is it?
1527 1. skb was enqueued by tcp.
1528 2. skb is added to tail of read queue, rather than out of order.
1529 3. socket is not in passive state.
1530 4. Finally, it really contains options, which user wants to receive.
1533 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1534 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1535 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1536 np->mcast_oif = tcp_v6_iif(opt_skb);
1537 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1538 WRITE_ONCE(np->mcast_hops,
1539 ipv6_hdr(opt_skb)->hop_limit);
1540 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1541 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1542 if (inet6_test_bit(REPFLOW, sk))
1543 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1544 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1545 tcp_v6_restore_cb(opt_skb);
/* xchg() swaps the freshly latched options in; the old pktoptions skb
 * comes back in opt_skb and is freed below.
 */
1546 opt_skb = xchg(&np->pktoptions, opt_skb);
1548 __kfree_skb(opt_skb);
1549 opt_skb = xchg(&np->pktoptions, NULL);
1553 consume_skb(opt_skb);
/*
 * tcp_v6_fill_cb() — populate TCP_SKB_CB(skb) from the TCP/IPv6 headers.
 * Moves the IP6CB control-block data into its slot inside TCP_SKB_CB first
 * (must happen after xfrm6_policy_check(), see the in-body comment), then
 * fills seq/end_seq/ack_seq, flags, DS field and rx-timestamp presence.
 *
 * NOTE(review): excerpt is missing lines 1559, 1564, 1567-1568 and 1579
 * (braces and the barrier() call referenced by the comment below).
 */
1557 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1558 const struct tcphdr *th)
1560 /* This is tricky: we move IP6CB at its correct location into
1561 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1562 * _decode_session6() uses IP6CB().
1563 * barrier() makes sure compiler won't play aliasing games.
1565 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1566 sizeof(struct inet6_skb_parm));
1569 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence unit each, plus payload. */
1570 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1571 skb->len - th->doff*4);
1572 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1573 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1574 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1575 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1576 TCP_SKB_CB(skb)->sacked = 0;
1577 TCP_SKB_CB(skb)->has_rxtstamp =
1578 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/*
 * tcp_v6_rcv() — top-level IPv6 TCP input handler (inet6 protocol hook).
 * Validates the header and checksum, looks the skb up in the ehash/listener
 * tables, then handles the socket by state: NEW_SYN_RECV (request socket,
 * including reuseport migration and tcp_check_req()), TIME_WAIT (via
 * tcp_timewait_state_process()), LISTEN, or a full socket which is either
 * processed directly or appended to the owner's backlog.
 *
 * NOTE(review): this excerpt drops many source lines (gaps in the embedded
 * numbering, e.g. 1588-1590, 1623-1627, 1748-1752) — labels such as
 * no_tcp_socket/discard_it/discard_and_relse/do_time_wait, braces and some
 * statements are missing. Code left byte-identical; comments only.
 */
1581 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1583 enum skb_drop_reason drop_reason;
1584 int sdif = inet6_sdif(skb);
1585 int dif = inet6_iif(skb);
1586 const struct tcphdr *th;
1587 const struct ipv6hdr *hdr;
1591 struct net *net = dev_net(skb->dev);
1593 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1594 if (skb->pkt_type != PACKET_HOST)
1598 * Count it even if it's bad.
1600 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1602 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1605 th = (const struct tcphdr *)skb->data;
/* doff is in 32-bit words; anything below the minimum header is bogus. */
1607 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1608 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1611 if (!pskb_may_pull(skb, th->doff*4))
1614 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1617 th = (const struct tcphdr *)skb->data;
1618 hdr = ipv6_hdr(skb);
1621 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1622 th->source, th->dest, inet6_iif(skb), sdif,
1628 if (sk->sk_state == TCP_TIME_WAIT)
/* Request-socket (SYN_RECV) handling: validate against the listener,
 * possibly migrate via reuseport, and let tcp_check_req() promote it.
 */
1631 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1632 struct request_sock *req = inet_reqsk(sk);
1633 bool req_stolen = false;
1636 sk = req->rsk_listener;
1637 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1638 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1640 drop_reason = tcp_inbound_md5_hash(sk, skb,
1641 &hdr->saddr, &hdr->daddr,
1642 AF_INET6, dif, sdif);
1644 sk_drops_add(sk, skb);
1648 if (tcp_checksum_complete(skb)) {
1652 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1653 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1655 inet_csk_reqsk_queue_drop_and_put(sk, req);
1659 /* reuseport_migrate_sock() has already held one sk_refcnt
1667 if (!tcp_filter(sk, skb)) {
1668 th = (const struct tcphdr *)skb->data;
1669 hdr = ipv6_hdr(skb);
1670 tcp_v6_fill_cb(skb, hdr, th);
1671 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1673 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1678 /* Another cpu got exclusive access to req
1679 * and created a full blown socket.
1680 * Try to feed this packet to this socket
1681 * instead of discarding it.
1683 tcp_v6_restore_cb(skb);
1687 goto discard_and_relse;
1692 tcp_v6_restore_cb(skb);
1693 } else if (tcp_child_process(sk, nsk, skb)) {
1694 tcp_v6_send_reset(nsk, skb);
1695 goto discard_and_relse;
/* Full-socket path: min-hopcount, xfrm policy, MD5 and socket filter
 * checks before the packet reaches tcp_v6_do_rcv()/the backlog.
 */
1702 if (static_branch_unlikely(&ip6_min_hopcount)) {
1703 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1704 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1705 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1706 drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1707 goto discard_and_relse;
1711 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1712 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1713 goto discard_and_relse;
1716 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1717 AF_INET6, dif, sdif);
1719 goto discard_and_relse;
1723 if (tcp_filter(sk, skb)) {
1724 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1725 goto discard_and_relse;
/* tcp_filter() may have trimmed/reallocated the skb — reload pointers. */
1727 th = (const struct tcphdr *)skb->data;
1728 hdr = ipv6_hdr(skb);
1729 tcp_v6_fill_cb(skb, hdr, th);
1733 if (sk->sk_state == TCP_LISTEN) {
1734 ret = tcp_v6_do_rcv(sk, skb);
1735 goto put_and_return;
1738 sk_incoming_cpu_update(sk);
1740 bh_lock_sock_nested(sk);
1741 tcp_segs_in(tcp_sk(sk), skb);
1743 if (!sock_owned_by_user(sk)) {
1744 ret = tcp_v6_do_rcv(sk, skb);
1746 if (tcp_add_backlog(sk, skb, &drop_reason))
1747 goto discard_and_relse;
1753 return ret ? -1 : 0;
/* no_tcp_socket path (label elided in this excerpt): no matching socket —
 * verify checksum and answer with a RST when appropriate.
 */
1756 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1757 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1760 tcp_v6_fill_cb(skb, hdr, th);
1762 if (tcp_checksum_complete(skb)) {
1764 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1765 trace_tcp_bad_csum(skb);
1766 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1768 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1770 tcp_v6_send_reset(NULL, skb);
1774 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1775 kfree_skb_reason(skb, drop_reason);
1779 sk_drops_add(sk, skb);
/* do_time_wait path (label elided): TIME_WAIT socket handling. */
1785 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1786 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1787 inet_twsk_put(inet_twsk(sk));
1791 tcp_v6_fill_cb(skb, hdr, th);
1793 if (tcp_checksum_complete(skb)) {
1794 inet_twsk_put(inet_twsk(sk));
1798 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN: a new SYN hit the timewait socket — look for a listener
 * that can take over and recycle the timewait entry.
 */
1803 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1804 skb, __tcp_hdrlen(th),
1805 &ipv6_hdr(skb)->saddr, th->source,
1806 &ipv6_hdr(skb)->daddr,
1808 tcp_v6_iif_l3_slave(skb),
1811 struct inet_timewait_sock *tw = inet_twsk(sk);
1812 inet_twsk_deschedule_put(tw);
1814 tcp_v6_restore_cb(skb);
1822 tcp_v6_timewait_ack(sk, skb);
1825 tcp_v6_send_reset(sk, skb);
1826 inet_twsk_deschedule_put(inet_twsk(sk));
1828 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() — early demultiplex hook: before the full input path
 * runs, try to find an established socket for this skb and attach its cached
 * rx dst (skb_dst_set_noref) so route lookup can be skipped later.
 *
 * NOTE(review): excerpt is missing lines (e.g. 1839-1840, 1845-1846,
 * 1848-1849, 1858-1859) — braces, the th assignment and the sk NULL check
 * are elided. Comments only added.
 */
1834 void tcp_v6_early_demux(struct sk_buff *skb)
1836 struct net *net = dev_net(skb->dev);
1837 const struct ipv6hdr *hdr;
1838 const struct tcphdr *th;
1841 if (skb->pkt_type != PACKET_HOST)
1844 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1847 hdr = ipv6_hdr(skb);
1850 if (th->doff < sizeof(struct tcphdr) / 4)
1853 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1854 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1855 &hdr->saddr, th->source,
1856 &hdr->daddr, ntohs(th->dest),
1857 inet6_iif(skb), inet6_sdif(skb));
1860 skb->destructor = sock_edemux;
1861 if (sk_fullsock(sk)) {
1862 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1865 dst = dst_check(dst, sk->sk_rx_dst_cookie);
/* Only reuse the cached dst when it is still valid and was learned on
 * the same ingress interface.
 */
1867 sk->sk_rx_dst_ifindex == skb->skb_iif)
1868 skb_dst_set_noref(skb, dst);
/* Timewait-socket ops for TCPv6: object size plus the shared TCP
 * uniqueness/destructor callbacks. NOTE(review): the closing "};" (line
 * 1877) is missing from this excerpt.
 */
1873 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1874 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1875 .twsk_unique = tcp_twsk_unique,
1876 .twsk_destructor = tcp_twsk_destructor,
/* Checksum hook for outgoing IPv6 TCP: thin wrapper around
 * __tcp_v6_send_check() using the socket's bound/peer addresses.
 * NOTE(review): surrounding braces (lines 1880/1882) missing in excerpt.
 */
1879 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1881 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
/* AF-specific connection-socket ops for native IPv6 TCP sockets.
 * NOTE(review): closing "};" (line 1897) missing in this excerpt.
 */
1884 const struct inet_connection_sock_af_ops ipv6_specific = {
1885 .queue_xmit = inet6_csk_xmit,
1886 .send_check = tcp_v6_send_check,
1887 .rebuild_header = inet6_sk_rebuild_header,
1888 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1889 .conn_request = tcp_v6_conn_request,
1890 .syn_recv_sock = tcp_v6_syn_recv_sock,
1891 .net_header_len = sizeof(struct ipv6hdr),
1892 .setsockopt = ipv6_setsockopt,
1893 .getsockopt = ipv6_getsockopt,
1894 .addr2sockaddr = inet6_csk_addr2sockaddr,
1895 .sockaddr_len = sizeof(struct sockaddr_in6),
1896 .mtu_reduced = tcp_v6_mtu_reduced,
/* MD5-signature ops for native IPv6 sockets (RFC 2385 TCP-MD5).
 * NOTE(review): closing "};" and "#endif" missing in this excerpt.
 */
1899 #ifdef CONFIG_TCP_MD5SIG
1900 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1901 .md5_lookup = tcp_v6_md5_lookup,
1902 .calc_md5_hash = tcp_v6_md5_hash_skb,
1903 .md5_parse = tcp_v6_parse_md5_keys,
/* AF ops for v4-mapped IPv6 sockets: transmit/header handling comes from
 * the IPv4 stack while sockopt/sockaddr handling stays IPv6.
 * NOTE(review): the opening comment delimiter and closing "};" are missing
 * from this excerpt.
 */
1908 * TCP over IPv4 via INET6 API
1910 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1911 .queue_xmit = ip_queue_xmit,
1912 .send_check = tcp_v4_send_check,
1913 .rebuild_header = inet_sk_rebuild_header,
1914 .sk_rx_dst_set = inet_sk_rx_dst_set,
1915 .conn_request = tcp_v6_conn_request,
1916 .syn_recv_sock = tcp_v6_syn_recv_sock,
1917 .net_header_len = sizeof(struct iphdr),
1918 .setsockopt = ipv6_setsockopt,
1919 .getsockopt = ipv6_getsockopt,
1920 .addr2sockaddr = inet6_csk_addr2sockaddr,
1921 .sockaddr_len = sizeof(struct sockaddr_in6),
1922 .mtu_reduced = tcp_v4_mtu_reduced,
/* MD5-signature ops for v4-mapped sockets: IPv4 lookup/hash, IPv6 parse.
 * NOTE(review): closing "};" and "#endif" missing in this excerpt.
 */
1925 #ifdef CONFIG_TCP_MD5SIG
1926 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1927 .md5_lookup = tcp_v4_md5_lookup,
1928 .calc_md5_hash = tcp_v4_md5_hash_skb,
1929 .md5_parse = tcp_v6_parse_md5_keys,
/*
 * tcp_v6_init_sock() — proto ->init for IPv6 TCP sockets: installs the
 * native IPv6 af_ops (and MD5 ops when configured). Presumably also calls
 * the common tcp init — elided lines here, TODO confirm against upstream.
 * NOTE(review): braces, return and "#endif" missing in this excerpt.
 */
1933 /* NOTE: A lot of things set to zero explicitly by call to
1934 * sk_alloc() so need not be done here.
1936 static int tcp_v6_init_sock(struct sock *sk)
1938 struct inet_connection_sock *icsk = inet_csk(sk);
1942 icsk->icsk_af_ops = &ipv6_specific;
1944 #ifdef CONFIG_TCP_MD5SIG
1945 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1951 #ifdef CONFIG_PROC_FS
1952 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6() — print one open request (SYN_RECV) row of /proc/net/tcp6.
 * @req: the request socket to dump; @i: row index in the seq output.
 * NOTE(review): the seq_printf( call line and closing braces are missing
 * from this excerpt (line-number gaps 1959-1963, 1982-1984).
 */
1953 static void get_openreq6(struct seq_file *seq,
1954 const struct request_sock *req, int i)
1956 long ttd = req->rsk_timer.expires - jiffies;
1957 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1958 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1964 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1967 src->s6_addr32[0], src->s6_addr32[1],
1968 src->s6_addr32[2], src->s6_addr32[3],
1969 inet_rsk(req)->ir_num,
1970 dest->s6_addr32[0], dest->s6_addr32[1],
1971 dest->s6_addr32[2], dest->s6_addr32[3],
1972 ntohs(inet_rsk(req)->ir_rmt_port),
1974 0, 0, /* could print option size, but that is af dependent. */
1975 1, /* timers active (only the expire timer) */
1976 jiffies_to_clock_t(ttd),
1978 from_kuid_munged(seq_user_ns(seq),
1979 sock_i_uid(req->rsk_listener)),
1980 0, /* non standard timer */
1981 0, /* open_requests have no inode */
/*
 * get_tcp6_sock() — print one full-socket row of /proc/net/tcp6.
 * Derives the displayed timer kind/expiry from pending icsk timers,
 * computes rx_queue locklessly (hence the clamp to >= 0), then emits the
 * fixed-format line. NOTE(review): excerpt drops lines (timer_active
 * assignments, seq_printf( opener, closing braces — gaps at 1988-1989,
 * 1995-1997, 2028-2029, 2054-2055); code left byte-identical.
 */
1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1987 const struct in6_addr *dest, *src;
1990 unsigned long timer_expires;
1991 const struct inet_sock *inet = inet_sk(sp);
1992 const struct tcp_sock *tp = tcp_sk(sp);
1993 const struct inet_connection_sock *icsk = inet_csk(sp);
1994 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1998 dest = &sp->sk_v6_daddr;
1999 src = &sp->sk_v6_rcv_saddr;
2000 destp = ntohs(inet->inet_dport);
2001 srcp = ntohs(inet->inet_sport);
/* Pick which pending timer (retransmit / probe / keepalive) to report. */
2003 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2004 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2005 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2007 timer_expires = icsk->icsk_timeout;
2008 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2010 timer_expires = icsk->icsk_timeout;
2011 } else if (timer_pending(&sp->sk_timer)) {
2013 timer_expires = sp->sk_timer.expires;
2016 timer_expires = jiffies;
2019 state = inet_sk_state_load(sp);
2020 if (state == TCP_LISTEN)
2021 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2023 /* Because we don't lock the socket,
2024 * we might find a transient negative value.
2026 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2027 READ_ONCE(tp->copied_seq), 0);
2030 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2031 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2033 src->s6_addr32[0], src->s6_addr32[1],
2034 src->s6_addr32[2], src->s6_addr32[3], srcp,
2035 dest->s6_addr32[0], dest->s6_addr32[1],
2036 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2038 READ_ONCE(tp->write_seq) - tp->snd_una,
2041 jiffies_delta_to_clock_t(timer_expires - jiffies),
2042 icsk->icsk_retransmits,
2043 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2044 icsk->icsk_probes_out,
2046 refcount_read(&sp->sk_refcnt), sp,
2047 jiffies_to_clock_t(icsk->icsk_rto),
2048 jiffies_to_clock_t(icsk->icsk_ack.ato),
2049 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
/* Last column: listener max fastopen qlen, or ssthresh (-1 in slowstart). */
2051 state == TCP_LISTEN ?
2052 fastopenq->max_qlen :
2053 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/*
 * get_timewait6_sock() — print one TIME_WAIT row of /proc/net/tcp6.
 * Most columns are fixed zeros since a timewait socket carries no queues,
 * uid or inode. NOTE(review): the seq_printf( opener and closing braces are
 * elided in this excerpt (gaps at 2062-2063, 2068-2069, 2080-2081).
 */
2057 static void get_timewait6_sock(struct seq_file *seq,
2058 struct inet_timewait_sock *tw, int i)
2060 long delta = tw->tw_timer.expires - jiffies;
2061 const struct in6_addr *dest, *src;
2064 dest = &tw->tw_v6_daddr;
2065 src = &tw->tw_v6_rcv_saddr;
2066 destp = ntohs(tw->tw_dport);
2067 srcp = ntohs(tw->tw_sport);
2070 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2071 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2073 src->s6_addr32[0], src->s6_addr32[1],
2074 src->s6_addr32[2], src->s6_addr32[3], srcp,
2075 dest->s6_addr32[0], dest->s6_addr32[1],
2076 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2077 tw->tw_substate, 0, 0,
2078 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2079 refcount_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show() — seq_file ->show for /proc/net/tcp6: emits the header
 * row for SEQ_START_TOKEN, otherwise dispatches on socket state to the
 * matching row printer (timewait / openreq / full socket).
 * NOTE(review): excerpt drops the seq_puts/seq_printf opener, the st
 * assignment, "out:"/return and braces (gaps at 2086, 2088-2091, 2094-2097,
 * 2102, 2104-2107).
 */
2082 static int tcp6_seq_show(struct seq_file *seq, void *v)
2084 struct tcp_iter_state *st;
2085 struct sock *sk = v;
2087 if (v == SEQ_START_TOKEN) {
2092 "st tx_queue rx_queue tr tm->when retrnsmt"
2093 " uid timeout inode\n");
2098 if (sk->sk_state == TCP_TIME_WAIT)
2099 get_timewait6_sock(seq, v, st->num);
2100 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2101 get_openreq6(seq, v, st->num);
2103 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration ops for /proc/net/tcp6; the shared tcp_seq_* helpers
 * do the hash-table walking. NOTE(review): closing "};" lines and the
 * tcp6_seq_afinfo initializer body are elided in this excerpt.
 */
2108 static const struct seq_operations tcp6_seq_ops = {
2109 .show = tcp6_seq_show,
2110 .start = tcp_seq_start,
2111 .next = tcp_seq_next,
2112 .stop = tcp_seq_stop,
2115 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Per-netns registration/removal of the /proc/net/tcp6 entry.
 * NOTE(review): return statements and braces elided in this excerpt.
 */
2119 int __net_init tcp6_proc_init(struct net *net)
2121 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2122 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2127 void tcp6_proc_exit(struct net *net)
2129 remove_proc_entry("tcp6", net->proc_net);
/* struct proto for IPv6 TCP (registered with the socket layer). Most
 * callbacks are the AF-independent tcp_* implementations; IPv6-specific
 * entry points are ->init, ->connect and ->backlog_rcv.
 * NOTE(review): excerpt drops several initializer lines (.name, .ioctl,
 * .hash, various #endif/closing lines — gaps at 2134, 2136, 2141, 2154,
 * 2160, 2165, 2168, 2180, 2183); code left byte-identical.
 */
2133 struct proto tcpv6_prot = {
2135 .owner = THIS_MODULE,
2137 .pre_connect = tcp_v6_pre_connect,
2138 .connect = tcp_v6_connect,
2139 .disconnect = tcp_disconnect,
2140 .accept = inet_csk_accept,
2142 .init = tcp_v6_init_sock,
2143 .destroy = tcp_v4_destroy_sock,
2144 .shutdown = tcp_shutdown,
2145 .setsockopt = tcp_setsockopt,
2146 .getsockopt = tcp_getsockopt,
2147 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2148 .keepalive = tcp_set_keepalive,
2149 .recvmsg = tcp_recvmsg,
2150 .sendmsg = tcp_sendmsg,
2151 .splice_eof = tcp_splice_eof,
2152 .backlog_rcv = tcp_v6_do_rcv,
2153 .release_cb = tcp_release_cb,
2155 .unhash = inet_unhash,
2156 .get_port = inet_csk_get_port,
2157 .put_port = inet_put_port,
2158 #ifdef CONFIG_BPF_SYSCALL
2159 .psock_update_sk_prot = tcp_bpf_update_proto,
2161 .enter_memory_pressure = tcp_enter_memory_pressure,
2162 .leave_memory_pressure = tcp_leave_memory_pressure,
2163 .stream_memory_free = tcp_stream_memory_free,
2164 .sockets_allocated = &tcp_sockets_allocated,
2166 .memory_allocated = &tcp_memory_allocated,
2167 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
2169 .memory_pressure = &tcp_memory_pressure,
2170 .orphan_count = &tcp_orphan_count,
2171 .sysctl_mem = sysctl_tcp_mem,
2172 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2173 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2174 .max_header = MAX_TCP_HEADER,
2175 .obj_size = sizeof(struct tcp6_sock),
2176 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2177 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2178 .twsk_prot = &tcp6_timewait_sock_ops,
2179 .rsk_prot = &tcp6_request_sock_ops,
2181 .no_autobind = true,
2182 .diag_destroy = tcp_abort,
2184 EXPORT_SYMBOL_GPL(tcpv6_prot);
/* inet6 protocol hook (IPPROTO_TCP handler + ICMPv6 error handler) and the
 * protosw entry that binds SOCK_STREAM/IPPROTO_TCP to tcpv6_prot.
 * NOTE(review): closing "};" lines and the second flag after the "|" are
 * elided in this excerpt.
 */
2186 static const struct inet6_protocol tcpv6_protocol = {
2187 .handler = tcp_v6_rcv,
2188 .err_handler = tcp_v6_err,
2189 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2192 static struct inet_protosw tcpv6_protosw = {
2193 .type = SOCK_STREAM,
2194 .protocol = IPPROTO_TCP,
2195 .prot = &tcpv6_prot,
2196 .ops = &inet6_stream_ops,
2197 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns lifecycle: create/destroy the control socket used for sending
 * resets/acks, purge timewait sockets on batch exit, and the pernet_operations
 * tying them together. NOTE(review): braces and closing "};" elided in this
 * excerpt.
 */
2201 static int __net_init tcpv6_net_init(struct net *net)
2203 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2204 SOCK_RAW, IPPROTO_TCP, net);
2207 static void __net_exit tcpv6_net_exit(struct net *net)
2209 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2212 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2214 tcp_twsk_purge(net_exit_list, AF_INET6);
2217 static struct pernet_operations tcpv6_net_ops = {
2218 .init = tcpv6_net_init,
2219 .exit = tcpv6_net_exit,
2220 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init() — module init: register the inet6 protocol handler, the
 * protosw entry, the pernet ops, and MPTCP-v6, unwinding in reverse order
 * on failure (goto-cleanup ladder). NOTE(review): braces, the error checks
 * (gaps at 2228-2230, 2233, 2235, 2237, 2239, 2241) and the
 * "out_tcpv6_protosw:" label line are elided in this excerpt.
 */
2223 int __init tcpv6_init(void)
2227 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2231 /* register inet6 protocol */
2232 ret = inet6_register_protosw(&tcpv6_protosw);
2234 goto out_tcpv6_protocol;
2236 ret = register_pernet_subsys(&tcpv6_net_ops);
2238 goto out_tcpv6_protosw;
2240 ret = mptcpv6_init();
2242 goto out_tcpv6_pernet_subsys;
2247 out_tcpv6_pernet_subsys:
2248 unregister_pernet_subsys(&tcpv6_net_ops);
2250 inet6_unregister_protosw(&tcpv6_protosw);
2252 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Module teardown: unregister in reverse order of tcpv6_init().
 * NOTE(review): braces elided in this excerpt.
 */
2256 void tcpv6_exit(void)
2258 unregister_pernet_subsys(&tcpv6_net_ops);
2259 inet6_unregister_protosw(&tcpv6_protosw);
2260 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);