3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
/* Forward declarations: these handlers are referenced (from the ops
 * tables and from tcp_v6_connect()) before their definitions below.
 */
72 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
/* af_ops variants: ipv6_mapped is installed when an AF_INET6 socket talks
 * to a v4-mapped peer, ipv6_specific for native IPv6 peers; swapped in
 * tcp_v6_connect() and tcp_v6_syn_recv_sock().
 */
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* NOTE(review): extract is truncated -- the matching #else stub and
 * #endif for CONFIG_TCP_MD5SIG are not visible here; confirm against the
 * full file.
 */
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 const struct in6_addr *addr)
/* Cache the validated ingress route on the socket: take a reference on
 * the skb's dst and record the ingress ifindex plus the rt6 cookie used
 * later to re-validate the cached dst (see tcp_v6_do_rcv()).
 * NOTE(review): truncated extract -- presumably the statement storing
 * dst into sk->sk_rx_dst sits between lines 96 and 99; confirm against
 * the full file.
 */
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 struct dst_entry *dst = skb_dst(skb);
95 if (dst && dst_hold_safe(dst)) {
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
99 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number (and the timestamp offset via
 * *tsoff) for a connection from the keyed secure-ISN generator, seeded
 * with the address/port 4-tuple of the incoming skb.
 * NOTE(review): truncated extract -- an argument line (line 108,
 * presumably tcp_hdr(skb)->dest) is missing between 107 and 109.
 */
104 static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
106 return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32,
109 tcp_hdr(skb)->source, tsoff);
/* connect() handler for AF_INET6 TCP sockets.
 *
 * Validates the destination address, resolves flow labels and scope ids,
 * delegates v4-mapped destinations to tcp_v4_connect(), routes the flow,
 * binds the source address, and finally sends the SYN via tcp_connect().
 *
 * NOTE(review): this extract is heavily truncated -- many interior lines
 * (error returns, closing braces, goto labels) are missing; the numeric
 * prefixes are the original file's line numbers.
 */
112 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
115 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
116 struct inet_sock *inet = inet_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 struct ipv6_pinfo *np = inet6_sk(sk);
119 struct tcp_sock *tp = tcp_sk(sk);
120 struct in6_addr *saddr = NULL, *final_p, final;
121 struct ipv6_txoptions *opt;
123 struct dst_entry *dst;
127 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
/* Basic sanity: minimum sockaddr length and correct address family. */
129 if (addr_len < SIN6_LEN_RFC2133)
132 if (usin->sin6_family != AF_INET6)
133 return -EAFNOSUPPORT;
135 memset(&fl6, 0, sizeof(fl6));
/* Honor a caller-supplied flow label; look up (and release) any
 * associated flow-label state for this socket.
 */
138 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
139 IP6_ECN_flow_init(fl6.flowlabel);
140 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
141 struct ip6_flowlabel *flowlabel;
142 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
145 fl6_sock_release(flowlabel);
150 * connect() to INADDR_ANY means loopback (BSD'ism).
153 if (ipv6_addr_any(&usin->sin6_addr)) {
154 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
155 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
158 usin->sin6_addr = in6addr_loopback;
161 addr_type = ipv6_addr_type(&usin->sin6_addr);
163 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a well-defined interface: the scope id
 * must agree with any already-bound device.
 */
166 if (addr_type&IPV6_ADDR_LINKLOCAL) {
167 if (addr_len >= sizeof(struct sockaddr_in6) &&
168 usin->sin6_scope_id) {
169 /* If interface is set while binding, indices
172 if (sk->sk_bound_dev_if &&
173 sk->sk_bound_dev_if != usin->sin6_scope_id)
176 sk->sk_bound_dev_if = usin->sin6_scope_id;
179 /* Connect to link-local address requires an interface */
180 if (!sk->sk_bound_dev_if)
/* New destination: stale TIME-WAIT timestamp state must not carry over. */
184 if (tp->rx_opt.ts_recent_stamp &&
185 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
186 tp->rx_opt.ts_recent = 0;
187 tp->rx_opt.ts_recent_stamp = 0;
191 sk->sk_v6_daddr = usin->sin6_addr;
192 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: flip the socket over to the IPv4 ops and let
 * tcp_v4_connect() do the work; on failure the IPv6 ops are restored.
 */
198 if (addr_type & IPV6_ADDR_MAPPED) {
199 u32 exthdrlen = icsk->icsk_ext_hdr_len;
200 struct sockaddr_in sin;
202 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
204 if (__ipv6_only_sock(sk))
207 sin.sin_family = AF_INET;
208 sin.sin_port = usin->sin6_port;
209 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
211 icsk->icsk_af_ops = &ipv6_mapped;
212 sk->sk_backlog_rcv = tcp_v4_do_rcv;
213 #ifdef CONFIG_TCP_MD5SIG
214 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
217 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
220 icsk->icsk_ext_hdr_len = exthdrlen;
221 icsk->icsk_af_ops = &ipv6_specific;
222 sk->sk_backlog_rcv = tcp_v6_do_rcv;
223 #ifdef CONFIG_TCP_MD5SIG
224 tp->af_specific = &tcp_sock_ipv6_specific;
228 np->saddr = sk->sk_v6_rcv_saddr;
233 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
234 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow descriptor and route it, honoring any IPv6 tx options
 * (routing header may redirect the first hop via fl6_update_dst()).
 */
236 fl6.flowi6_proto = IPPROTO_TCP;
237 fl6.daddr = sk->sk_v6_daddr;
238 fl6.saddr = saddr ? *saddr : np->saddr;
239 fl6.flowi6_oif = sk->sk_bound_dev_if;
240 fl6.flowi6_mark = sk->sk_mark;
241 fl6.fl6_dport = usin->sin6_port;
242 fl6.fl6_sport = inet->inet_sport;
243 fl6.flowi6_uid = sk->sk_uid;
245 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
246 final_p = fl6_update_dst(&fl6, opt, &final);
248 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
250 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
258 sk->sk_v6_rcv_saddr = *saddr;
261 /* set the source address */
263 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
265 sk->sk_gso_type = SKB_GSO_TCPV6;
266 ip6_dst_store(sk, dst, NULL, NULL);
/* Extension headers consume payload space: account for them in both the
 * header-length bookkeeping and the MSS clamp.
 */
268 icsk->icsk_ext_hdr_len = 0;
270 icsk->icsk_ext_hdr_len = opt->opt_flen +
273 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
275 inet->inet_dport = usin->sin6_port;
277 tcp_set_state(sk, TCP_SYN_SENT);
278 err = inet6_hash_connect(tcp_death_row, sk);
/* Repair mode restores a frozen connection and must not pick a new ISN. */
284 if (likely(!tp->repair)) {
285 seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
286 sk->sk_v6_daddr.s6_addr32,
294 if (tcp_fastopen_defer_connect(sk, &err))
299 err = tcp_connect(sk);
/* Failure path: undo the state transitions made above. */
306 tcp_set_state(sk, TCP_CLOSE);
308 inet->inet_dport = 0;
309 sk->sk_route_caps = 0;
/* React to a Packet-Too-Big notification: refresh the PMTU on the cached
 * route and, if the route's MTU shrank below the cached value, resync the
 * MSS and retransmit outstanding data at the smaller size.
 * NOTE(review): truncated extract -- the early returns for the
 * LISTEN/CLOSE guard and a NULL dst are not visible here.
 */
313 static void tcp_v6_mtu_reduced(struct sock *sk)
315 struct dst_entry *dst;
317 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
320 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
324 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
325 tcp_sync_mss(sk, dst_mtu(dst));
326 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket the error refers to,
 * validate it (state, hop limit, sequence window), then act on the error
 * type -- redirect, PMTU update, or delivery of the converted errno to
 * the socket owner.
 * NOTE(review): heavily truncated extract -- several declarations
 * (sk, seq, fatal, err, tp, snd_una), goto labels and closing braces are
 * missing.
 */
330 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
331 u8 type, u8 code, int offset, __be32 info)
333 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
334 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
335 struct net *net = dev_net(skb->dev);
336 struct request_sock *fastopen;
337 struct ipv6_pinfo *np;
344 sk = __inet6_lookup_established(net, &tcp_hashinfo,
345 &hdr->daddr, th->dest,
346 &hdr->saddr, ntohs(th->source),
350 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
/* TIME-WAIT sockets: just drop the reference, nothing to report. */
355 if (sk->sk_state == TCP_TIME_WAIT) {
356 inet_twsk_put(inet_twsk(sk));
359 seq = ntohl(th->seq);
360 fatal = icmpv6_err_convert(type, code, &err);
361 if (sk->sk_state == TCP_NEW_SYN_RECV)
362 return tcp_req_err(sk, seq, fatal);
/* Owned by user context: count the dropped ICMP unless it is a PMTU
 * event, which is deferred below instead.
 */
365 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
366 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
368 if (sk->sk_state == TCP_CLOSE)
/* Drop errors whose hop limit is below the socket's minimum (an
 * anti-spoofing measure, cf. IP_MINTTL).
 */
371 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
372 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
377 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
378 fastopen = tp->fastopen_rsk;
379 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore ICMP errors quoting a sequence outside our send window. */
380 if (sk->sk_state != TCP_LISTEN &&
381 !between(seq, snd_una, tp->snd_nxt)) {
382 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
388 if (type == NDISC_REDIRECT) {
389 if (!sock_owned_by_user(sk)) {
390 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
393 dst->ops->redirect(dst, sk, skb);
398 if (type == ICMPV6_PKT_TOOBIG) {
399 /* We are not interested in TCP_LISTEN and open_requests
400 * (SYN-ACKs send out by Linux are always <576bytes so
401 * they should go through unfragmented).
403 if (sk->sk_state == TCP_LISTEN)
406 if (!ip6_sk_accept_pmtu(sk))
/* Apply the PMTU now if we own the socket, otherwise defer it to
 * release_sock() via the MTU_REDUCED_DEFERRED flag.
 */
409 tp->mtu_info = ntohl(info);
410 if (!sock_owned_by_user(sk))
411 tcp_v6_mtu_reduced(sk);
412 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
419 /* Might be for an request_sock */
420 switch (sk->sk_state) {
423 /* Only in fast or simultaneous open. If a fast open socket is
424 * is already accepted it is treated as a connected one below.
426 if (fastopen && !fastopen->sk)
429 if (!sock_owned_by_user(sk)) {
431 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
435 sk->sk_err_soft = err;
/* Established sockets: deliver the error only if IPV6_RECVERR is set,
 * otherwise record it as a soft error.
 */
439 if (!sock_owned_by_user(sk) && np->recverr) {
441 sk->sk_error_report(sk);
443 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending request socket: route if no
 * dst was supplied, construct the segment with tcp_make_synack(),
 * checksum it, and send via ip6_xmit() with the listener's tx options.
 * Returns the net_xmit_eval()'d transmit status.
 * NOTE(review): truncated extract -- the skb NULL-check, the
 * rcu_read_lock/unlock pair and some braces are missing.
 */
451 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
453 struct request_sock *req,
454 struct tcp_fastopen_cookie *foc,
455 enum tcp_synack_type synack_type)
457 struct inet_request_sock *ireq = inet_rsk(req);
458 struct ipv6_pinfo *np = inet6_sk(sk);
459 struct ipv6_txoptions *opt;
460 struct flowi6 *fl6 = &fl->u.ip6;
464 /* First, grab a route. */
465 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
466 IPPROTO_TCP)) == NULL)
469 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
472 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
473 &ireq->ir_v6_rmt_addr);
475 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
476 if (np->repflow && ireq->pktopts)
477 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Prefer request-specific options saved on the ireq over the
 * listener's (RCU-protected) socket options.
 */
480 opt = ireq->ipv6_opt;
482 opt = rcu_dereference(np->opt);
483 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
485 err = net_xmit_eval(err);
493 static void tcp_v6_reqsk_destructor(struct request_sock *req)
495 kfree(inet_rsk(req)->ipv6_opt);
496 kfree_skb(inet_rsk(req)->pktopts);
499 #ifdef CONFIG_TCP_MD5SIG
500 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
501 const struct in6_addr *addr)
503 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
506 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
507 const struct sock *addr_sk)
509 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* TCP_MD5SIG setsockopt handler: copy the user's tcp_md5sig request and
 * add or delete (keylen == 0) the key. v4-mapped addresses are stored
 * under AF_INET using the embedded IPv4 address (s6_addr32[3]).
 * NOTE(review): truncated extract -- the -EINVAL/-EFAULT error returns
 * for the validation checks are missing here.
 */
512 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
515 struct tcp_md5sig cmd;
516 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
518 if (optlen < sizeof(cmd))
521 if (copy_from_user(&cmd, optval, sizeof(cmd)))
524 if (sin6->sin6_family != AF_INET6)
/* Zero key length means "delete the key for this peer". */
527 if (!cmd.tcpm_keylen) {
528 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
529 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
531 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
535 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
538 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
539 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
540 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
542 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
543 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCP/IPv6 pseudo-header plus a copy of the TCP header (with a
 * zeroed checksum field in the copy, leaving the skb untouched) into the
 * per-cpu MD5 ahash request.
 * NOTE(review): truncated extract -- the assignment of bp (presumably
 * hp->scratch), the saddr/daddr stores into *bp, and the zeroing of the
 * copied checksum are not visible.
 */
546 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
547 const struct in6_addr *daddr,
548 const struct in6_addr *saddr,
549 const struct tcphdr *th, int nbytes)
551 struct tcp6_pseudohdr *bp;
552 struct scatterlist sg;
556 /* 1. TCP pseudo-header (RFC2460) */
559 bp->protocol = cpu_to_be32(IPPROTO_TCP);
560 bp->len = cpu_to_be32(nbytes);
/* 2. A private copy of the TCP header, hashed right after the
 * pseudo-header in one contiguous scratch buffer.
 */
562 _th = (struct tcphdr *)(bp + 1);
563 memcpy(_th, th, sizeof(*th));
566 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
567 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
568 sizeof(*bp) + sizeof(*th));
569 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over headers + key only (no payload), used
 * for RST/ACK segments built by tcp_v6_send_response(). On any failure
 * the output digest is zeroed so a garbage signature is never emitted.
 * NOTE(review): truncated extract -- the req assignment (hp->md5_req),
 * the clear_hash/clear_hash_noput labels and the return statements are
 * missing.
 */
572 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
573 const struct in6_addr *daddr, struct in6_addr *saddr,
574 const struct tcphdr *th)
576 struct tcp_md5sig_pool *hp;
577 struct ahash_request *req;
579 hp = tcp_get_md5sig_pool();
581 goto clear_hash_noput;
584 if (crypto_ahash_init(req))
586 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
588 if (tcp_md5_hash_key(hp, key))
590 ahash_request_set_crypt(req, NULL, md5_hash, 0);
591 if (crypto_ahash_final(req))
594 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
598 tcp_put_md5sig_pool();
600 memset(md5_hash, 0, 16);
/* Compute the MD5 signature over pseudo-header, TCP header, payload and
 * key for a full segment. Addresses come from the socket when one is
 * available (established/request), otherwise from the skb's IPv6 header.
 * On any failure the output digest is zeroed.
 * NOTE(review): truncated extract -- the req assignment, goto labels and
 * return statements are missing, as in tcp_v6_md5_hash_hdr() above.
 */
604 static int tcp_v6_md5_hash_skb(char *md5_hash,
605 const struct tcp_md5sig_key *key,
606 const struct sock *sk,
607 const struct sk_buff *skb)
609 const struct in6_addr *saddr, *daddr;
610 struct tcp_md5sig_pool *hp;
611 struct ahash_request *req;
612 const struct tcphdr *th = tcp_hdr(skb);
614 if (sk) { /* valid for establish/request sockets */
615 saddr = &sk->sk_v6_rcv_saddr;
616 daddr = &sk->sk_v6_daddr;
618 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
619 saddr = &ip6h->saddr;
620 daddr = &ip6h->daddr;
623 hp = tcp_get_md5sig_pool();
625 goto clear_hash_noput;
628 if (crypto_ahash_init(req))
/* skb->len covers header + payload; skb data is hashed starting
 * past the TCP header (th->doff << 2 bytes in).
 */
631 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
633 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
635 if (tcp_md5_hash_key(hp, key))
637 ahash_request_set_crypt(req, NULL, md5_hash, 0);
638 if (crypto_ahash_final(req))
641 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
645 tcp_put_md5sig_pool();
647 memset(md5_hash, 0, 16);
/* Validate the MD5 option of an inbound segment against the key (if any)
 * configured for the source address. The four cases: no key and no
 * option -> pass; key but no option, or option but no key -> fail with
 * the matching MIB counter; both present -> recompute and compare.
 * NOTE(review): truncated extract -- the genhash/newhash declarations,
 * the return statements, and the #else/#endif fallback are missing.
 */
653 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
654 const struct sk_buff *skb)
656 #ifdef CONFIG_TCP_MD5SIG
657 const __u8 *hash_location = NULL;
658 struct tcp_md5sig_key *hash_expected;
659 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
660 const struct tcphdr *th = tcp_hdr(skb);
664 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
665 hash_location = tcp_parse_md5sig_option(th);
667 /* We've parsed the options - do we have a hash? */
668 if (!hash_expected && !hash_location)
671 if (hash_expected && !hash_location) {
672 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
676 if (!hash_expected && hash_location) {
677 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
681 /* check the signature */
682 genhash = tcp_v6_md5_hash_skb(newhash,
686 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
687 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
688 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
689 genhash ? "failed" : "mismatch",
690 &ip6h->saddr, ntohs(th->source),
691 &ip6h->daddr, ntohs(th->dest));
/* Initialize the IPv6 parts of a freshly-minted request sock from the
 * incoming SYN: peer/local addresses, the interface for link-local
 * peers, and (when any rx option is requested) a reference to the SYN
 * skb so its options can be replayed to the child socket later.
 * NOTE(review): truncated extract -- the statement saving skb into
 * ireq->pktopts after the refcount bump is not visible.
 */
698 static void tcp_v6_init_req(struct request_sock *req,
699 const struct sock *sk_listener,
702 struct inet_request_sock *ireq = inet_rsk(req);
703 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
705 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
706 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
708 /* So that link locals have meaning */
709 if (!sk_listener->sk_bound_dev_if &&
710 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
711 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep the SYN skb alive iff some rx option will need its contents
 * (but not when this is a TIME-WAIT recycled ISN).
 */
713 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
714 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
715 np->rxopt.bits.rxinfo ||
716 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
717 np->rxopt.bits.rxohlim || np->repflow)) {
718 atomic_inc(&skb->users);
/* tcp_request_sock_ops route hook: resolve a route for the SYN-ACK of a
 * pending request via inet6_csk_route_req().
 * NOTE(review): truncated extract -- a parameter line (presumably
 * "struct flowi *fl,", referenced as fl below) is missing from the
 * signature.
 */
723 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
725 const struct request_sock *req)
727 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* request_sock_ops for IPv6 TCP: sizes, SYN-ACK (re)transmission, ACK,
 * RST and cleanup callbacks for embryonic connections.
 * NOTE(review): truncated extract -- some initializers (e.g. .family)
 * and the closing brace are not visible.
 */
730 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
732 .obj_size = sizeof(struct tcp6_request_sock),
733 .rtx_syn_ack = tcp_rtx_synack,
734 .send_ack = tcp_v6_reqsk_send_ack,
735 .destructor = tcp_v6_reqsk_destructor,
736 .send_reset = tcp_v6_send_reset,
737 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific hooks used by the generic tcp_conn_request() path for
 * IPv6: MSS clamp for the minimum IPv6 MTU, MD5 hooks, request init,
 * syncookie sequence generation, routing, ISN generation and SYN-ACK
 * transmission.
 * NOTE(review): truncated extract -- the #endif lines for the two ifdef
 * sections and the closing brace are not visible.
 */
740 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
741 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
742 sizeof(struct ipv6hdr),
743 #ifdef CONFIG_TCP_MD5SIG
744 .req_md5_lookup = tcp_v6_md5_lookup,
745 .calc_md5_hash = tcp_v6_md5_hash_skb,
747 .init_req = tcp_v6_init_req,
748 #ifdef CONFIG_SYN_COOKIES
749 .cookie_init_seq = cookie_v6_init_sequence,
751 .route_req = tcp_v6_route_req,
752 .init_seq_tsoff = tcp_v6_init_seq_and_tsoff,
753 .send_synack = tcp_v6_send_synack,
/* Build and send a bare control segment (RST when rst != 0, otherwise a
 * pure ACK) in response to an incoming skb, using the per-netns control
 * socket. Optionally carries timestamps and an MD5 signature. Source
 * and destination are the incoming packet's, swapped.
 * NOTE(review): truncated extract -- allocation failure check, dst error
 * handling, kfree_skb() on the failure path and closing braces are
 * missing.
 */
756 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
757 u32 ack, u32 win, u32 tsval, u32 tsecr,
758 int oif, struct tcp_md5sig_key *key, int rst,
759 u8 tclass, __be32 label)
761 const struct tcphdr *th = tcp_hdr(skb);
763 struct sk_buff *buff;
765 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
766 struct sock *ctl_sk = net->ipv6.tcp_sk;
767 unsigned int tot_len = sizeof(struct tcphdr);
768 struct dst_entry *dst;
/* Reserve option space for timestamps and/or MD5 before allocating. */
772 tot_len += TCPOLEN_TSTAMP_ALIGNED;
773 #ifdef CONFIG_TCP_MD5SIG
775 tot_len += TCPOLEN_MD5SIG_ALIGNED;
778 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
783 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
785 t1 = (struct tcphdr *) skb_push(buff, tot_len);
786 skb_reset_transport_header(buff);
788 /* Swap the send and the receive. */
789 memset(t1, 0, sizeof(*t1));
790 t1->dest = th->source;
791 t1->source = th->dest;
792 t1->doff = tot_len / 4;
793 t1->seq = htonl(seq);
794 t1->ack_seq = htonl(ack);
795 t1->ack = !rst || !th->ack;
797 t1->window = htons(win);
/* Lay out TCP options directly after the fixed header. */
799 topt = (__be32 *)(t1 + 1);
802 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
803 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
804 *topt++ = htonl(tsval);
805 *topt++ = htonl(tsecr);
808 #ifdef CONFIG_TCP_MD5SIG
810 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
811 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
812 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
813 &ipv6_hdr(skb)->saddr,
814 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: mirror the incoming packet's addresses. */
818 memset(&fl6, 0, sizeof(fl6));
819 fl6.daddr = ipv6_hdr(skb)->saddr;
820 fl6.saddr = ipv6_hdr(skb)->daddr;
821 fl6.flowlabel = label;
823 buff->ip_summed = CHECKSUM_PARTIAL;
826 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
828 fl6.flowi6_proto = IPPROTO_TCP;
/* Link-local destinations need an explicit oif if none was given. */
829 if (rt6_need_strict(&fl6.daddr) && !oif)
830 fl6.flowi6_oif = tcp_v6_iif(skb);
832 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
835 fl6.flowi6_oif = oif;
838 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
839 fl6.fl6_dport = t1->dest;
840 fl6.fl6_sport = t1->source;
841 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
842 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
844 /* Pass a socket to ip6_dst_lookup either it is for RST
845 * Underlying function will use this to retrieve the network
848 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
850 skb_dst_set(buff, dst);
851 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
852 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
854 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in response to a bogus segment. If MD5 is configured and
 * we have no established socket (e.g. RST for a stray segment), look up
 * the listener by source port to find the key -- a RST is only emitted
 * if the incoming segment's MD5 signature verifies against that key.
 * NOTE(review): truncated extract -- declarations (genhash, oif), the
 * out label releasing sk1, and closing braces are missing.
 */
861 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
863 const struct tcphdr *th = tcp_hdr(skb);
864 u32 seq = 0, ack_seq = 0;
865 struct tcp_md5sig_key *key = NULL;
866 #ifdef CONFIG_TCP_MD5SIG
867 const __u8 *hash_location = NULL;
868 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
869 unsigned char newhash[16];
871 struct sock *sk1 = NULL;
/* Never reset in reply to another RST (visible via th->rst use below in
 * the original); also never RST a non-unicast destination.
 */
878 /* If sk not NULL, it means we did a successful lookup and incoming
879 * route had to be correct. prequeue might have dropped our dst.
881 if (!sk && !ipv6_unicast_destination(skb))
884 #ifdef CONFIG_TCP_MD5SIG
886 hash_location = tcp_parse_md5sig_option(th);
887 if (sk && sk_fullsock(sk)) {
888 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
889 } else if (hash_location) {
891 * active side is lost. Try to find listening socket through
892 * source port, and then find md5 key through listening socket.
893 * we are not loose security here:
894 * Incoming packet is checked with md5 hash with finding key,
895 * no RST generated if md5 hash doesn't match.
897 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
898 &tcp_hashinfo, NULL, 0,
900 th->source, &ipv6h->daddr,
901 ntohs(th->source), tcp_v6_iif(skb));
905 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the incoming signature before answering with a RST. */
909 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
910 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RFC 793: if the segment had an ACK, reset with its ack number as our
 * seq; otherwise ack the segment's data span.
 */
916 seq = ntohl(th->ack_seq);
918 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
921 oif = sk ? sk->sk_bound_dev_if : 0;
922 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
924 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst = 0: emit a pure
 * ACK (used for TIME-WAIT and request-sock ACKs below).
 * NOTE(review): truncated extract -- a trailing parameter line
 * (presumably the flow label) and the closing of the call are missing.
 */
930 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
931 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
932 struct tcp_md5sig_key *key, u8 tclass,
935 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* Answer a segment aimed at a TIME-WAIT socket with an ACK carrying the
 * saved snd_nxt/rcv_nxt, scaled window, timestamps (offset by the
 * per-connection ts_offset), MD5 key, tclass and flow label.
 * NOTE(review): truncated extract -- the inet_twsk_put() release after
 * the call is not visible.
 */
939 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
941 struct inet_timewait_sock *tw = inet_twsk(sk);
942 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
944 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
945 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
946 tcp_time_stamp + tcptw->tw_ts_offset,
947 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
948 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request sock (SYN-RECV / Fast Open): sequence
 * numbers come from the request, the receive window is right-shifted by
 * the negotiated scale per RFC 7323, and the MD5 key is looked up by the
 * segment's destination address.
 * NOTE(review): truncated extract -- the trailing arguments of the call
 * (tclass/label) and the closing paren are not visible.
 */
953 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
954 struct request_sock *req)
956 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
957 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
960 * The window field (SEG.WND) of every outgoing segment, with the
961 * exception of <SYN> segments, MUST be right-shifted by
962 * Rcv.Wind.Shift bits:
964 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
965 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
966 tcp_rsk(req)->rcv_nxt,
967 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
968 tcp_time_stamp + tcp_rsk(req)->ts_off,
969 req->ts_recent, sk->sk_bound_dev_if,
970 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
/* Validate a syncookie ACK: when syncookies are compiled in and the
 * segment has no SYN, cookie_v6_check() may mint a child socket.
 * NOTE(review): truncated extract -- the !th->syn guard and the final
 * return of sk are not visible.
 */
975 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
977 #ifdef CONFIG_SYN_COOKIES
978 const struct tcphdr *th = tcp_hdr(skb);
981 sk = cookie_v6_check(sk, skb);
/* SYN handler: route v4 (ETH_P_IP) packets to the IPv4 path, refuse
 * non-unicast destinations without a reset, otherwise hand off to the
 * generic tcp_conn_request() with the IPv6 ops tables.
 * NOTE(review): truncated extract -- the drop label and its goto are not
 * visible between lines 991 and 994.
 */
986 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
988 if (skb->protocol == htons(ETH_P_IP))
989 return tcp_v4_conn_request(sk, skb);
991 if (!ipv6_unicast_destination(skb))
994 return tcp_conn_request(&tcp6_request_sock_ops,
995 &tcp_request_sock_ipv6_ops, sk, skb);
999 return 0; /* don't send reset */
1002 static void tcp_v6_restore_cb(struct sk_buff *skb)
1004 /* We need to move header back to the beginning if xfrm6_policy_check()
1005 * and tcp_v6_fill_cb() are going to be called again.
1006 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1008 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1009 sizeof(struct inet6_skb_parm));
/* Create the child socket for a completed handshake. Two paths: for a
 * v4 (ETH_P_IP) SYN the work is delegated to tcp_v4_syn_recv_sock() and
 * the child is re-dressed with v4-mapped ops; for native IPv6 a child is
 * created, addressed, routed, given cloned tx options, an inherited MD5
 * key and the saved SYN pktoptions.
 * NOTE(review): heavily truncated extract -- NULL checks, several error
 * labels (overflow/drop paths), and closing braces are missing.
 */
1012 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1013 struct request_sock *req,
1014 struct dst_entry *dst,
1015 struct request_sock *req_unhash,
1018 struct inet_request_sock *ireq;
1019 struct ipv6_pinfo *newnp;
1020 const struct ipv6_pinfo *np = inet6_sk(sk);
1021 struct ipv6_txoptions *opt;
1022 struct tcp6_sock *newtcp6sk;
1023 struct inet_sock *newinet;
1024 struct tcp_sock *newtp;
1026 #ifdef CONFIG_TCP_MD5SIG
1027 struct tcp_md5sig_key *key;
/* --- v4-mapped path: let IPv4 build the child, then re-dress it. --- */
1031 if (skb->protocol == htons(ETH_P_IP)) {
1036 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1037 req_unhash, own_req);
1042 newtcp6sk = (struct tcp6_sock *)newsk;
1043 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1045 newinet = inet_sk(newsk);
1046 newnp = inet6_sk(newsk);
1047 newtp = tcp_sk(newsk);
1049 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1051 newnp->saddr = newsk->sk_v6_rcv_saddr;
1053 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1054 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1055 #ifdef CONFIG_TCP_MD5SIG
1056 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
/* Per-socket IPv6 state must not be shared with the listener. */
1059 newnp->ipv6_ac_list = NULL;
1060 newnp->ipv6_fl_list = NULL;
1061 newnp->pktoptions = NULL;
1063 newnp->mcast_oif = tcp_v6_iif(skb);
1064 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1065 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1067 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1070 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1071 * here, tcp_create_openreq_child now does this for us, see the comment in
1072 * that function for the gory details. -acme
1075 /* It is tricky place. Until this moment IPv4 tcp
1076 worked with IPv6 icsk.icsk_af_ops.
1079 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path. --- */
1084 ireq = inet_rsk(req);
1086 if (sk_acceptq_is_full(sk))
1090 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1095 newsk = tcp_create_openreq_child(sk, req, skb);
1100 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1101 * count here, tcp_create_openreq_child now does this for us, see the
1102 * comment in that function for the gory details. -acme
1105 newsk->sk_gso_type = SKB_GSO_TCPV6;
1106 ip6_dst_store(newsk, dst, NULL, NULL);
1107 inet6_sk_rx_dst_set(newsk, skb);
1109 newtcp6sk = (struct tcp6_sock *)newsk;
1110 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1112 newtp = tcp_sk(newsk);
1113 newinet = inet_sk(newsk);
1114 newnp = inet6_sk(newsk);
1116 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1118 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1119 newnp->saddr = ireq->ir_v6_loc_addr;
1120 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1121 newsk->sk_bound_dev_if = ireq->ir_iif;
1123 /* Now IPv6 options...
1125 First: no IPv4 options.
1127 newinet->inet_opt = NULL;
1128 newnp->ipv6_ac_list = NULL;
1129 newnp->ipv6_fl_list = NULL;
1132 newnp->rxopt.all = np->rxopt.all;
1134 newnp->pktoptions = NULL;
1136 newnp->mcast_oif = tcp_v6_iif(skb);
1137 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1138 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1140 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1142 /* Clone native IPv6 options from listening socket (if any)
1144 Yes, keeping reference count would be much more clever,
1145 but we make one more one thing there: reattach optmem
1148 opt = ireq->ipv6_opt;
1150 opt = rcu_dereference(np->opt);
1152 opt = ipv6_dup_options(newsk, opt);
1153 RCU_INIT_POINTER(newnp->opt, opt);
1155 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1157 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1160 tcp_ca_openreq_child(newsk, dst);
1162 tcp_sync_mss(newsk, dst_mtu(dst));
1163 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1165 tcp_initialize_rcv_mss(newsk);
/* IPv4 fields are meaningless on a pure IPv6 socket; park them on
 * loopback markers.
 */
1167 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1168 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1170 #ifdef CONFIG_TCP_MD5SIG
1171 /* Copy over the MD5 key from the original socket */
1172 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1174 /* We're using one, so create a matching key
1175 * on the newsk structure. If we fail to get
1176 * memory, then we end up not copying the key
1179 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1180 AF_INET6, key->key, key->keylen,
1181 sk_gfp_mask(sk, GFP_ATOMIC));
1185 if (__inet_inherit_port(sk, newsk) < 0) {
1186 inet_csk_prepare_forced_close(newsk);
1190 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1192 tcp_move_syn(newtp, req);
1194 /* Clone pktoptions received with SYN, if we own the req */
1195 if (ireq->pktopts) {
1196 newnp->pktoptions = skb_clone(ireq->pktopts,
1197 sk_gfp_mask(sk, GFP_ATOMIC));
1198 consume_skb(ireq->pktopts);
1199 ireq->pktopts = NULL;
1200 if (newnp->pktoptions) {
1201 tcp_v6_restore_cb(newnp->pktoptions);
1202 skb_set_owner_r(newnp->pktoptions, newsk);
/* Accept-queue overflow accounting on the failure path. */
1210 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1218 /* The socket must have it's spinlock held when we get
1219 * here, unless it is a TCP_LISTEN socket.
1221 * We have a potential double-lock case here, so even when
1222 * doing backlog processing we use the BH locking scheme.
1223 * This is because we cannot sleep with the original spinlock
/* Backlog/receive entry point for an IPv6 TCP socket: dispatches v4
 * frames to tcp_v4_do_rcv(), runs the established fast path (with cached
 * rx dst validation), handles LISTEN via cookie check + child
 * processing, and finally latches IPV6_PKTOPTIONS from a clone of the
 * skb.
 * NOTE(review): heavily truncated extract -- the reset/discard/
 * csum_error labels' control flow and several closing braces are
 * missing.
 */
1226 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1228 struct ipv6_pinfo *np = inet6_sk(sk);
1229 struct tcp_sock *tp;
1230 struct sk_buff *opt_skb = NULL;
1232 /* Imagine: socket is IPv6. IPv4 packet arrives,
1233 goes to IPv4 receive handler and backlogged.
1234 From backlog it always goes here. Kerboom...
1235 Fortunately, tcp_rcv_established and rcv_established
1236 handle them correctly, but it is not case with
1237 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1240 if (skb->protocol == htons(ETH_P_IP))
1241 return tcp_v4_do_rcv(sk, skb);
1243 if (tcp_filter(sk, skb))
1247 * socket locking is here for SMP purposes as backlog rcv
1248 * is currently called with bh processing disabled.
1251 /* Do Stevens' IPV6_PKTOPTIONS.
1253 Yes, guys, it is the only place in our code, where we
1254 may make it not affecting IPv4.
1255 The rest of code is protocol independent,
1256 and I do not like idea to uglify IPv4.
1258 Actually, all the idea behind IPV6_PKTOPTIONS
1259 looks not very well thought. For now we latch
1260 options, received in the last packet, enqueued
1261 by tcp. Feel free to propose better solution.
1265 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
/* Established fast path: verify the cached rx route before use. */
1267 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1268 struct dst_entry *dst = sk->sk_rx_dst;
1270 sock_rps_save_rxhash(sk, skb);
1271 sk_mark_napi_id(sk, skb);
1273 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1274 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1276 sk->sk_rx_dst = NULL;
1280 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1282 goto ipv6_pktoptions;
1286 if (tcp_checksum_complete(skb))
/* LISTEN: a valid cookie ACK mints a child; hand the skb to it. */
1289 if (sk->sk_state == TCP_LISTEN) {
1290 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1296 sock_rps_save_rxhash(nsk, skb);
1297 sk_mark_napi_id(nsk, skb);
1298 if (tcp_child_process(sk, nsk, skb))
1301 __kfree_skb(opt_skb);
1305 sock_rps_save_rxhash(sk, skb);
1307 if (tcp_rcv_state_process(sk, skb))
1310 goto ipv6_pktoptions;
/* reset/discard error paths. */
1314 tcp_v6_send_reset(sk, skb);
1317 __kfree_skb(opt_skb);
1321 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1322 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1327 /* Do you ask, what is it?
1329 1. skb was enqueued by tcp.
1330 2. skb is added to tail of read queue, rather than out of order.
1331 3. socket is not in passive state.
1332 4. Finally, it really contains options, which user wants to receive.
1335 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1336 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1337 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1338 np->mcast_oif = tcp_v6_iif(opt_skb);
1339 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1340 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1341 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1342 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1344 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
/* Swap the clone into np->pktoptions; the displaced skb (if any) is
 * freed below.
 */
1345 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1346 skb_set_owner_r(opt_skb, sk);
1347 tcp_v6_restore_cb(opt_skb);
1348 opt_skb = xchg(&np->pktoptions, opt_skb);
1350 __kfree_skb(opt_skb);
1351 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB from the packet headers: first stash IP6CB into
 * the cb's header union (it is about to be overwritten), then fill in
 * sequence numbers, flags, DSCP and sacked state.
 * NOTE(review): truncated extract -- the barrier() mentioned by the
 * comment (and the closing brace) are not visible between lines 1368
 * and 1371.
 */
1359 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1360 const struct tcphdr *th)
1362 /* This is tricky: we move IP6CB at its correct location into
1363 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1364 * _decode_session6() uses IP6CB().
1365 * barrier() makes sure compiler won't play aliasing games.
1367 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1368 sizeof(struct inet6_skb_parm));
1371 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence unit each, plus payload. */
1372 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1373 skb->len - th->doff*4);
1374 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1375 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1376 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1377 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1378 TCP_SKB_CB(skb)->sacked = 0;
/* tcp_v6_rcv - main IPv6 TCP receive entry point (softirq context).
 *
 * Validates the TCP header and checksum, looks up the owning socket, and
 * dispatches the segment: request sockets (TCP_NEW_SYN_RECV) go through
 * tcp_check_req(), listeners and established sockets through
 * tcp_v6_do_rcv(), TIME-WAIT sockets through tcp_timewait_state_process().
 * Segments with no matching socket get a RST.
 *
 * NOTE(review): several goto labels and braces fall in elided lines of this
 * excerpt; comments on those paths are inferred from the visible gotos.
 */
1381 static int tcp_v6_rcv(struct sk_buff *skb)
1383 const struct tcphdr *th;
1384 const struct ipv6hdr *hdr;
1388 struct net *net = dev_net(skb->dev);
/* Only handle packets addressed to this host. */
1390 if (skb->pkt_type != PACKET_HOST)
1394 * Count it even if it's bad.
1396 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Make sure the fixed TCP header, then the full header incl. options,
 * are in the linear skb area before touching them.
 */
1398 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1401 th = (const struct tcphdr *)skb->data;
1403 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1405 if (!pskb_may_pull(skb, th->doff*4))
/* Validate/complete the checksum against the IPv6 pseudo header. */
1408 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read pointers: pskb_may_pull() may have moved skb->data. */
1411 th = (const struct tcphdr *)skb->data;
1412 hdr = ipv6_hdr(skb);
1415 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1416 th->source, th->dest, inet6_iif(skb),
1422 if (sk->sk_state == TCP_TIME_WAIT)
/* SYN-ACK'd request socket: validate and possibly create the child. */
1425 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1426 struct request_sock *req = inet_reqsk(sk);
1429 sk = req->rsk_listener;
1430 tcp_v6_fill_cb(skb, hdr, th);
1431 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1432 sk_drops_add(sk, skb);
/* Listener changed state under us: drop the request and retry lookup. */
1436 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1437 inet_csk_reqsk_queue_drop_and_put(sk, req);
1442 nsk = tcp_check_req(sk, skb, req, false);
1445 goto discard_and_relse;
1449 tcp_v6_restore_cb(skb);
1450 } else if (tcp_child_process(sk, nsk, skb)) {
1451 tcp_v6_send_reset(nsk, skb);
1452 goto discard_and_relse;
/* Drop segments arriving with a hop limit below the socket's minimum
 * (IPV6_MINHOPCOUNT, a generalized TTL security mechanism).
 */
1458 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1459 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1460 goto discard_and_relse;
1463 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1464 goto discard_and_relse;
/* Must run after xfrm6_policy_check(): fill_cb clobbers IP6CB(). */
1466 tcp_v6_fill_cb(skb, hdr, th);
1468 if (tcp_v6_inbound_md5_hash(sk, skb))
1469 goto discard_and_relse;
1471 if (tcp_filter(sk, skb))
1472 goto discard_and_relse;
/* tcp_filter() (BPF) may have trimmed/reallocated the skb. */
1473 th = (const struct tcphdr *)skb->data;
1474 hdr = ipv6_hdr(skb);
1478 if (sk->sk_state == TCP_LISTEN) {
1479 ret = tcp_v6_do_rcv(sk, skb);
1480 goto put_and_return;
1483 sk_incoming_cpu_update(sk);
/* Process directly if the socket is not owned by a process context,
 * otherwise queue to the backlog for release_sock() to handle.
 */
1485 bh_lock_sock_nested(sk);
1486 tcp_segs_in(tcp_sk(sk), skb);
1488 if (!sock_owned_by_user(sk)) {
1489 if (!tcp_prequeue(sk, skb))
1490 ret = tcp_v6_do_rcv(sk, skb);
1491 } else if (tcp_add_backlog(sk, skb)) {
1492 goto discard_and_relse;
1499 return ret ? -1 : 0;
/* No-socket path: policy-check, then RST anything that isn't corrupt. */
1502 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1505 tcp_v6_fill_cb(skb, hdr, th);
1507 if (tcp_checksum_complete(skb)) {
1509 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1511 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1513 tcp_v6_send_reset(NULL, skb);
1521 sk_drops_add(sk, skb);
/* TIME-WAIT path (do_time_wait label elided in this excerpt). */
1527 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1528 inet_twsk_put(inet_twsk(sk));
1532 tcp_v6_fill_cb(skb, hdr, th);
1534 if (tcp_checksum_complete(skb)) {
1535 inet_twsk_put(inet_twsk(sk));
1539 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN: a new SYN may legitimately reuse the tuple; if a
 * listener exists, retire the timewait socket and restart processing.
 */
1544 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1545 skb, __tcp_hdrlen(th),
1546 &ipv6_hdr(skb)->saddr, th->source,
1547 &ipv6_hdr(skb)->daddr,
1548 ntohs(th->dest), tcp_v6_iif(skb));
1550 struct inet_timewait_sock *tw = inet_twsk(sk);
1551 inet_twsk_deschedule_put(tw);
1553 tcp_v6_restore_cb(skb);
1557 /* Fall through to ACK */
1560 tcp_v6_timewait_ack(sk, skb);
/* TCP_TW_RST: peer must be reset and the timewait socket retired. */
1563 tcp_v6_restore_cb(skb);
1564 tcp_v6_send_reset(sk, skb);
1565 inet_twsk_deschedule_put(inet_twsk(sk));
1567 case TCP_TW_SUCCESS:
/* tcp_v6_early_demux - early socket demultiplexing, called from the IPv6
 * input path before routing.
 *
 * If the segment matches an established socket, attach the socket (and its
 * cached rx dst, when still valid for this interface) to the skb so the
 * later full lookup and route lookup can be skipped.
 */
1573 static void tcp_v6_early_demux(struct sk_buff *skb)
1575 const struct ipv6hdr *hdr;
1576 const struct tcphdr *th;
1579 if (skb->pkt_type != PACKET_HOST)
1582 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1585 hdr = ipv6_hdr(skb);
1588 if (th->doff < sizeof(struct tcphdr) / 4)
1591 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1592 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1593 &hdr->saddr, th->source,
1594 &hdr->daddr, ntohs(th->dest),
/* sock_edemux releases the socket reference when the skb is freed. */
1598 skb->destructor = sock_edemux;
1599 if (sk_fullsock(sk)) {
1600 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Reuse the cached dst only if it is still valid and was learned
 * on the same incoming interface.
 */
1603 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1605 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1606 skb_dst_set_noref(skb, dst);
/* TIME-WAIT socket operations for IPv6 TCP; shares the generic TCP
 * timewait helpers, only the object size is IPv6 specific.
 */
1611 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1612 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1613 .twsk_unique = tcp_twsk_unique,
1614 .twsk_destructor = tcp_twsk_destructor,
/* Address-family operations for native IPv6 TCP sockets. */
1617 static const struct inet_connection_sock_af_ops ipv6_specific = {
1618 .queue_xmit = inet6_csk_xmit,
1619 .send_check = tcp_v6_send_check,
1620 .rebuild_header = inet6_sk_rebuild_header,
1621 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1622 .conn_request = tcp_v6_conn_request,
1623 .syn_recv_sock = tcp_v6_syn_recv_sock,
1624 .net_header_len = sizeof(struct ipv6hdr),
1625 .net_frag_header_len = sizeof(struct frag_hdr),
1626 .setsockopt = ipv6_setsockopt,
1627 .getsockopt = ipv6_getsockopt,
1628 .addr2sockaddr = inet6_csk_addr2sockaddr,
1629 .sockaddr_len = sizeof(struct sockaddr_in6),
1630 #ifdef CONFIG_COMPAT
1631 .compat_setsockopt = compat_ipv6_setsockopt,
1632 .compat_getsockopt = compat_ipv6_getsockopt,
1634 .mtu_reduced = tcp_v6_mtu_reduced,
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1637 #ifdef CONFIG_TCP_MD5SIG
1638 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1639 .md5_lookup = tcp_v6_md5_lookup,
1640 .calc_md5_hash = tcp_v6_md5_hash_skb,
1641 .md5_parse = tcp_v6_parse_md5_keys,
1646 * TCP over IPv4 via INET6 API
/* Address-family operations for IPv4-mapped sockets (an AF_INET6 socket
 * talking to an IPv4 peer): transmit and header handling come from the
 * IPv4 stack, socket-option handling stays IPv6.
 */
1648 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1649 .queue_xmit = ip_queue_xmit,
1650 .send_check = tcp_v4_send_check,
1651 .rebuild_header = inet_sk_rebuild_header,
1652 .sk_rx_dst_set = inet_sk_rx_dst_set,
1653 .conn_request = tcp_v6_conn_request,
1654 .syn_recv_sock = tcp_v6_syn_recv_sock,
1655 .net_header_len = sizeof(struct iphdr),
1656 .setsockopt = ipv6_setsockopt,
1657 .getsockopt = ipv6_getsockopt,
1658 .addr2sockaddr = inet6_csk_addr2sockaddr,
1659 .sockaddr_len = sizeof(struct sockaddr_in6),
1660 #ifdef CONFIG_COMPAT
1661 .compat_setsockopt = compat_ipv6_setsockopt,
1662 .compat_getsockopt = compat_ipv6_getsockopt,
1664 .mtu_reduced = tcp_v4_mtu_reduced,
/* TCP-MD5 operations for IPv4-mapped sockets: hashing uses the IPv4
 * pseudo header, key parsing stays with the IPv6 sockopt parser.
 */
1667 #ifdef CONFIG_TCP_MD5SIG
1668 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1669 .md5_lookup = tcp_v4_md5_lookup,
1670 .calc_md5_hash = tcp_v4_md5_hash_skb,
1671 .md5_parse = tcp_v6_parse_md5_keys,
1675 /* NOTE: A lot of things set to zero explicitly by call to
1676 * sk_alloc() so need not be done here.
/* tcp_v6_init_sock - per-socket init hook (struct proto .init): install the
 * IPv6 af_ops (and MD5 ops when configured). Generic TCP init is presumably
 * done in an elided call here — confirm against the full source.
 */
1678 static int tcp_v6_init_sock(struct sock *sk)
1680 struct inet_connection_sock *icsk = inet_csk(sk);
1684 icsk->icsk_af_ops = &ipv6_specific;
1686 #ifdef CONFIG_TCP_MD5SIG
1687 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* tcp_v6_destroy_sock - teardown hook: run the shared IPv4/TCP destroy
 * logic, then release IPv6-specific socket state.
 */
1693 static void tcp_v6_destroy_sock(struct sock *sk)
1695 tcp_v4_destroy_sock(sk);
1696 inet6_destroy_sock(sk);
1699 #ifdef CONFIG_PROC_FS
1700 /* Proc filesystem TCPv6 sock list dumping. */
/* get_openreq6 - print one request socket (pending SYN) as a
 * /proc/net/tcp6 row. @i is the row index printed in the first column.
 */
1701 static void get_openreq6(struct seq_file *seq,
1702 const struct request_sock *req, int i)
/* Remaining lifetime of the SYN-ACK retransmit timer, in jiffies. */
1704 long ttd = req->rsk_timer.expires - jiffies;
1705 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1706 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1712 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1713 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1715 src->s6_addr32[0], src->s6_addr32[1],
1716 src->s6_addr32[2], src->s6_addr32[3],
1717 inet_rsk(req)->ir_num,
1718 dest->s6_addr32[0], dest->s6_addr32[1],
1719 dest->s6_addr32[2], dest->s6_addr32[3],
1720 ntohs(inet_rsk(req)->ir_rmt_port),
1722 0, 0, /* could print option size, but that is af dependent. */
1723 1, /* timers active (only the expire timer) */
1724 jiffies_to_clock_t(ttd),
/* UID of the listener that owns this request. */
1726 from_kuid_munged(seq_user_ns(seq),
1727 sock_i_uid(req->rsk_listener)),
1728 0, /* non standard timer */
1729 0, /* open_requests have no inode */
/* get_tcp6_sock - print one full TCP socket as a /proc/net/tcp6 row:
 * addresses/ports, state, queue sizes, timer info, uid, refcount and
 * congestion data. Runs lockless; values may be transiently inconsistent.
 */
1733 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1735 const struct in6_addr *dest, *src;
1738 unsigned long timer_expires;
1739 const struct inet_sock *inet = inet_sk(sp);
1740 const struct tcp_sock *tp = tcp_sk(sp);
1741 const struct inet_connection_sock *icsk = inet_csk(sp);
1742 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1746 dest = &sp->sk_v6_daddr;
1747 src = &sp->sk_v6_rcv_saddr;
1748 destp = ntohs(inet->inet_dport);
1749 srcp = ntohs(inet->inet_sport);
/* Pick the pending timer to report: retransmit-class timers and the
 * zero-window probe timer use icsk_timeout, keepalive uses sk_timer;
 * the per-case timer-active codes are set in lines elided here.
 */
1751 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1752 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1753 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1755 timer_expires = icsk->icsk_timeout;
1756 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1758 timer_expires = icsk->icsk_timeout;
1759 } else if (timer_pending(&sp->sk_timer)) {
1761 timer_expires = sp->sk_timer.expires;
1764 timer_expires = jiffies;
/* Read sk_state once (paired with sk_state_store elsewhere). */
1767 state = sk_state_load(sp);
1768 if (state == TCP_LISTEN)
1769 rx_queue = sp->sk_ack_backlog;
1771 /* Because we don't lock the socket,
1772 * we might find a transient negative value.
1774 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1777 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1778 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1780 src->s6_addr32[0], src->s6_addr32[1],
1781 src->s6_addr32[2], src->s6_addr32[3], srcp,
1782 dest->s6_addr32[0], dest->s6_addr32[1],
1783 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx_queue: bytes sent but not yet acked. */
1785 tp->write_seq - tp->snd_una,
1788 jiffies_delta_to_clock_t(timer_expires - jiffies),
1789 icsk->icsk_retransmits,
1790 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1791 icsk->icsk_probes_out,
1793 atomic_read(&sp->sk_refcnt), sp,
1794 jiffies_to_clock_t(icsk->icsk_rto),
1795 jiffies_to_clock_t(icsk->icsk_ack.ato),
1796 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* Last column: fastopen max queue length for listeners, otherwise
 * ssthresh (-1 while still in initial slow start).
 */
1798 state == TCP_LISTEN ?
1799 fastopenq->max_qlen :
1800 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* get_timewait6_sock - print one TIME-WAIT socket as a /proc/net/tcp6 row.
 * Most columns are zero/constant because a timewait socket keeps almost
 * no state; timer code 3 denotes the TIME-WAIT timer.
 */
1804 static void get_timewait6_sock(struct seq_file *seq,
1805 struct inet_timewait_sock *tw, int i)
/* Remaining TIME-WAIT lifetime in jiffies (may be negative briefly). */
1807 long delta = tw->tw_timer.expires - jiffies;
1808 const struct in6_addr *dest, *src;
1811 dest = &tw->tw_v6_daddr;
1812 src = &tw->tw_v6_rcv_saddr;
1813 destp = ntohs(tw->tw_dport);
1814 srcp = ntohs(tw->tw_sport);
1817 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1818 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1820 src->s6_addr32[0], src->s6_addr32[1],
1821 src->s6_addr32[2], src->s6_addr32[3], srcp,
1822 dest->s6_addr32[0], dest->s6_addr32[1],
1823 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1824 tw->tw_substate, 0, 0,
1825 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1826 atomic_read(&tw->tw_refcnt), tw);
/* tcp6_seq_show - seq_file .show callback for /proc/net/tcp6: print the
 * header for the start token, else dispatch on socket state to the
 * appropriate row printer (timewait, request, or full socket).
 */
1829 static int tcp6_seq_show(struct seq_file *seq, void *v)
1831 struct tcp_iter_state *st;
1832 struct sock *sk = v;
1834 if (v == SEQ_START_TOKEN) {
1839 "st tx_queue rx_queue tr tm->when retrnsmt"
1840 " uid timeout inode\n");
1845 if (sk->sk_state == TCP_TIME_WAIT)
1846 get_timewait6_sock(seq, v, st->num);
1847 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1848 get_openreq6(seq, v, st->num);
1850 get_tcp6_sock(seq, v, st->num);
/* File operations backing /proc/net/tcp6. */
1855 static const struct file_operations tcp6_afinfo_seq_fops = {
1856 .owner = THIS_MODULE,
1857 .open = tcp_seq_open,
1859 .llseek = seq_lseek,
1860 .release = seq_release_net
/* Per-AF descriptor tying the tcp6 seq_file ops and show callback together
 * for registration with the generic TCP proc code.
 */
1863 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1866 .seq_fops = &tcp6_afinfo_seq_fops,
1868 .show = tcp6_seq_show,
/* Register /proc/net/tcp6 for this network namespace. */
1872 int __net_init tcp6_proc_init(struct net *net)
1874 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Unregister /proc/net/tcp6 for this network namespace. */
1877 void tcp6_proc_exit(struct net *net)
1879 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* The IPv6 TCP protocol descriptor (struct proto): wires socket-layer
 * operations to the TCP implementation, with IPv6-specific init/destroy,
 * backlog receive, and timewait/request-sock ops.
 */
1883 struct proto tcpv6_prot = {
1885 .owner = THIS_MODULE,
1887 .connect = tcp_v6_connect,
1888 .disconnect = tcp_disconnect,
1889 .accept = inet_csk_accept,
1891 .init = tcp_v6_init_sock,
1892 .destroy = tcp_v6_destroy_sock,
1893 .shutdown = tcp_shutdown,
1894 .setsockopt = tcp_setsockopt,
1895 .getsockopt = tcp_getsockopt,
1896 .keepalive = tcp_set_keepalive,
1897 .recvmsg = tcp_recvmsg,
1898 .sendmsg = tcp_sendmsg,
1899 .sendpage = tcp_sendpage,
1900 .backlog_rcv = tcp_v6_do_rcv,
1901 .release_cb = tcp_release_cb,
1903 .unhash = inet_unhash,
1904 .get_port = inet_csk_get_port,
1905 .enter_memory_pressure = tcp_enter_memory_pressure,
1906 .stream_memory_free = tcp_stream_memory_free,
1907 .sockets_allocated = &tcp_sockets_allocated,
1908 .memory_allocated = &tcp_memory_allocated,
1909 .memory_pressure = &tcp_memory_pressure,
1910 .orphan_count = &tcp_orphan_count,
1911 .sysctl_mem = sysctl_tcp_mem,
1912 .sysctl_wmem = sysctl_tcp_wmem,
1913 .sysctl_rmem = sysctl_tcp_rmem,
1914 .max_header = MAX_TCP_HEADER,
1915 .obj_size = sizeof(struct tcp6_sock),
1916 .slab_flags = SLAB_DESTROY_BY_RCU,
1917 .twsk_prot = &tcp6_timewait_sock_ops,
1918 .rsk_prot = &tcp6_request_sock_ops,
1919 .h.hashinfo = &tcp_hashinfo,
1920 .no_autobind = true,
1921 #ifdef CONFIG_COMPAT
1922 .compat_setsockopt = compat_tcp_setsockopt,
1923 .compat_getsockopt = compat_tcp_getsockopt,
1925 .diag_destroy = tcp_abort,
/* IPv6 protocol handler registration for IPPROTO_TCP: receive entry point,
 * ICMPv6 error handler and the early-demux hook.
 */
1928 static struct inet6_protocol tcpv6_protocol = {
1929 .early_demux = tcp_v6_early_demux,
1930 .early_demux_handler = tcp_v6_early_demux,
1931 .handler = tcp_v6_rcv,
1932 .err_handler = tcp_v6_err,
1933 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry so socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)
 * resolves to tcpv6_prot with the generic inet6 stream ops.
 */
1936 static struct inet_protosw tcpv6_protosw = {
1937 .type = SOCK_STREAM,
1938 .protocol = IPPROTO_TCP,
1939 .prot = &tcpv6_prot,
1940 .ops = &inet6_stream_ops,
1941 .flags = INET_PROTOSW_PERMANENT |
/* Per-namespace init: create the kernel control socket used for
 * transmitting RSTs/ACKs not tied to a user socket.
 */
1945 static int __net_init tcpv6_net_init(struct net *net)
1947 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1948 SOCK_RAW, IPPROTO_TCP, net);
/* Per-namespace exit: destroy the control socket created in net_init. */
1951 static void __net_exit tcpv6_net_exit(struct net *net)
1953 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched namespace teardown: purge all IPv6 TIME-WAIT sockets once per
 * batch of dying namespaces rather than per namespace.
 */
1956 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1958 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle hooks for IPv6 TCP. */
1961 static struct pernet_operations tcpv6_net_ops = {
1962 .init = tcpv6_net_init,
1963 .exit = tcpv6_net_exit,
1964 .exit_batch = tcpv6_net_exit_batch,
/* tcpv6_init - module/boot-time setup: register the IPv6 protocol handler,
 * the protosw entry, and the pernet operations, unwinding in reverse order
 * on failure (goto-cleanup labels fall in elided lines of this excerpt).
 */
1967 int __init tcpv6_init(void)
1971 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
1975 /* register inet6 protocol */
1976 ret = inet6_register_protosw(&tcpv6_protosw);
1978 goto out_tcpv6_protocol;
1980 ret = register_pernet_subsys(&tcpv6_net_ops);
1982 goto out_tcpv6_protosw;
/* Error unwind: undo registrations in reverse order. */
1987 inet6_unregister_protosw(&tcpv6_protosw);
1989 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* tcpv6_exit - teardown in strict reverse order of tcpv6_init. */
1993 void tcpv6_exit(void)
1995 unregister_pernet_subsys(&tcpv6_net_ops);
1996 inet6_unregister_protosw(&tcpv6_protosw);
1997 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);