3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
/*
 * Forward declarations for the IPv6 TCP handlers defined later in this
 * file, plus the per-address-family operation tables.  ipv6_mapped is
 * used for v4-mapped-v6 sockets, ipv6_specific for native IPv6.
 * NOTE(review): this chunk appears truncated; interior lines are missing.
 */
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static void __tcp_v6_send_check(struct sk_buff *skb,
81 const struct in6_addr *saddr,
82 const struct in6_addr *daddr);
84 static const struct inet_connection_sock_af_ops ipv6_mapped;
85 static const struct inet_connection_sock_af_ops ipv6_specific;
86 #ifdef CONFIG_TCP_MD5SIG
87 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
88 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* MD5 key lookup prototype; the definition lives in the MD5SIG section below. */
90 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
91 const struct in6_addr *addr)
/*
 * Insert a non-closed socket into the established/listening hash tables.
 * The icsk_af_ops check distinguishes v4-mapped sockets (presumably
 * hashed via the IPv4 path — TODO confirm, the branch body is not
 * visible here) from native IPv6 sockets, which go through __inet6_hash().
 */
97 static void tcp_v6_hash(struct sock *sk)
99 if (sk->sk_state != TCP_CLOSE) {
100 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
105 __inet6_hash(sk, NULL);
/*
 * Compute the TCP checksum over the IPv6 pseudo-header (RFC 2460) for
 * @len bytes of payload, folding in the partial checksum @base.
 * Thin wrapper around csum_ipv6_magic().
 */
110 static __inline__ __sum16 tcp_v6_check(int len,
111 const struct in6_addr *saddr,
112 const struct in6_addr *daddr,
115 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Derive the initial sequence number for a new connection from the
 * packet's address/port 4-tuple via the secure (keyed) ISN generator,
 * so ISNs are unpredictable to off-path attackers.
 */
118 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
120 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32,
123 tcp_hdr(skb)->source);
/*
 * Active-open (connect()) for an IPv6 TCP socket.
 *
 * Validates the sockaddr, resolves flow labels and scope ids, handles
 * the v4-mapped case by delegating to tcp_v4_connect(), performs the
 * route lookup, binds a local port via inet6_hash_connect(), picks a
 * secure ISN and finally sends the SYN with tcp_connect().
 *
 * Returns 0 on success or a negative errno; on failure the socket is
 * moved back to TCP_CLOSE.
 * NOTE(review): source chunk is truncated — several declarations,
 * labels and error paths are not visible here.
 */
126 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
129 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
130 struct inet_sock *inet = inet_sk(sk);
131 struct inet_connection_sock *icsk = inet_csk(sk);
132 struct ipv6_pinfo *np = inet6_sk(sk);
133 struct tcp_sock *tp = tcp_sk(sk);
134 struct in6_addr *saddr = NULL, *final_p, final;
137 struct dst_entry *dst;
/* Basic sockaddr sanity: minimum RFC 2133 length and AF_INET6 family. */
141 if (addr_len < SIN6_LEN_RFC2133)
144 if (usin->sin6_family != AF_INET6)
145 return -EAFNOSUPPORT;
147 memset(&fl6, 0, sizeof(fl6));
/* If the caller supplied a flow label, resolve it to its bound
 * destination address via the socket's flow-label table. */
150 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
151 IP6_ECN_flow_init(fl6.flowlabel);
152 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
153 struct ip6_flowlabel *flowlabel;
154 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
155 if (flowlabel == NULL)
157 usin->sin6_addr = flowlabel->dst;
158 fl6_sock_release(flowlabel);
163 * connect() to INADDR_ANY means loopback (BSD'ism).
166 if(ipv6_addr_any(&usin->sin6_addr))
167 usin->sin6_addr.s6_addr[15] = 0x1;
169 addr_type = ipv6_addr_type(&usin->sin6_addr);
171 if(addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a well-defined interface: either from
 * sin6_scope_id or an already-bound device — and they must agree. */
174 if (addr_type&IPV6_ADDR_LINKLOCAL) {
175 if (addr_len >= sizeof(struct sockaddr_in6) &&
176 usin->sin6_scope_id) {
177 /* If interface is set while binding, indices
180 if (sk->sk_bound_dev_if &&
181 sk->sk_bound_dev_if != usin->sin6_scope_id)
184 sk->sk_bound_dev_if = usin->sin6_scope_id;
187 /* Connect to link-local address requires an interface */
188 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer: stale PAWS timestamps must not
 * carry over, so reset ts_recent. */
192 if (tp->rx_opt.ts_recent_stamp &&
193 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
194 tp->rx_opt.ts_recent = 0;
195 tp->rx_opt.ts_recent_stamp = 0;
199 np->daddr = usin->sin6_addr;
200 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: flip the socket over to the IPv4 machinery
 * (af_ops, backlog handler, MD5 ops) and delegate to tcp_v4_connect().
 * On failure the IPv6 ops are restored (restore path partly truncated). */
206 if (addr_type == IPV6_ADDR_MAPPED) {
207 u32 exthdrlen = icsk->icsk_ext_hdr_len;
208 struct sockaddr_in sin;
210 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
212 if (__ipv6_only_sock(sk))
215 sin.sin_family = AF_INET;
216 sin.sin_port = usin->sin6_port;
217 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
219 icsk->icsk_af_ops = &ipv6_mapped;
220 sk->sk_backlog_rcv = tcp_v4_do_rcv;
221 #ifdef CONFIG_TCP_MD5SIG
222 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
225 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
228 icsk->icsk_ext_hdr_len = exthdrlen;
229 icsk->icsk_af_ops = &ipv6_specific;
230 sk->sk_backlog_rcv = tcp_v6_do_rcv;
231 #ifdef CONFIG_TCP_MD5SIG
232 tp->af_specific = &tcp_sock_ipv6_specific;
236 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
237 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
244 if (!ipv6_addr_any(&np->rcv_saddr))
245 saddr = &np->rcv_saddr;
/* Build the flow key and look up the route; final_p carries the real
 * destination when a routing header rewrites fl6.daddr. */
247 fl6.flowi6_proto = IPPROTO_TCP;
248 fl6.daddr = np->daddr;
249 fl6.saddr = saddr ? *saddr : np->saddr;
250 fl6.flowi6_oif = sk->sk_bound_dev_if;
251 fl6.flowi6_mark = sk->sk_mark;
252 fl6.fl6_dport = usin->sin6_port;
253 fl6.fl6_sport = inet->inet_sport;
255 final_p = fl6_update_dst(&fl6, np->opt, &final);
257 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
259 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
267 np->rcv_saddr = *saddr;
270 /* set the source address */
/* LOOPBACK4_IPV6 marks the IPv4 fields as unused for a native-v6 sock. */
272 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
274 sk->sk_gso_type = SKB_GSO_TCPV6;
275 __ip6_dst_store(sk, dst, NULL, NULL);
/* tw_recycle: seed rx_opt.ts_recent from the inet_peer cache so PAWS
 * can validate against the last timestamp seen from this peer. */
277 rt = (struct rt6_info *) dst;
278 if (tcp_death_row.sysctl_tw_recycle &&
279 !tp->rx_opt.ts_recent_stamp &&
280 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
281 struct inet_peer *peer = rt6_get_peer(rt);
283 * VJ's idea. We save last timestamp seen from
284 * the destination in peer table, when entering state
285 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
286 * when trying new connection.
289 inet_peer_refcheck(peer);
290 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
291 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
292 tp->rx_opt.ts_recent = peer->tcp_ts;
/* Account IPv6 extension-header space so MSS/MTU math stays correct. */
297 icsk->icsk_ext_hdr_len = 0;
299 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
302 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
304 inet->inet_dport = usin->sin6_port;
306 tcp_set_state(sk, TCP_SYN_SENT);
307 err = inet6_hash_connect(&tcp_death_row, sk);
312 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
317 err = tcp_connect(sk);
/* Failure path: undo the state transition and clear routing caps. */
324 tcp_set_state(sk, TCP_CLOSE);
327 inet->inet_dport = 0;
328 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.  Locates the socket the error refers
 * to, validates that the quoted sequence number is in-window, handles
 * PKT_TOOBIG by re-running PMTU discovery, and propagates other errors
 * to the socket (or the matching request_sock for embryonic
 * connections).  NOTE(review): chunk is truncated — lock/unlock calls,
 * gotos and some case labels are not visible.
 */
332 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
333 u8 type, u8 code, int offset, __be32 info)
335 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
336 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
337 struct ipv6_pinfo *np;
342 struct net *net = dev_net(skb->dev);
344 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
345 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
348 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
/* Time-wait sockets only need their refcount dropped; nothing to report. */
353 if (sk->sk_state == TCP_TIME_WAIT) {
354 inet_twsk_put(inet_twsk(sk));
/* If user context holds the socket lock we cannot process now; count it. */
359 if (sock_owned_by_user(sk))
360 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
362 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount (a TTL-security style check) drops errors whose quoted
 * packet came from too far away. */
365 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
366 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
/* Ignore errors quoting a sequence outside [snd_una, snd_nxt]. */
371 seq = ntohl(th->seq);
372 if (sk->sk_state != TCP_LISTEN &&
373 !between(seq, tp->snd_una, tp->snd_nxt)) {
374 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
/* Path-MTU update: refresh the cached dst if needed, push the new MTU
 * into it, then shrink MSS and retransmit if our pmtu cookie is stale. */
380 if (type == ICMPV6_PKT_TOOBIG) {
381 struct dst_entry *dst;
383 if (sock_owned_by_user(sk))
385 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
388 /* icmp should have updated the destination cache entry */
389 dst = __sk_dst_check(sk, np->dst_cookie);
392 struct inet_sock *inet = inet_sk(sk);
395 /* BUGGG_FUTURE: Again, it is not clear how
396 to handle rthdr case. Ignore this complexity
399 memset(&fl6, 0, sizeof(fl6));
400 fl6.flowi6_proto = IPPROTO_TCP;
401 fl6.daddr = np->daddr;
402 fl6.saddr = np->saddr;
403 fl6.flowi6_oif = sk->sk_bound_dev_if;
404 fl6.flowi6_mark = sk->sk_mark;
405 fl6.fl6_dport = inet->inet_dport;
406 fl6.fl6_sport = inet->inet_sport;
407 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
409 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
411 sk->sk_err_soft = -PTR_ERR(dst);
418 dst->ops->update_pmtu(dst, ntohl(info));
420 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
421 tcp_sync_mss(sk, dst_mtu(dst));
422 tcp_simple_retransmit(sk);
423 } /* else let the usual retransmit timer handle it */
428 icmpv6_err_convert(type, code, &err);
430 /* Might be for an request_sock */
431 switch (sk->sk_state) {
432 struct request_sock *req, **prev;
434 if (sock_owned_by_user(sk))
437 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
438 &hdr->saddr, inet6_iif(skb));
442 /* ICMPs are not backlogged, hence we cannot get
443 * an established socket here.
445 WARN_ON(req->sk != NULL);
447 if (seq != tcp_rsk(req)->snt_isn) {
448 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
/* Fatal for the embryonic connection: drop the request_sock. */
452 inet_csk_reqsk_queue_drop(sk, req, prev);
456 case TCP_SYN_RECV: /* Cannot happen.
457 It can, it SYNs are crossed. --ANK */
458 if (!sock_owned_by_user(sk)) {
460 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
464 sk->sk_err_soft = err;
/* Established states: hard error only if app asked for it (recverr)
 * and the socket is not user-locked; otherwise record a soft error. */
468 if (!sock_owned_by_user(sk) && np->recverr) {
470 sk->sk_error_report(sk);
472 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN+ACK for a pending request_sock.  Constructs
 * the flow from the request's addresses/ports, routes it, fills in the
 * TCP checksum and sends via ip6_xmit().  Returns a net_xmit_* derived
 * error code.  NOTE(review): truncated — the opt assignment from
 * np->opt and some error gotos are not visible.
 */
480 static int tcp_v6_send_synack(struct sock *sk,
481 struct request_sock *req,
482 struct request_values *rvp,
485 struct inet6_request_sock *treq = inet6_rsk(req);
486 struct ipv6_pinfo *np = inet6_sk(sk);
487 struct sk_buff * skb;
488 struct ipv6_txoptions *opt = NULL;
489 struct in6_addr * final_p, final;
491 struct dst_entry *dst;
494 memset(&fl6, 0, sizeof(fl6));
495 fl6.flowi6_proto = IPPROTO_TCP;
496 fl6.daddr = treq->rmt_addr;
497 fl6.saddr = treq->loc_addr;
499 fl6.flowi6_oif = treq->iif;
500 fl6.flowi6_mark = sk->sk_mark;
501 fl6.fl6_dport = inet_rsk(req)->rmt_port;
502 fl6.fl6_sport = inet_rsk(req)->loc_port;
503 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
506 final_p = fl6_update_dst(&fl6, opt, &final);
508 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
514 skb = tcp_make_synack(sk, dst, req, rvp);
517 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
/* Restore daddr: fl6_update_dst() may have replaced it with the first
 * routing-header hop. */
519 fl6.daddr = treq->rmt_addr;
520 skb_set_queue_mapping(skb, queue_mapping);
521 err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
522 err = net_xmit_eval(err);
/* Free opt only if it was allocated here rather than borrowed from np. */
526 if (opt && opt != np->opt)
527 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Retransmit a SYN+ACK for @req: bump the retransmit counter and reuse
 * tcp_v6_send_synack() with queue mapping 0.
 */
531 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
532 struct request_values *rvp)
534 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
535 return tcp_v6_send_synack(sk, req, rvp, 0);
/*
 * request_sock destructor: release the SYN skb saved in pktopts
 * (kfree_skb tolerates NULL).
 */
538 static void tcp_v6_reqsk_destructor(struct request_sock *req)
540 kfree_skb(inet6_rsk(req)->pktopts);
543 #ifdef CONFIG_TCP_MD5SIG
/*
 * Look up the TCP-MD5 (RFC 2385) key configured on @sk for the IPv6
 * peer @addr; returns NULL if none.
 */
544 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
545 const struct in6_addr *addr)
547 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* MD5 key lookup keyed by another socket's destination address. */
550 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
551 struct sock *addr_sk)
553 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/* MD5 key lookup keyed by a request_sock's remote address. */
556 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
557 struct request_sock *req)
559 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/*
 * setsockopt(TCP_MD5SIG) handler: copy the tcp_md5sig request from
 * userspace and add or delete (keylen == 0) the key for the given
 * peer.  v4-mapped addresses are stored under AF_INET using the
 * embedded IPv4 address.  Returns 0 or a negative errno.
 */
562 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
565 struct tcp_md5sig cmd;
566 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
568 if (optlen < sizeof(cmd))
571 if (copy_from_user(&cmd, optval, sizeof(cmd)))
574 if (sin6->sin6_family != AF_INET6)
/* Zero key length means "delete the key for this peer". */
577 if (!cmd.tcpm_keylen) {
578 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
579 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
581 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
585 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
588 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
589 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
590 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
592 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
593 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/*
 * Feed the IPv6 TCP pseudo-header (saddr, daddr, length, protocol —
 * RFC 2460 layout) into the per-CPU MD5 hash state @hp.  @nbytes is
 * the TCP length to encode.  Returns the crypto_hash_update() result.
 */
596 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
597 const struct in6_addr *daddr,
598 const struct in6_addr *saddr, int nbytes)
600 struct tcp6_pseudohdr *bp;
601 struct scatterlist sg;
603 bp = &hp->md5_blk.ip6;
604 /* 1. TCP pseudo-header (RFC2460) */
607 bp->protocol = cpu_to_be32(IPPROTO_TCP);
608 bp->len = cpu_to_be32(nbytes);
610 sg_init_one(&sg, bp, sizeof(*bp));
611 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/*
 * Compute the TCP-MD5 signature over pseudo-header + TCP header + key
 * for a locally generated (headers-only) segment, writing 16 bytes
 * into @md5_hash.  On any crypto failure the output is zeroed.
 * Uses the per-CPU md5sig pool; returns 0 on success.
 */
614 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
615 const struct in6_addr *daddr, struct in6_addr *saddr,
616 const struct tcphdr *th)
618 struct tcp_md5sig_pool *hp;
619 struct hash_desc *desc;
621 hp = tcp_get_md5sig_pool();
623 goto clear_hash_noput;
624 desc = &hp->md5_desc;
626 if (crypto_hash_init(desc))
628 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
630 if (tcp_md5_hash_header(hp, th))
632 if (tcp_md5_hash_key(hp, key))
634 if (crypto_hash_final(desc, md5_hash))
637 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest so callers never
 * act on a partial hash. */
641 tcp_put_md5sig_pool();
643 memset(md5_hash, 0, 16);
/*
 * Compute the TCP-MD5 signature over a full skb (pseudo-header, TCP
 * header, payload, key) into @md5_hash.  The source/destination
 * addresses come from the full socket, the request_sock, or the skb's
 * IPv6 header — whichever is non-NULL (branch conditions truncated in
 * this chunk).  Zeroes the digest on any crypto failure.
 */
647 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
648 const struct sock *sk,
649 const struct request_sock *req,
650 const struct sk_buff *skb)
652 const struct in6_addr *saddr, *daddr;
653 struct tcp_md5sig_pool *hp;
654 struct hash_desc *desc;
655 const struct tcphdr *th = tcp_hdr(skb);
658 saddr = &inet6_sk(sk)->saddr;
659 daddr = &inet6_sk(sk)->daddr;
661 saddr = &inet6_rsk(req)->loc_addr;
662 daddr = &inet6_rsk(req)->rmt_addr;
664 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
665 saddr = &ip6h->saddr;
666 daddr = &ip6h->daddr;
669 hp = tcp_get_md5sig_pool();
671 goto clear_hash_noput;
672 desc = &hp->md5_desc;
674 if (crypto_hash_init(desc))
677 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
679 if (tcp_md5_hash_header(hp, th))
681 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
683 if (tcp_md5_hash_key(hp, key))
685 if (crypto_hash_final(desc, md5_hash))
688 tcp_put_md5sig_pool();
/* Error path mirrors tcp_v6_md5_hash_hdr(): release pool, zero digest. */
692 tcp_put_md5sig_pool();
694 memset(md5_hash, 0, 16);
/*
 * Validate the TCP-MD5 option on an inbound segment against the key
 * configured for the peer.  Segments with a key-but-no-option,
 * option-but-no-key, or a mismatching digest are counted and rejected
 * (return paths truncated in this chunk); both-absent passes.
 */
698 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
700 const __u8 *hash_location = NULL;
701 struct tcp_md5sig_key *hash_expected;
702 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
703 const struct tcphdr *th = tcp_hdr(skb);
707 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
708 hash_location = tcp_parse_md5sig_option(th);
710 /* We've parsed the options - do we have a hash? */
711 if (!hash_expected && !hash_location)
714 if (hash_expected && !hash_location) {
715 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
719 if (!hash_expected && hash_location) {
720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
724 /* check the signature */
725 genhash = tcp_v6_md5_hash_skb(newhash,
729 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
730 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
731 genhash ? "failed" : "mismatch",
732 &ip6h->saddr, ntohs(th->source),
733 &ip6h->daddr, ntohs(th->dest));
/*
 * request_sock operations table for IPv6 TCP: wires the SYN+ACK
 * retransmit, ACK/RST senders and destructor defined in this file.
 */
740 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
742 .obj_size = sizeof(struct tcp6_request_sock),
743 .rtx_syn_ack = tcp_v6_rtx_synack,
744 .send_ack = tcp_v6_reqsk_send_ack,
745 .destructor = tcp_v6_reqsk_destructor,
746 .send_reset = tcp_v6_send_reset,
747 .syn_ack_timeout = tcp_syn_ack_timeout,
750 #ifdef CONFIG_TCP_MD5SIG
/* MD5 hooks used while a connection is still a request_sock. */
751 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
752 .md5_lookup = tcp_v6_reqsk_md5_lookup,
753 .calc_md5_hash = tcp_v6_md5_hash_skb,
/*
 * Fill in the TCP checksum field of an outgoing skb.  For hardware
 * offload (CHECKSUM_PARTIAL) only the pseudo-header sum is stored and
 * csum_start/csum_offset are set for the NIC; otherwise the full
 * checksum is computed in software.
 */
757 static void __tcp_v6_send_check(struct sk_buff *skb,
758 const struct in6_addr *saddr, const struct in6_addr *daddr)
760 struct tcphdr *th = tcp_hdr(skb);
762 if (skb->ip_summed == CHECKSUM_PARTIAL) {
763 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
764 skb->csum_start = skb_transport_header(skb) - skb->head;
765 skb->csum_offset = offsetof(struct tcphdr, check);
767 th->check = tcp_v6_check(skb->len, saddr, daddr,
768 csum_partial(th, th->doff << 2,
/* Checksum an outgoing segment using the socket's own address pair. */
773 static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
775 struct ipv6_pinfo *np = inet6_sk(sk);
777 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
/*
 * GSO pre-segmentation hook: ensure the TCP header is linear, then
 * prime the partial checksum so each resulting segment can be fixed
 * up by the stack/NIC.
 */
780 static int tcp_v6_gso_send_check(struct sk_buff *skb)
782 const struct ipv6hdr *ipv6h;
785 if (!pskb_may_pull(skb, sizeof(*th)))
788 ipv6h = ipv6_hdr(skb);
792 skb->ip_summed = CHECKSUM_PARTIAL;
793 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
/*
 * GRO receive hook for TCP/IPv6: verify the checksum when the device
 * provided a complete sum, otherwise flag the packet so GRO flushes
 * it; then hand off to the generic tcp_gro_receive().
 */
797 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
800 const struct ipv6hdr *iph = skb_gro_network_header(skb);
802 switch (skb->ip_summed) {
803 case CHECKSUM_COMPLETE:
804 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
806 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Checksum not verifiable here: mark for flush rather than merge. */
812 NAPI_GRO_CB(skb)->flush = 1;
816 return tcp_gro_receive(head, skb);
/*
 * GRO completion hook: recompute the pseudo-header checksum for the
 * merged super-packet, tag it as TCPv6 GSO, and finish via
 * tcp_gro_complete().
 */
819 static int tcp6_gro_complete(struct sk_buff *skb)
821 const struct ipv6hdr *iph = ipv6_hdr(skb);
822 struct tcphdr *th = tcp_hdr(skb);
824 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
825 &iph->saddr, &iph->daddr, 0);
826 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
828 return tcp_gro_complete(skb);
/*
 * Build and send a bare control segment (RST when @rst, otherwise ACK)
 * in reply to @skb, using the per-netns control socket.  Optionally
 * appends timestamp and MD5-signature options.  Addresses/ports are
 * swapped from the incoming packet.  NOTE(review): chunk truncated —
 * allocation-failure and route-failure paths are not fully visible.
 */
831 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
832 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
834 const struct tcphdr *th = tcp_hdr(skb);
836 struct sk_buff *buff;
838 struct net *net = dev_net(skb_dst(skb)->dev);
839 struct sock *ctl_sk = net->ipv6.tcp_sk;
840 unsigned int tot_len = sizeof(struct tcphdr);
841 struct dst_entry *dst;
/* Reserve option space: aligned timestamp and/or MD5 option blocks. */
845 tot_len += TCPOLEN_TSTAMP_ALIGNED;
846 #ifdef CONFIG_TCP_MD5SIG
848 tot_len += TCPOLEN_MD5SIG_ALIGNED;
851 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
856 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
858 t1 = (struct tcphdr *) skb_push(buff, tot_len);
859 skb_reset_transport_header(buff);
861 /* Swap the send and the receive. */
862 memset(t1, 0, sizeof(*t1));
863 t1->dest = th->source;
864 t1->source = th->dest;
865 t1->doff = tot_len / 4;
866 t1->seq = htonl(seq);
867 t1->ack_seq = htonl(ack);
/* A RST answering a non-ACK segment carries no ACK bit; everything
 * else (ACKs, RSTs answering ACKs) does. */
868 t1->ack = !rst || !th->ack;
870 t1->window = htons(win);
872 topt = (__be32 *)(t1 + 1);
875 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
876 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
877 *topt++ = htonl(tcp_time_stamp);
881 #ifdef CONFIG_TCP_MD5SIG
883 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
884 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
885 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
886 &ipv6_hdr(skb)->saddr,
887 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: our daddr is the incoming saddr and vice versa. */
891 memset(&fl6, 0, sizeof(fl6));
892 fl6.daddr = ipv6_hdr(skb)->saddr;
893 fl6.saddr = ipv6_hdr(skb)->daddr;
895 buff->ip_summed = CHECKSUM_PARTIAL;
898 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
900 fl6.flowi6_proto = IPPROTO_TCP;
901 fl6.flowi6_oif = inet6_iif(skb);
902 fl6.fl6_dport = t1->dest;
903 fl6.fl6_sport = t1->source;
904 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
906 /* Pass a socket to ip6_dst_lookup either it is for RST
907 * Underlying function will use this to retrieve the network
910 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
912 skb_dst_set(buff, dst);
913 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
914 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
916 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/*
 * Send a RST in reply to @skb.  With MD5 enabled and no socket (e.g. a
 * segment for a dead connection), attempt to find the key via a
 * listener lookup so the RST can still be signed.  Sequence numbers
 * follow RFC 793: echo the ACK as seq when present, otherwise ACK the
 * received data.  NOTE(review): truncated — some branch guards and the
 * release/out labels are not visible.
 */
923 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
925 const struct tcphdr *th = tcp_hdr(skb);
926 u32 seq = 0, ack_seq = 0;
927 struct tcp_md5sig_key *key = NULL;
928 #ifdef CONFIG_TCP_MD5SIG
929 const __u8 *hash_location = NULL;
930 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
931 unsigned char newhash[16];
933 struct sock *sk1 = NULL;
/* Never RST in reply to broadcast/multicast. */
939 if (!ipv6_unicast_destination(skb))
942 #ifdef CONFIG_TCP_MD5SIG
943 hash_location = tcp_parse_md5sig_option(th);
944 if (!sk && hash_location) {
946 * active side is lost. Try to find listening socket through
947 * source port, and then find md5 key through listening socket.
948 * we are not loose security here:
949 * Incoming packet is checked with md5 hash with finding key,
950 * no RST generated if md5 hash doesn't match.
952 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
953 &tcp_hashinfo, &ipv6h->daddr,
954 ntohs(th->source), inet6_iif(skb));
959 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the incoming segment's signature before answering it. */
963 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
964 if (genhash || memcmp(hash_location, newhash, 16) != 0)
967 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
972 seq = ntohl(th->ack_seq);
974 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
977 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
979 #ifdef CONFIG_TCP_MD5SIG
979 #ifdef CONFIG_TCP_MD5SIG
/* Send a bare ACK: tcp_v6_send_response() with rst == 0. */
988 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
989 struct tcp_md5sig_key *key, u8 tclass)
991 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
/*
 * ACK on behalf of a TIME-WAIT socket, using the state saved in the
 * timewait structures (snd_nxt/rcv_nxt, scaled window, last timestamp,
 * and the MD5 key if any).
 */
994 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
996 struct inet_timewait_sock *tw = inet_twsk(sk);
997 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
999 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1000 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1001 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
/*
 * ACK on behalf of a pending request_sock (SYN-RECV): seq is
 * snt_isn + 1, ack covers rcv_isn + 1, and the listener's MD5 key for
 * the peer is used if configured.
 */
1007 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
1008 struct request_sock *req)
1010 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
1011 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
/*
 * For a segment arriving on a listening socket, find what it belongs
 * to: a pending request_sock (-> tcp_check_req), an established or
 * time-wait socket created meanwhile, or a SYN-cookie candidate.
 * NOTE(review): truncated — the NULL checks and return statements
 * between lookups are not visible.
 */
1015 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1017 struct request_sock *req, **prev;
1018 const struct tcphdr *th = tcp_hdr(skb);
1021 /* Find possible connection requests. */
1022 req = inet6_csk_search_req(sk, &prev, th->source,
1023 &ipv6_hdr(skb)->saddr,
1024 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1026 return tcp_check_req(sk, skb, req, prev);
1028 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1029 &ipv6_hdr(skb)->saddr, th->source,
1030 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1033 if (nsk->sk_state != TCP_TIME_WAIT) {
1037 inet_twsk_put(inet_twsk(nsk));
1041 #ifdef CONFIG_SYN_COOKIES
1043 sk = cookie_v6_check(sk, skb);
1048 /* FIXME: this is substantially similar to the ipv4 code.
1049 * Can some kind of merge be done? -- erics
/*
 * Passive-open: handle an incoming SYN on a listening IPv6 socket.
 * Allocates and fills a request_sock, parses TCP options (including
 * the experimental cookie-pair extension), applies SYN-flood /
 * syncookie policy and tw_recycle PAWS checks, then sends the SYN+ACK
 * and queues the request.  Returns 0 (never sends a reset itself on
 * the drop paths visible here).  NOTE(review): truncated — drop
 * labels, several guards and the fl6 declaration are not visible.
 */
1051 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1053 struct tcp_extend_values tmp_ext;
1054 struct tcp_options_received tmp_opt;
1055 const u8 *hash_location;
1056 struct request_sock *req;
1057 struct inet6_request_sock *treq;
1058 struct ipv6_pinfo *np = inet6_sk(sk);
1059 struct tcp_sock *tp = tcp_sk(sk);
1060 __u32 isn = TCP_SKB_CB(skb)->when;
1061 struct dst_entry *dst = NULL;
1063 bool want_cookie = false;
/* v4-mapped listeners get IPv4-framed SYNs; delegate to the v4 path. */
1065 if (skb->protocol == htons(ETH_P_IP))
1066 return tcp_v4_conn_request(sk, skb);
1068 if (!ipv6_unicast_destination(skb))
/* Request queue full: decide between syncookies and dropping. */
1071 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1072 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1077 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1080 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1084 #ifdef CONFIG_TCP_MD5SIG
1085 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1088 tcp_clear_options(&tmp_opt);
1089 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1090 tmp_opt.user_mss = tp->rx_opt.user_mss;
1091 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
/* Experimental TCP cookie-pair (draft "cookie transactions") support:
 * derive the responder half of the cookie from the address pair. */
1093 if (tmp_opt.cookie_plus > 0 &&
1094 tmp_opt.saw_tstamp &&
1095 !tp->rx_opt.cookie_out_never &&
1096 (sysctl_tcp_cookie_size > 0 ||
1097 (tp->cookie_values != NULL &&
1098 tp->cookie_values->cookie_desired > 0))) {
1101 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1102 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1104 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1107 /* Secret recipe starts with IP addresses */
1108 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1113 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1119 /* plus variable length Initiator Cookie */
1122 *c++ ^= *hash_location++;
1124 want_cookie = false; /* not our kind of cookie */
1125 tmp_ext.cookie_out_never = 0; /* false */
1126 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1127 } else if (!tp->rx_opt.cookie_in_always) {
1128 /* redundant indications, but ensure initialization. */
1129 tmp_ext.cookie_out_never = 1; /* true */
1130 tmp_ext.cookie_plus = 0;
1134 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
/* Syncookies cannot encode options without a timestamp; drop them. */
1136 if (want_cookie && !tmp_opt.saw_tstamp)
1137 tcp_clear_options(&tmp_opt);
1139 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1140 tcp_openreq_init(req, &tmp_opt, skb);
1142 treq = inet6_rsk(req);
1143 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1144 treq->loc_addr = ipv6_hdr(skb)->daddr;
1145 if (!want_cookie || tmp_opt.tstamp_ok)
1146 TCP_ECN_create_request(req, skb);
1148 treq->iif = sk->sk_bound_dev_if;
1150 /* So that link locals have meaning */
1151 if (!sk->sk_bound_dev_if &&
1152 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1153 treq->iif = inet6_iif(skb);
1156 struct inet_peer *peer = NULL;
/* Keep the SYN skb if the application asked for packet info options. */
1158 if (ipv6_opt_accepted(sk, skb) ||
1159 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1160 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1161 atomic_inc(&skb->users);
1162 treq->pktopts = skb;
1166 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1167 req->cookie_ts = tmp_opt.tstamp_ok;
1171 /* VJ's idea. We save last timestamp seen
1172 * from the destination in peer table, when entering
1173 * state TIME-WAIT, and check against it before
1174 * accepting new connection request.
1176 * If "isn" is not zero, this request hit alive
1177 * timewait bucket, so that all the necessary checks
1178 * are made in the function processing timewait state.
1180 if (tmp_opt.saw_tstamp &&
1181 tcp_death_row.sysctl_tw_recycle &&
1182 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL &&
1183 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
1184 ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
1186 inet_peer_refcheck(peer);
/* PAWS check against the cached peer timestamp: reject SYNs whose
 * timestamp went backwards within TCP_PAWS_MSL. */
1187 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1188 (s32)(peer->tcp_ts - req->ts_recent) >
1190 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1191 goto drop_and_release;
1194 /* Kill the following clause, if you dislike this way. */
1195 else if (!sysctl_tcp_syncookies &&
1196 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1197 (sysctl_max_syn_backlog >> 2)) &&
1198 (!peer || !peer->tcp_ts_stamp) &&
1199 (!dst || !dst_metric(dst, RTAX_RTT))) {
1200 /* Without syncookies last quarter of
1201 * backlog is filled with destinations,
1202 * proven to be alive.
1203 * It means that we continue to communicate
1204 * to destinations, already remembered
1205 * to the moment of synflood.
1207 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1208 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1209 goto drop_and_release;
1212 isn = tcp_v6_init_sequence(skb);
1215 tcp_rsk(req)->snt_isn = isn;
1216 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1218 if (security_inet_conn_request(sk, skb, req))
1219 goto drop_and_release;
1221 if (tcp_v6_send_synack(sk, req,
1222 (struct request_values *)&tmp_ext,
1223 skb_get_queue_mapping(skb)) ||
1227 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1235 return 0; /* don't send reset */
/*
 * Create the child socket once the 3-way handshake completes.  Two
 * paths: (1) v4-mapped — delegate to tcp_v4_syn_recv_sock() and then
 * re-dress the child with IPv6 state; (2) native IPv6 — route, clone
 * via tcp_create_openreq_child(), copy addresses/options from the
 * request, size the MSS, inherit the MD5 key, and hash the child.
 * Returns the new socket or NULL on failure.  NOTE(review): truncated —
 * several NULL checks, gotos and the overflow/out labels are only
 * partially visible.
 */
1238 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1239 struct request_sock *req,
1240 struct dst_entry *dst)
1242 struct inet6_request_sock *treq;
1243 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1244 struct tcp6_sock *newtcp6sk;
1245 struct inet_sock *newinet;
1246 struct tcp_sock *newtp;
1248 struct ipv6_txoptions *opt;
1249 #ifdef CONFIG_TCP_MD5SIG
1250 struct tcp_md5sig_key *key;
/* --- v4-mapped path: build via IPv4, then overlay IPv6 bookkeeping --- */
1254 if (skb->protocol == htons(ETH_P_IP)) {
1259 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1264 newtcp6sk = (struct tcp6_sock *)newsk;
1265 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1267 newinet = inet_sk(newsk);
1268 newnp = inet6_sk(newsk);
1269 newtp = tcp_sk(newsk);
1271 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1273 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1275 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1277 newnp->rcv_saddr = newnp->saddr;
1279 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1280 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1281 #ifdef CONFIG_TCP_MD5SIG
1282 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1285 newnp->ipv6_ac_list = NULL;
1286 newnp->ipv6_fl_list = NULL;
1287 newnp->pktoptions = NULL;
1289 newnp->mcast_oif = inet6_iif(skb);
1290 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1291 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1294 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1295 * here, tcp_create_openreq_child now does this for us, see the comment in
1296 * that function for the gory details. -acme
1299 /* It is tricky place. Until this moment IPv4 tcp
1300 worked with IPv6 icsk.icsk_af_ops.
1303 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path --- */
1308 treq = inet6_rsk(req);
1311 if (sk_acceptq_is_full(sk))
1315 dst = inet6_csk_route_req(sk, &fl6, req);
1320 newsk = tcp_create_openreq_child(sk, req, skb);
1325 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1326 * count here, tcp_create_openreq_child now does this for us, see the
1327 * comment in that function for the gory details. -acme
1330 newsk->sk_gso_type = SKB_GSO_TCPV6;
1331 __ip6_dst_store(newsk, dst, NULL, NULL);
1333 newtcp6sk = (struct tcp6_sock *)newsk;
1334 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1336 newtp = tcp_sk(newsk);
1337 newinet = inet_sk(newsk);
1338 newnp = inet6_sk(newsk);
1340 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1342 newnp->daddr = treq->rmt_addr;
1343 newnp->saddr = treq->loc_addr;
1344 newnp->rcv_saddr = treq->loc_addr;
1345 newsk->sk_bound_dev_if = treq->iif;
1347 /* Now IPv6 options...
1349 First: no IPv4 options.
1351 newinet->inet_opt = NULL;
1352 newnp->ipv6_ac_list = NULL;
1353 newnp->ipv6_fl_list = NULL;
1356 newnp->rxopt.all = np->rxopt.all;
1358 /* Clone pktoptions received with SYN */
1359 newnp->pktoptions = NULL;
1360 if (treq->pktopts != NULL) {
1361 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1362 consume_skb(treq->pktopts);
1363 treq->pktopts = NULL;
1364 if (newnp->pktoptions)
1365 skb_set_owner_r(newnp->pktoptions, newsk);
1368 newnp->mcast_oif = inet6_iif(skb);
1369 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1370 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1372 /* Clone native IPv6 options from listening socket (if any)
1374 Yes, keeping reference count would be much more clever,
1375 but we make one more one thing there: reattach optmem
1379 newnp->opt = ipv6_dup_options(newsk, opt);
1381 sock_kfree_s(sk, opt, opt->tot_len);
1384 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1386 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1387 newnp->opt->opt_flen);
/* MSS / advmss sizing from the route, clamped by user_mss. */
1389 tcp_mtup_init(newsk);
1390 tcp_sync_mss(newsk, dst_mtu(dst));
1391 newtp->advmss = dst_metric_advmss(dst);
1392 if (tcp_sk(sk)->rx_opt.user_mss &&
1393 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1394 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1396 tcp_initialize_rcv_mss(newsk);
/* Seed RTT from the SYN->SYNACK->ACK round trip if we timestamped it. */
1397 if (tcp_rsk(req)->snt_synack)
1398 tcp_valid_rtt_meas(newsk,
1399 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1400 newtp->total_retrans = req->retrans;
1402 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1403 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1405 #ifdef CONFIG_TCP_MD5SIG
1406 /* Copy over the MD5 key from the original socket */
1407 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1408 /* We're using one, so create a matching key
1409 * on the newsk structure. If we fail to get
1410 * memory, then we end up not copying the key
1413 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1414 AF_INET6, key->key, key->keylen, GFP_ATOMIC);
1418 if (__inet_inherit_port(sk, newsk) < 0) {
1422 __inet6_hash(newsk, NULL);
/* Failure paths: count the overflow/drop and free what we allocated. */
1427 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1429 if (opt && opt != np->opt)
1430 sock_kfree_s(sk, opt, opt->tot_len);
1433 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
/*
 * tcp_v6_checksum_init - validate or seed the checksum of an inbound
 * IPv6 TCP segment.
 *
 * If the NIC supplied a full packet checksum (CHECKSUM_COMPLETE), verify
 * it against the TCP/IPv6 pseudo-header; on success mark the skb
 * CHECKSUM_UNNECESSARY.  Otherwise seed skb->csum with the pseudo-header
 * sum so later verification only has to fold in the payload, and for
 * short packets verify in software immediately.
 *
 * Returns a __sum16 that is non-zero on checksum failure.
 * NOTE(review): this listing is a partial extract; some body lines
 * (returns/closing braces) are not visible between the lines shown.
 */
1437 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
/* Hardware gave us a full checksum -- verify it against the
 * pseudo-header built from the IPv6 saddr/daddr and segment length. */
1439 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1440 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1441 &ipv6_hdr(skb)->daddr, skb->csum)) {
1442 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Software path: precompute the (inverted, folded) pseudo-header sum. */
1447 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1448 &ipv6_hdr(skb)->saddr,
1449 &ipv6_hdr(skb)->daddr, 0));
/* Small packets are cheap to checksum right away. */
1451 if (skb->len <= 76) {
1452 return __skb_checksum_complete(skb);
1457 /* The socket must have its spinlock held when we get
1460 * We have a potential double-lock case here, so even when
1461 * doing backlog processing we use the BH locking scheme.
1462 * This is because we cannot sleep with the original spinlock
/*
 * tcp_v6_do_rcv - per-socket receive path for an IPv6 TCP segment.
 *
 * Called with the socket spinlock held (see the comment above).  Handles
 * the ESTABLISHED fast path, LISTEN (child socket demux via
 * tcp_v6_hnd_req), and the general state machine, then latches any
 * received IPv6 options for IPV6_PKTOPTIONS delivery.
 * Returns 0 on success; on failure the segment is reset/dropped.
 *
 * NOTE(review): partial extract -- labels (reset:, discard:, csum_err:)
 * and some closing braces are not visible between the lines shown.
 */
1465 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1467 struct ipv6_pinfo *np = inet6_sk(sk);
1468 struct tcp_sock *tp;
1469 struct sk_buff *opt_skb = NULL;
1471 /* Imagine: socket is IPv6. IPv4 packet arrives,
1472 goes to IPv4 receive handler and backlogged.
1473 From backlog it always goes here. Kerboom...
1474 Fortunately, tcp_rcv_established and rcv_established
1475 handle them correctly, but it is not case with
1476 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* IPv4-mapped traffic on an IPv6 socket: hand off to the v4 path. */
1479 if (skb->protocol == htons(ETH_P_IP))
1480 return tcp_v4_do_rcv(sk, skb);
1482 #ifdef CONFIG_TCP_MD5SIG
1483 if (tcp_v6_inbound_md5_hash (sk, skb))
1487 if (sk_filter(sk, skb))
1491 * socket locking is here for SMP purposes as backlog rcv
1492 * is currently called with bh processing disabled.
1495 /* Do Stevens' IPV6_PKTOPTIONS.
1497 Yes, guys, it is the only place in our code, where we
1498 may make it not affecting IPv4.
1499 The rest of code is protocol independent,
1500 and I do not like idea to uglify IPv4.
1502 Actually, all the idea behind IPV6_PKTOPTIONS
1503 looks not very well thought. For now we latch
1504 options, received in the last packet, enqueued
1505 by tcp. Feel free to propose better solution.
/* Clone now; ipv6_pktoptions below decides whether to keep it. */
1509 opt_skb = skb_clone(skb, GFP_ATOMIC);
1511 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1512 sock_rps_save_rxhash(sk, skb);
1513 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1516 goto ipv6_pktoptions;
/* Slow path: sanity-check header length and checksum first. */
1520 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1523 if (sk->sk_state == TCP_LISTEN) {
1524 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1529 * Queue it on the new socket if the new socket is active,
1530 * otherwise we just shortcircuit this and continue with
1534 sock_rps_save_rxhash(nsk, skb);
1535 if (tcp_child_process(sk, nsk, skb))
1538 __kfree_skb(opt_skb);
1542 sock_rps_save_rxhash(sk, skb);
1544 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1547 goto ipv6_pktoptions;
/* reset path: answer the offending segment with a RST. */
1551 tcp_v6_send_reset(sk, skb);
1554 __kfree_skb(opt_skb);
1558 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1563 /* Do you ask, what is it?
1565 1. skb was enqueued by tcp.
1566 2. skb is added to tail of read queue, rather than out of order.
1567 3. socket is not in passive state.
1568 4. Finally, it really contains options, which user wants to receive.
1571 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1572 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1573 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1574 np->mcast_oif = inet6_iif(opt_skb);
1575 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1576 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1577 if (np->rxopt.bits.rxtclass)
1578 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
/* Keep the clone: charge it to the socket and publish it as the
 * latest pktoptions, freeing whatever was latched before. */
1579 if (ipv6_opt_accepted(sk, opt_skb)) {
1580 skb_set_owner_r(opt_skb, sk);
1581 opt_skb = xchg(&np->pktoptions, opt_skb);
1583 __kfree_skb(opt_skb);
1584 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - protocol-level entry point for inbound IPv6 TCP packets
 * (registered as tcpv6_protocol.handler).
 *
 * Validates the header, initializes TCP_SKB_CB(), looks up the owning
 * socket and dispatches: process directly / prequeue when the socket is
 * unowned, otherwise backlog; TIME_WAIT sockets go through
 * tcp_timewait_state_process().  Unclaimed segments get a RST.
 *
 * NOTE(review): partial extract -- error labels (no_tcp_socket,
 * discard_it, do_time_wait, bad_packet) and several intervening lines
 * are not visible between the lines shown.
 */
1592 static int tcp_v6_rcv(struct sk_buff *skb)
1594 const struct tcphdr *th;
1595 const struct ipv6hdr *hdr;
1598 struct net *net = dev_net(skb->dev);
1600 if (skb->pkt_type != PACKET_HOST)
1604 * Count it even if it's bad.
1606 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
/* Make sure the linear area covers the basic, then the full, header. */
1608 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1613 if (th->doff < sizeof(struct tcphdr)/4)
1615 if (!pskb_may_pull(skb, th->doff*4))
1618 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
/* Cache per-segment metadata in the skb control block; end_seq counts
 * SYN and FIN as one sequence unit each. */
1622 hdr = ipv6_hdr(skb);
1623 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1624 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1625 skb->len - th->doff*4);
1626 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1627 TCP_SKB_CB(skb)->when = 0;
1628 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1629 TCP_SKB_CB(skb)->sacked = 0;
1631 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1636 if (sk->sk_state == TCP_TIME_WAIT)
/* Generalized TTL security (RFC 5082 style): drop low-hop-limit pkts. */
1639 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1640 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1641 goto discard_and_relse;
1644 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1645 goto discard_and_relse;
1647 if (sk_filter(sk, skb))
1648 goto discard_and_relse;
1652 bh_lock_sock_nested(sk);
1654 if (!sock_owned_by_user(sk)) {
1655 #ifdef CONFIG_NET_DMA
1656 struct tcp_sock *tp = tcp_sk(sk);
1657 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1658 tp->ucopy.dma_chan = net_dma_find_channel();
1659 if (tp->ucopy.dma_chan)
1660 ret = tcp_v6_do_rcv(sk, skb);
1664 if (!tcp_prequeue(sk, skb))
1665 ret = tcp_v6_do_rcv(sk, skb);
/* Socket is owned by a user context: queue to the backlog, bounded by
 * rcvbuf + sndbuf to stop a flood from pinning unbounded memory. */
1667 } else if (unlikely(sk_add_backlog(sk, skb,
1668 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1670 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1671 goto discard_and_relse;
1676 return ret ? -1 : 0;
/* No owning socket was found for this segment. */
1679 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1682 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1684 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1686 tcp_v6_send_reset(NULL, skb);
/* TIME_WAIT handling. */
1703 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1704 inet_twsk_put(inet_twsk(sk));
1708 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1709 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1710 inet_twsk_put(inet_twsk(sk));
1714 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* A SYN for a reusable tuple: look for a live listener and, if found,
 * retire the timewait sock and restart processing on the listener. */
1719 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1720 &ipv6_hdr(skb)->daddr,
1721 ntohs(th->dest), inet6_iif(skb));
1723 struct inet_timewait_sock *tw = inet_twsk(sk);
1724 inet_twsk_deschedule(tw, &tcp_death_row);
1729 /* Fall through to ACK */
1732 tcp_v6_timewait_ack(sk, skb);
1736 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_get_peer - return (creating if needed) the inet_peer entry for
 * this socket's cached route, used for shared per-destination metrics.
 * Bails out when there is no cached route or when the route's
 * destination differs from the socket's peer address (e.g. due to
 * routing headers).
 * NOTE(review): partial extract -- the NULL-return line is not visible.
 */
1741 static struct inet_peer *tcp_v6_get_peer(struct sock *sk)
1743 struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
1744 struct ipv6_pinfo *np = inet6_sk(sk);
1746 /* If we don't have a valid cached route, or we're doing IP
1747 * options which make the IPv6 header destination address
1748 * different from our peer's, do not bother with this.
1750 if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr))
1752 return rt6_get_peer_create(rt);
/* Timewait-socket ops for TCPv6: sizes the tw sock and reuses the
 * generic TCP uniqueness/destructor hooks. */
1755 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1756 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1757 .twsk_unique = tcp_twsk_unique,
1758 .twsk_destructor= tcp_twsk_destructor,
/* AF-specific connection-socket operations for native IPv6 TCP
 * sockets (icsk->icsk_af_ops for the pure-v6 case). */
1761 static const struct inet_connection_sock_af_ops ipv6_specific = {
1762 .queue_xmit = inet6_csk_xmit,
1763 .send_check = tcp_v6_send_check,
1764 .rebuild_header = inet6_sk_rebuild_header,
1765 .conn_request = tcp_v6_conn_request,
1766 .syn_recv_sock = tcp_v6_syn_recv_sock,
1767 .get_peer = tcp_v6_get_peer,
1768 .net_header_len = sizeof(struct ipv6hdr),
1769 .net_frag_header_len = sizeof(struct frag_hdr),
1770 .setsockopt = ipv6_setsockopt,
1771 .getsockopt = ipv6_getsockopt,
1772 .addr2sockaddr = inet6_csk_addr2sockaddr,
1773 .sockaddr_len = sizeof(struct sockaddr_in6),
1774 .bind_conflict = inet6_csk_bind_conflict,
1775 #ifdef CONFIG_COMPAT
1776 .compat_setsockopt = compat_ipv6_setsockopt,
1777 .compat_getsockopt = compat_ipv6_getsockopt,
1781 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) ops for native IPv6 sockets. */
1782 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1783 .md5_lookup = tcp_v6_md5_lookup,
1784 .calc_md5_hash = tcp_v6_md5_hash_skb,
1785 .md5_parse = tcp_v6_parse_md5_keys,
1790 * TCP over IPv4 via INET6 API
/* AF-specific ops used when an AF_INET6 socket actually carries IPv4
 * traffic (v4-mapped addresses): transmit/header paths are the v4
 * ones, sockopt/addressing stay IPv6. */
1793 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1794 .queue_xmit = ip_queue_xmit,
1795 .send_check = tcp_v4_send_check,
1796 .rebuild_header = inet_sk_rebuild_header,
1797 .conn_request = tcp_v6_conn_request,
1798 .syn_recv_sock = tcp_v6_syn_recv_sock,
1799 .get_peer = tcp_v4_get_peer,
1800 .net_header_len = sizeof(struct iphdr),
1801 .setsockopt = ipv6_setsockopt,
1802 .getsockopt = ipv6_getsockopt,
1803 .addr2sockaddr = inet6_csk_addr2sockaddr,
1804 .sockaddr_len = sizeof(struct sockaddr_in6),
1805 .bind_conflict = inet6_csk_bind_conflict,
1806 #ifdef CONFIG_COMPAT
1807 .compat_setsockopt = compat_ipv6_setsockopt,
1808 .compat_getsockopt = compat_ipv6_getsockopt,
1812 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 ops for v4-mapped sockets: hash with the v4 routines but
 * parse keys through the v6 sockopt entry point. */
1813 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1814 .md5_lookup = tcp_v4_md5_lookup,
1815 .calc_md5_hash = tcp_v4_md5_hash_skb,
1816 .md5_parse = tcp_v6_parse_md5_keys,
1820 /* NOTE: A lot of things set to zero explicitly by call to
1821 * sk_alloc() so need not be done here.
/* Socket init hook (tcpv6_prot.init): install the IPv6 af_ops (and
 * MD5 ops when configured).  NOTE(review): partial extract -- the
 * generic tcp init call and return are not visible here. */
1823 static int tcp_v6_init_sock(struct sock *sk)
1825 struct inet_connection_sock *icsk = inet_csk(sk);
1829 icsk->icsk_af_ops = &ipv6_specific;
1831 #ifdef CONFIG_TCP_MD5SIG
1832 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Socket destroy hook: run the shared TCP teardown, then release the
 * IPv6-specific state. */
1838 static void tcp_v6_destroy_sock(struct sock *sk)
1840 tcp_v4_destroy_sock(sk);
1841 inet6_destroy_sock(sk);
1844 #ifdef CONFIG_PROC_FS
1845 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6 - emit one /proc/net/tcp6 line for a pending (SYN_RECV)
 * open request: hex addresses/ports, remaining expire time, owner uid.
 * NOTE(review): partial extract -- the seq_printf call itself and a few
 * argument lines are not visible between the lines shown.
 */
1846 static void get_openreq6(struct seq_file *seq,
1847 const struct sock *sk, struct request_sock *req, int i, int uid)
/* Time-to-die of the request's expire timer, in jiffies from now. */
1849 int ttd = req->expires - jiffies;
1850 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1851 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1857 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1858 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1860 src->s6_addr32[0], src->s6_addr32[1],
1861 src->s6_addr32[2], src->s6_addr32[3],
1862 ntohs(inet_rsk(req)->loc_port),
1863 dest->s6_addr32[0], dest->s6_addr32[1],
1864 dest->s6_addr32[2], dest->s6_addr32[3],
1865 ntohs(inet_rsk(req)->rmt_port),
1867 0,0, /* could print option size, but that is af dependent. */
1868 1, /* timers active (only the expire timer) */
1869 jiffies_to_clock_t(ttd),
1872 0, /* non standard timer */
1873 0, /* open_requests have no inode */
/*
 * get_tcp6_sock - emit one /proc/net/tcp6 line for a full socket
 * (listening or established): addresses, queue sizes, active timer and
 * its expiry, retransmit/probe counters, rto/ato, cwnd and ssthresh.
 * NOTE(review): partial extract -- local declarations, the timer-state
 * assignments, the seq_printf call and some argument lines are not
 * visible between the lines shown.
 */
1877 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1879 const struct in6_addr *dest, *src;
1882 unsigned long timer_expires;
1883 const struct inet_sock *inet = inet_sk(sp);
1884 const struct tcp_sock *tp = tcp_sk(sp);
1885 const struct inet_connection_sock *icsk = inet_csk(sp);
1886 const struct ipv6_pinfo *np = inet6_sk(sp);
1889 src = &np->rcv_saddr;
1890 destp = ntohs(inet->inet_dport);
1891 srcp = ntohs(inet->inet_sport);
/* Pick whichever timer is pending: retransmit, zero-window probe, or
 * the keepalive sk_timer; otherwise report "now". */
1893 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1895 timer_expires = icsk->icsk_timeout;
1896 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1898 timer_expires = icsk->icsk_timeout;
1899 } else if (timer_pending(&sp->sk_timer)) {
1901 timer_expires = sp->sk_timer.expires;
1904 timer_expires = jiffies;
1908 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1909 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1911 src->s6_addr32[0], src->s6_addr32[1],
1912 src->s6_addr32[2], src->s6_addr32[3], srcp,
1913 dest->s6_addr32[0], dest->s6_addr32[1],
1914 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1916 tp->write_seq-tp->snd_una,
/* rx_queue: backlog for listeners, unread bytes otherwise. */
1917 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1919 jiffies_to_clock_t(timer_expires - jiffies),
1920 icsk->icsk_retransmits,
1922 icsk->icsk_probes_out,
1924 atomic_read(&sp->sk_refcnt), sp,
1925 jiffies_to_clock_t(icsk->icsk_rto),
1926 jiffies_to_clock_t(icsk->icsk_ack.ato),
1927 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* -1 signals "still in initial slow start" to readers of the file. */
1929 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/*
 * get_timewait6_sock - emit one /proc/net/tcp6 line for a TIME_WAIT
 * socket: addresses/ports, substate and remaining time-to-die.
 * NOTE(review): partial extract -- the ttd clamp and the seq_printf
 * call line are not visible between the lines shown.
 */
1933 static void get_timewait6_sock(struct seq_file *seq,
1934 struct inet_timewait_sock *tw, int i)
1936 const struct in6_addr *dest, *src;
1938 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1939 int ttd = tw->tw_ttd - jiffies;
1944 dest = &tw6->tw_v6_daddr;
1945 src = &tw6->tw_v6_rcv_saddr;
1946 destp = ntohs(tw->tw_dport);
1947 srcp = ntohs(tw->tw_sport);
1950 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1951 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1953 src->s6_addr32[0], src->s6_addr32[1],
1954 src->s6_addr32[2], src->s6_addr32[3], srcp,
1955 dest->s6_addr32[0], dest->s6_addr32[1],
1956 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1957 tw->tw_substate, 0, 0,
1958 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1959 atomic_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show - seq_file ->show() for /proc/net/tcp6: print the
 * header row for the start token, otherwise dispatch on iterator state
 * to the matching per-entry formatter.
 * NOTE(review): partial extract -- st assignment, break statements and
 * the return are not visible between the lines shown.
 */
1962 static int tcp6_seq_show(struct seq_file *seq, void *v)
1964 struct tcp_iter_state *st;
1966 if (v == SEQ_START_TOKEN) {
1971 "st tx_queue rx_queue tr tm->when retrnsmt"
1972 " uid timeout inode\n");
1977 switch (st->state) {
1978 case TCP_SEQ_STATE_LISTENING:
1979 case TCP_SEQ_STATE_ESTABLISHED:
1980 get_tcp6_sock(seq, v, st->num);
1982 case TCP_SEQ_STATE_OPENREQ:
1983 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1985 case TCP_SEQ_STATE_TIME_WAIT:
1986 get_timewait6_sock(seq, v, st->num);
/* file_operations backing the /proc/net/tcp6 seq_file. */
1993 static const struct file_operations tcp6_afinfo_seq_fops = {
1994 .owner = THIS_MODULE,
1995 .open = tcp_seq_open,
1997 .llseek = seq_lseek,
1998 .release = seq_release_net
/* Registration record tying the tcp6 show routine and fops together
 * for tcp_proc_register(). */
2001 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2004 .seq_fops = &tcp6_afinfo_seq_fops,
2006 .show = tcp6_seq_show,
/* Per-netns registration of /proc/net/tcp6; returns 0 or -errno. */
2010 int __net_init tcp6_proc_init(struct net *net)
2012 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Per-netns removal of /proc/net/tcp6. */
2015 void tcp6_proc_exit(struct net *net)
2017 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* struct proto for AF_INET6/SOCK_STREAM sockets: TCPv6-specific init,
 * destroy, hash and backlog handlers layered over the shared TCP
 * implementation; memory accounting knobs are shared with IPv4 TCP.
 * NOTE(review): partial extract -- .name and a few members/#endif lines
 * are not visible between the lines shown. */
2021 struct proto tcpv6_prot = {
2023 .owner = THIS_MODULE,
2025 .connect = tcp_v6_connect,
2026 .disconnect = tcp_disconnect,
2027 .accept = inet_csk_accept,
2029 .init = tcp_v6_init_sock,
2030 .destroy = tcp_v6_destroy_sock,
2031 .shutdown = tcp_shutdown,
2032 .setsockopt = tcp_setsockopt,
2033 .getsockopt = tcp_getsockopt,
2034 .recvmsg = tcp_recvmsg,
2035 .sendmsg = tcp_sendmsg,
2036 .sendpage = tcp_sendpage,
2037 .backlog_rcv = tcp_v6_do_rcv,
2038 .hash = tcp_v6_hash,
2039 .unhash = inet_unhash,
2040 .get_port = inet_csk_get_port,
2041 .enter_memory_pressure = tcp_enter_memory_pressure,
2042 .sockets_allocated = &tcp_sockets_allocated,
2043 .memory_allocated = &tcp_memory_allocated,
2044 .memory_pressure = &tcp_memory_pressure,
2045 .orphan_count = &tcp_orphan_count,
2046 .sysctl_wmem = sysctl_tcp_wmem,
2047 .sysctl_rmem = sysctl_tcp_rmem,
2048 .max_header = MAX_TCP_HEADER,
2049 .obj_size = sizeof(struct tcp6_sock),
2050 .slab_flags = SLAB_DESTROY_BY_RCU,
2051 .twsk_prot = &tcp6_timewait_sock_ops,
2052 .rsk_prot = &tcp6_request_sock_ops,
2053 .h.hashinfo = &tcp_hashinfo,
2054 .no_autobind = true,
2055 #ifdef CONFIG_COMPAT
2056 .compat_setsockopt = compat_tcp_setsockopt,
2057 .compat_getsockopt = compat_tcp_getsockopt,
2059 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2060 .proto_cgroup = tcp_proto_cgroup,
/* IPPROTO_TCP handler registration for the IPv6 stack, including the
 * GSO/GRO offload hooks.  NOPOLICY/FINAL: xfrm policy is checked in
 * tcp_v6_rcv itself, and this is a terminal protocol. */
2064 static const struct inet6_protocol tcpv6_protocol = {
2065 .handler = tcp_v6_rcv,
2066 .err_handler = tcp_v6_err,
2067 .gso_send_check = tcp_v6_gso_send_check,
2068 .gso_segment = tcp_tso_segment,
2069 .gro_receive = tcp6_gro_receive,
2070 .gro_complete = tcp6_gro_complete,
2071 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to
 * tcpv6_prot and the generic inet6 stream ops. */
2074 static struct inet_protosw tcpv6_protosw = {
2075 .type = SOCK_STREAM,
2076 .protocol = IPPROTO_TCP,
2077 .prot = &tcpv6_prot,
2078 .ops = &inet6_stream_ops,
2080 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the kernel control socket used to send
 * RSTs/ACKs on behalf of this namespace. */
2084 static int __net_init tcpv6_net_init(struct net *net)
2086 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2087 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns teardown: destroy the namespace's TCP control socket. */
2090 static void __net_exit tcpv6_net_exit(struct net *net)
2092 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns exit: flush all IPv6 timewait sockets belonging to the
 * namespaces being torn down. */
2095 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2097 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* Per-network-namespace lifecycle hooks for TCPv6. */
2100 static struct pernet_operations tcpv6_net_ops = {
2101 .init = tcpv6_net_init,
2102 .exit = tcpv6_net_exit,
2103 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init - module/boot-time initialization: register the protocol
 * handler, the protosw entry, then the pernet ops, unwinding each prior
 * step on failure (goto-cleanup pattern).  Returns 0 or -errno.
 * NOTE(review): partial extract -- error-check lines, labels and the
 * final return are not visible between the lines shown.
 */
2106 int __init tcpv6_init(void)
2110 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2114 /* register inet6 protocol */
2115 ret = inet6_register_protosw(&tcpv6_protosw);
2117 goto out_tcpv6_protocol;
2119 ret = register_pernet_subsys(&tcpv6_net_ops);
2121 goto out_tcpv6_protosw;
/* Unwind in reverse order of registration. */
2126 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2128 inet6_unregister_protosw(&tcpv6_protosw);
2132 void tcpv6_exit(void)
2134 unregister_pernet_subsys(&tcpv6_net_ops);
2135 inet6_unregister_protosw(&tcpv6_protosw);
2136 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);