3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
/*
 * Forward declarations for functions defined later in this file, plus the
 * address-family operation tables (ipv6_mapped for v4-mapped-v6 sockets,
 * ipv6_specific for native IPv6) and, under CONFIG_TCP_MD5SIG, the
 * MD5-signature op tables and key-lookup helper.
 * NOTE(review): several original lines are elided in this excerpt.
 */
75 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
76 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req);
79 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static void __tcp_v6_send_check(struct sk_buff *skb,
81 const struct in6_addr *saddr,
82 const struct in6_addr *daddr);
84 static const struct inet_connection_sock_af_ops ipv6_mapped;
85 static const struct inet_connection_sock_af_ops ipv6_specific;
86 #ifdef CONFIG_TCP_MD5SIG
87 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
88 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* Prototype for the per-socket MD5 key lookup by peer IPv6 address. */
90 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
91 const struct in6_addr *addr)
/*
 * tcp_v6_hash - insert a non-closed IPv6 TCP socket into the hash tables.
 *
 * Sockets using the v4-mapped ops (icsk_af_ops == &ipv6_mapped) take the
 * IPv4 hashing path; native IPv6 sockets go through __inet6_hash().
 * NOTE(review): the branch bodies and closing braces are elided here.
 */
97 static void tcp_v6_hash(struct sock *sk)
99 if (sk->sk_state != TCP_CLOSE) {
100 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
105 __inet6_hash(sk, NULL);
/*
 * tcp_v6_check - compute the TCP checksum over the IPv6 pseudo-header
 * (RFC 2460) for @len bytes, folding in a precomputed partial sum.
 * NOTE(review): the final parameter (__wsum base) line is elided here.
 */
110 static __inline__ __sum16 tcp_v6_check(int len,
111 const struct in6_addr *saddr,
112 const struct in6_addr *daddr,
115 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * tcp_v6_init_sequence - pick a secure initial sequence number for a new
 * connection, keyed on the packet's address/port 4-tuple (note the
 * daddr/saddr swap: we generate the ISN for the reply direction).
 * NOTE(review): the tcp_hdr(skb)->dest argument line is elided here.
 */
118 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
120 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32,
123 tcp_hdr(skb)->source);
/*
 * tcp_v6_connect - active open (connect(2)) for an IPv6 TCP socket.
 *
 * Validates the sockaddr, resolves flow labels and scope ids, handles the
 * v4-mapped-address fallback to tcp_v4_connect(), performs the routing
 * lookup, binds a local port via inet6_hash_connect(), and finally sends
 * the SYN with tcp_connect().  Returns 0 or a negative errno.
 * NOTE(review): many original lines (labels, error returns, closing
 * braces) are elided from this excerpt.
 */
126 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
129 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
130 struct inet_sock *inet = inet_sk(sk);
131 struct inet_connection_sock *icsk = inet_csk(sk);
132 struct ipv6_pinfo *np = inet6_sk(sk);
133 struct tcp_sock *tp = tcp_sk(sk);
134 struct in6_addr *saddr = NULL, *final_p, final;
137 struct dst_entry *dst;
/* Basic sockaddr sanity: minimum length and correct family. */
141 if (addr_len < SIN6_LEN_RFC2133)
144 if (usin->sin6_family != AF_INET6)
145 return -EAFNOSUPPORT;
147 memset(&fl6, 0, sizeof(fl6));
/* Honour a caller-supplied flow label; look up its destination. */
150 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
151 IP6_ECN_flow_init(fl6.flowlabel);
152 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
153 struct ip6_flowlabel *flowlabel;
154 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
155 if (flowlabel == NULL)
157 usin->sin6_addr = flowlabel->dst;
158 fl6_sock_release(flowlabel);
163 * connect() to INADDR_ANY means loopback (BSD'ism).
166 if(ipv6_addr_any(&usin->sin6_addr))
167 usin->sin6_addr.s6_addr[15] = 0x1;
169 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* Multicast destinations are rejected for TCP. */
171 if(addr_type & IPV6_ADDR_MULTICAST)
174 if (addr_type&IPV6_ADDR_LINKLOCAL) {
175 if (addr_len >= sizeof(struct sockaddr_in6) &&
176 usin->sin6_scope_id) {
177 /* If interface is set while binding, indices
180 if (sk->sk_bound_dev_if &&
181 sk->sk_bound_dev_if != usin->sin6_scope_id)
184 sk->sk_bound_dev_if = usin->sin6_scope_id;
187 /* Connect to link-local address requires an interface */
188 if (!sk->sk_bound_dev_if)
/* Reset cached timestamps when reconnecting to a different peer. */
192 if (tp->rx_opt.ts_recent_stamp &&
193 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
194 tp->rx_opt.ts_recent = 0;
195 tp->rx_opt.ts_recent_stamp = 0;
199 np->daddr = usin->sin6_addr;
200 np->flow_label = fl6.flowlabel;
/*
 * V4-mapped destination: switch the socket to the mapped op tables and
 * delegate to tcp_v4_connect(); on failure the IPv6 ops are restored.
 */
206 if (addr_type == IPV6_ADDR_MAPPED) {
207 u32 exthdrlen = icsk->icsk_ext_hdr_len;
208 struct sockaddr_in sin;
210 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
212 if (__ipv6_only_sock(sk))
215 sin.sin_family = AF_INET;
216 sin.sin_port = usin->sin6_port;
217 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
219 icsk->icsk_af_ops = &ipv6_mapped;
220 sk->sk_backlog_rcv = tcp_v4_do_rcv;
221 #ifdef CONFIG_TCP_MD5SIG
222 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
225 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
228 icsk->icsk_ext_hdr_len = exthdrlen;
229 icsk->icsk_af_ops = &ipv6_specific;
230 sk->sk_backlog_rcv = tcp_v6_do_rcv;
231 #ifdef CONFIG_TCP_MD5SIG
232 tp->af_specific = &tcp_sock_ipv6_specific;
236 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
237 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
/* Use an already-bound local address as the flow source if present. */
244 if (!ipv6_addr_any(&np->rcv_saddr))
245 saddr = &np->rcv_saddr;
247 fl6.flowi6_proto = IPPROTO_TCP;
248 fl6.daddr = np->daddr;
249 fl6.saddr = saddr ? *saddr : np->saddr;
250 fl6.flowi6_oif = sk->sk_bound_dev_if;
251 fl6.flowi6_mark = sk->sk_mark;
252 fl6.fl6_dport = usin->sin6_port;
253 fl6.fl6_sport = inet->inet_sport;
/* Let a routing-header option override the routed destination. */
255 final_p = fl6_update_dst(&fl6, np->opt, &final);
257 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
259 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
267 np->rcv_saddr = *saddr;
270 /* set the source address */
272 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
274 sk->sk_gso_type = SKB_GSO_TCPV6;
275 __ip6_dst_store(sk, dst, NULL, NULL);
/* TIME-WAIT recycling: fetch the peer's cached timestamp if enabled. */
277 rt = (struct rt6_info *) dst;
278 if (tcp_death_row.sysctl_tw_recycle &&
279 !tp->rx_opt.ts_recent_stamp &&
280 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
281 tcp_fetch_timewait_stamp(sk, dst);
283 icsk->icsk_ext_hdr_len = 0;
285 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
/* MSS clamp assumes the minimum IPv6 MTU (1280) minus headers. */
288 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
290 inet->inet_dport = usin->sin6_port;
292 tcp_set_state(sk, TCP_SYN_SENT);
293 err = inet6_hash_connect(&tcp_death_row, sk);
298 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
303 err = tcp_connect(sk);
/* Failure path: tear the socket back down to CLOSE. */
310 tcp_set_state(sk, TCP_CLOSE);
313 inet->inet_dport = 0;
314 sk->sk_route_caps = 0;
/*
 * tcp_v6_err - ICMPv6 error handler for TCP.
 *
 * Locates the socket for the offending segment, validates the sequence
 * number against the send window, handles PKT_TOOBIG (path-MTU update and
 * simple retransmit), and converts other ICMPv6 errors into socket errors,
 * including matching errors against pending request_socks.
 * NOTE(review): lock/unlock calls, gotos and closing braces are elided
 * from this excerpt.
 */
318 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
319 u8 type, u8 code, int offset, __be32 info)
321 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
322 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
323 struct ipv6_pinfo *np;
328 struct net *net = dev_net(skb->dev);
330 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
331 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
/* No matching socket: count the unreachable ICMP and bail out. */
334 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
339 if (sk->sk_state == TCP_TIME_WAIT) {
340 inet_twsk_put(inet_twsk(sk));
/* If the socket is owned by user context we cannot process now. */
345 if (sock_owned_by_user(sk))
346 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
348 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount filter (RFC 5082-style TTL security). */
351 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
352 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
/* Drop ICMP errors quoting sequence numbers outside our window. */
357 seq = ntohl(th->seq);
358 if (sk->sk_state != TCP_LISTEN &&
359 !between(seq, tp->snd_una, tp->snd_nxt)) {
360 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
366 if (type == ICMPV6_PKT_TOOBIG) {
367 struct dst_entry *dst;
369 if (sock_owned_by_user(sk))
371 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
374 /* icmp should have updated the destination cache entry */
375 dst = __sk_dst_check(sk, np->dst_cookie);
378 struct inet_sock *inet = inet_sk(sk);
381 /* BUGGG_FUTURE: Again, it is not clear how
382 to handle rthdr case. Ignore this complexity
/* Rebuild the flow from socket state and re-route. */
385 memset(&fl6, 0, sizeof(fl6));
386 fl6.flowi6_proto = IPPROTO_TCP;
387 fl6.daddr = np->daddr;
388 fl6.saddr = np->saddr;
389 fl6.flowi6_oif = sk->sk_bound_dev_if;
390 fl6.flowi6_mark = sk->sk_mark;
391 fl6.fl6_dport = inet->inet_dport;
392 fl6.fl6_sport = inet->inet_sport;
393 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
395 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
397 sk->sk_err_soft = -PTR_ERR(dst);
/* Apply the new path MTU; shrink MSS and retransmit if needed. */
404 dst->ops->update_pmtu(dst, ntohl(info));
406 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
407 tcp_sync_mss(sk, dst_mtu(dst));
408 tcp_simple_retransmit(sk);
409 } /* else let the usual retransmit timer handle it */
414 icmpv6_err_convert(type, code, &err);
416 /* Might be for an request_sock */
417 switch (sk->sk_state) {
418 struct request_sock *req, **prev;
420 if (sock_owned_by_user(sk))
423 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
424 &hdr->saddr, inet6_iif(skb));
428 /* ICMPs are not backlogged, hence we cannot get
429 * an established socket here.
431 WARN_ON(req->sk != NULL);
433 if (seq != tcp_rsk(req)->snt_isn) {
434 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
438 inet_csk_reqsk_queue_drop(sk, req, prev);
442 case TCP_SYN_RECV: /* Cannot happen.
443 It can, it SYNs are crossed. --ANK */
444 if (!sock_owned_by_user(sk)) {
446 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
450 sk->sk_err_soft = err;
/* Report soft errors only if the application asked for them. */
454 if (!sock_owned_by_user(sk) && np->recverr) {
456 sk->sk_error_report(sk);
458 sk->sk_err_soft = err;
/*
 * tcp_v6_send_synack - build and transmit a SYN+ACK for a pending
 * connection request.  Grabs a route if the caller did not supply one,
 * builds the segment with tcp_make_synack(), checksums it against the
 * request's addresses, and transmits via ip6_xmit().
 * Returns a net_xmit_eval()-normalised error code.
 * NOTE(review): parameter lines and the error-label epilogue are elided.
 */
466 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
468 struct request_sock *req,
469 struct request_values *rvp,
472 struct inet6_request_sock *treq = inet6_rsk(req);
473 struct ipv6_pinfo *np = inet6_sk(sk);
474 struct sk_buff * skb;
477 /* First, grab a route. */
478 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
481 skb = tcp_make_synack(sk, dst, req, rvp);
484 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
486 fl6->daddr = treq->rmt_addr;
487 skb_set_queue_mapping(skb, queue_mapping);
488 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
489 err = net_xmit_eval(err);
/*
 * tcp_v6_rtx_synack - retransmit a SYN+ACK for @req, bumping the
 * retransmitted-segments MIB counter.  Passes a NULL dst so the route is
 * looked up fresh; queue mapping 0.
 */
496 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
497 struct request_values *rvp)
501 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
502 return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
/*
 * tcp_v6_reqsk_destructor - free the pktoptions skb cached on an IPv6
 * request sock (kfree_skb tolerates NULL).
 */
505 static void tcp_v6_reqsk_destructor(struct request_sock *req)
507 kfree_skb(inet6_rsk(req)->pktopts);
510 #ifdef CONFIG_TCP_MD5SIG
/*
 * Look up the TCP-MD5 (RFC 2385) key configured for an IPv6 peer address.
 */
511 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
512 const struct in6_addr *addr)
514 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* Key lookup keyed by the peer address of another (established) socket. */
517 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
518 struct sock *addr_sk)
520 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/* Key lookup keyed by the remote address of a pending request sock. */
523 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
524 struct request_sock *req)
526 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/*
 * tcp_v6_parse_md5_keys - TCP_MD5SIG setsockopt handler.
 *
 * Copies the struct tcp_md5sig from userspace, validates family and key
 * length, then adds or (for a zero keylen) deletes the key.  V4-mapped
 * addresses are stored under AF_INET using the embedded IPv4 address.
 * NOTE(review): the error-return lines for the validation checks are
 * elided in this excerpt.
 */
529 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
532 struct tcp_md5sig cmd;
533 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
535 if (optlen < sizeof(cmd))
538 if (copy_from_user(&cmd, optval, sizeof(cmd)))
541 if (sin6->sin6_family != AF_INET6)
/* Zero key length means: delete the key for this peer. */
544 if (!cmd.tcpm_keylen) {
545 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
546 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
548 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
552 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
555 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
556 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
557 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
559 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
560 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/*
 * tcp_v6_md5_hash_pseudoheader - fold the IPv6 pseudo-header (saddr,
 * daddr, length, protocol; RFC 2460) into the running MD5 hash for a
 * TCP-MD5 signature computation.  Returns the crypto layer's error code.
 * NOTE(review): the bp->saddr/bp->daddr assignment lines are elided.
 */
563 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
564 const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int nbytes)
567 struct tcp6_pseudohdr *bp;
568 struct scatterlist sg;
570 bp = &hp->md5_blk.ip6;
571 /* 1. TCP pseudo-header (RFC2460) */
574 bp->protocol = cpu_to_be32(IPPROTO_TCP);
575 bp->len = cpu_to_be32(nbytes);
577 sg_init_one(&sg, bp, sizeof(*bp));
578 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/*
 * tcp_v6_md5_hash_hdr - compute the MD5 signature over pseudo-header +
 * TCP header + key (used when sending RST/ACK control segments with no
 * payload).  On any crypto failure the output digest is zeroed.
 * NOTE(review): goto targets and the success return are elided here.
 */
581 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
582 const struct in6_addr *daddr, struct in6_addr *saddr,
583 const struct tcphdr *th)
585 struct tcp_md5sig_pool *hp;
586 struct hash_desc *desc;
/* Per-CPU MD5 pool; must be released on every exit path. */
588 hp = tcp_get_md5sig_pool();
590 goto clear_hash_noput;
591 desc = &hp->md5_desc;
593 if (crypto_hash_init(desc))
595 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
597 if (tcp_md5_hash_header(hp, th))
599 if (tcp_md5_hash_key(hp, key))
601 if (crypto_hash_final(desc, md5_hash))
604 tcp_put_md5sig_pool();
608 tcp_put_md5sig_pool();
/* Error path: never leave a partial digest behind. */
610 memset(md5_hash, 0, 16);
/*
 * tcp_v6_md5_hash_skb - compute the MD5 signature for a full segment:
 * pseudo-header + TCP header + payload + key.
 *
 * The source/destination addresses come from the established socket,
 * the request sock, or the skb's IPv6 header — whichever is non-NULL.
 * On crypto failure the digest is zeroed.
 * NOTE(review): the if/else structure and goto targets are elided here.
 */
614 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
615 const struct sock *sk,
616 const struct request_sock *req,
617 const struct sk_buff *skb)
619 const struct in6_addr *saddr, *daddr;
620 struct tcp_md5sig_pool *hp;
621 struct hash_desc *desc;
622 const struct tcphdr *th = tcp_hdr(skb);
/* Established socket: use its cached address pair. */
625 saddr = &inet6_sk(sk)->saddr;
626 daddr = &inet6_sk(sk)->daddr;
/* Pending request sock: use the request's address pair. */
628 saddr = &inet6_rsk(req)->loc_addr;
629 daddr = &inet6_rsk(req)->rmt_addr;
/* Otherwise fall back to the addresses in the packet itself. */
631 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
632 saddr = &ip6h->saddr;
633 daddr = &ip6h->daddr;
636 hp = tcp_get_md5sig_pool();
638 goto clear_hash_noput;
639 desc = &hp->md5_desc;
641 if (crypto_hash_init(desc))
644 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
646 if (tcp_md5_hash_header(hp, th))
648 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
650 if (tcp_md5_hash_key(hp, key))
652 if (crypto_hash_final(desc, md5_hash))
655 tcp_put_md5sig_pool();
659 tcp_put_md5sig_pool();
/* Error path: zero the digest so callers never see stale bytes. */
661 memset(md5_hash, 0, 16);
/*
 * tcp_v6_inbound_md5_hash - validate the MD5 option on a received segment.
 *
 * Compares the key expected for the peer against the option actually
 * present: both absent is OK; expected-but-missing and
 * unexpected-but-present are counted and rejected; otherwise the
 * signature is recomputed and compared.  Non-zero return drops the packet.
 * NOTE(review): return statements and closing braces are elided here.
 */
665 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
667 const __u8 *hash_location = NULL;
668 struct tcp_md5sig_key *hash_expected;
669 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
670 const struct tcphdr *th = tcp_hdr(skb);
674 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
675 hash_location = tcp_parse_md5sig_option(th);
677 /* We've parsed the options - do we have a hash? */
678 if (!hash_expected && !hash_location)
681 if (hash_expected && !hash_location) {
682 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
686 if (!hash_expected && hash_location) {
687 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
691 /* check the signature */
692 genhash = tcp_v6_md5_hash_skb(newhash,
/* Mismatch (or hash failure): log rate-limited and reject. */
696 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
697 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
698 genhash ? "failed" : "mismatch",
699 &ip6h->saddr, ntohs(th->source),
700 &ip6h->daddr, ntohs(th->dest));
/*
 * Operations table for IPv6 TCP connection requests (SYN_RECV state):
 * SYN+ACK (re)transmission, ACK/RST generation and destructor.
 */
707 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
709 .obj_size = sizeof(struct tcp6_request_sock),
710 .rtx_syn_ack = tcp_v6_rtx_synack,
711 .send_ack = tcp_v6_reqsk_send_ack,
712 .destructor = tcp_v6_reqsk_destructor,
713 .send_reset = tcp_v6_send_reset,
714 .syn_ack_timeout = tcp_syn_ack_timeout,
717 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature hooks used while a connection is still a request sock. */
718 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
719 .md5_lookup = tcp_v6_reqsk_md5_lookup,
720 .calc_md5_hash = tcp_v6_md5_hash_skb,
/*
 * __tcp_v6_send_check - fill in the TCP checksum of an outgoing segment.
 * For CHECKSUM_PARTIAL, store the pseudo-header complement and let the
 * NIC finish; otherwise compute the full checksum in software.
 * NOTE(review): the else line and closing braces are elided here.
 */
724 static void __tcp_v6_send_check(struct sk_buff *skb,
725 const struct in6_addr *saddr, const struct in6_addr *daddr)
727 struct tcphdr *th = tcp_hdr(skb);
729 if (skb->ip_summed == CHECKSUM_PARTIAL) {
730 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
731 skb->csum_start = skb_transport_header(skb) - skb->head;
732 skb->csum_offset = offsetof(struct tcphdr, check);
734 th->check = tcp_v6_check(skb->len, saddr, daddr,
735 csum_partial(th, th->doff << 2,
/*
 * tcp_v6_send_check - checksum an outgoing segment using the connected
 * socket's cached source/destination addresses.
 */
740 static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
742 struct ipv6_pinfo *np = inet6_sk(sk);
744 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
/*
 * tcp_v6_gso_send_check - prepare the checksum fields of a GSO segment:
 * verify the TCP header is pullable, mark CHECKSUM_PARTIAL and store the
 * pseudo-header complement for hardware/segmentation to complete.
 * NOTE(review): declarations, the error return and th->check reset are
 * elided here.
 */
747 static int tcp_v6_gso_send_check(struct sk_buff *skb)
749 const struct ipv6hdr *ipv6h;
752 if (!pskb_may_pull(skb, sizeof(*th)))
755 ipv6h = ipv6_hdr(skb);
759 skb->ip_summed = CHECKSUM_PARTIAL;
760 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
/*
 * tcp6_gro_receive - GRO receive hook for TCP-over-IPv6.  Verifies a
 * CHECKSUM_COMPLETE sum against the pseudo-header (marking the skb
 * CHECKSUM_UNNECESSARY on match) before handing off to the generic
 * tcp_gro_receive() aggregation.
 * NOTE(review): the fallthrough/flush cases of the switch are elided.
 */
764 static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
767 const struct ipv6hdr *iph = skb_gro_network_header(skb);
769 switch (skb->ip_summed) {
770 case CHECKSUM_COMPLETE:
771 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
773 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Checksum cannot be verified here: tell GRO to flush this skb. */
779 NAPI_GRO_CB(skb)->flush = 1;
783 return tcp_gro_receive(head, skb);
/*
 * tcp6_gro_complete - finalize a merged GRO packet: recompute the
 * pseudo-header checksum complement for the enlarged segment, tag it
 * as TCPv6 GSO, and let tcp_gro_complete() finish the TCP fields.
 */
786 static int tcp6_gro_complete(struct sk_buff *skb)
788 const struct ipv6hdr *iph = ipv6_hdr(skb);
789 struct tcphdr *th = tcp_hdr(skb);
791 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
792 &iph->saddr, &iph->daddr, 0);
793 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
795 return tcp_gro_complete(skb);
/*
 * tcp_v6_send_response - build and send a bare control segment (RST or
 * ACK) in response to @skb, without any full socket.
 *
 * Allocates a minimal skb, constructs the TCP header with source and
 * destination swapped relative to the incoming packet, appends optional
 * timestamp and MD5-signature options, routes via the per-netns control
 * socket and transmits with ip6_xmit().
 * NOTE(review): option-size conditionals, RST/ACK field lines and the
 * failure paths are elided in this excerpt.
 */
798 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
799 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
801 const struct tcphdr *th = tcp_hdr(skb);
803 struct sk_buff *buff;
805 struct net *net = dev_net(skb_dst(skb)->dev);
806 struct sock *ctl_sk = net->ipv6.tcp_sk;
807 unsigned int tot_len = sizeof(struct tcphdr);
808 struct dst_entry *dst;
/* Grow the header for timestamp and (optionally) MD5 options. */
812 tot_len += TCPOLEN_TSTAMP_ALIGNED;
813 #ifdef CONFIG_TCP_MD5SIG
815 tot_len += TCPOLEN_MD5SIG_ALIGNED;
818 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
823 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
825 t1 = (struct tcphdr *) skb_push(buff, tot_len);
826 skb_reset_transport_header(buff);
828 /* Swap the send and the receive. */
829 memset(t1, 0, sizeof(*t1));
830 t1->dest = th->source;
831 t1->source = th->dest;
832 t1->doff = tot_len / 4;
833 t1->seq = htonl(seq);
834 t1->ack_seq = htonl(ack);
835 t1->ack = !rst || !th->ack;
837 t1->window = htons(win);
839 topt = (__be32 *)(t1 + 1);
/* Timestamp option: NOP NOP TIMESTAMP len val ecr. */
842 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
843 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
844 *topt++ = htonl(tcp_time_stamp);
848 #ifdef CONFIG_TCP_MD5SIG
/* MD5 option header followed by the 16-byte digest. */
850 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
851 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
852 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
853 &ipv6_hdr(skb)->saddr,
854 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: destination is the sender of the offending packet. */
858 memset(&fl6, 0, sizeof(fl6));
859 fl6.daddr = ipv6_hdr(skb)->saddr;
860 fl6.saddr = ipv6_hdr(skb)->daddr;
862 buff->ip_summed = CHECKSUM_PARTIAL;
865 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
867 fl6.flowi6_proto = IPPROTO_TCP;
868 fl6.flowi6_oif = inet6_iif(skb);
869 fl6.fl6_dport = t1->dest;
870 fl6.fl6_sport = t1->source;
871 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
873 /* Pass a socket to ip6_dst_lookup either it is for RST
874 * Underlying function will use this to retrieve the network
877 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
879 skb_dst_set(buff, dst);
880 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
881 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
/* RSTs are additionally counted in OUTRSTS. */
883 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/*
 * tcp_v6_send_reset - send a RST in response to @skb.
 *
 * Refuses non-unicast destinations.  With MD5 enabled and no socket,
 * tries to find the listening socket by destination address/source port
 * so a signed RST can be generated when the peer signed its segment;
 * unsigned RSTs are suppressed on signature mismatch.  The RST's
 * seq/ack is derived from the incoming segment per RFC 793.
 * NOTE(review): lock/ref-release lines and some branches are elided.
 */
890 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
892 const struct tcphdr *th = tcp_hdr(skb);
893 u32 seq = 0, ack_seq = 0;
894 struct tcp_md5sig_key *key = NULL;
895 #ifdef CONFIG_TCP_MD5SIG
896 const __u8 *hash_location = NULL;
897 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
898 unsigned char newhash[16];
900 struct sock *sk1 = NULL;
/* Never RST to multicast/anycast destinations. */
906 if (!ipv6_unicast_destination(skb))
909 #ifdef CONFIG_TCP_MD5SIG
910 hash_location = tcp_parse_md5sig_option(th);
911 if (!sk && hash_location) {
913 * active side is lost. Try to find listening socket through
914 * source port, and then find md5 key through listening socket.
915 * we are not loose security here:
916 * Incoming packet is checked with md5 hash with finding key,
917 * no RST generated if md5 hash doesn't match.
919 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
920 &tcp_hashinfo, &ipv6h->daddr,
921 ntohs(th->source), inet6_iif(skb));
926 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Verify the incoming signature before replying with a signed RST. */
930 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
931 if (genhash || memcmp(hash_location, newhash, 16) != 0)
934 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
/* If the segment carried an ACK, RST takes its ack number as seq;
 * otherwise acknowledge everything the segment consumed. */
939 seq = ntohl(th->ack_seq);
941 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
944 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
946 #ifdef CONFIG_TCP_MD5SIG
/*
 * tcp_v6_send_ack - send a bare ACK (rst = 0) via tcp_v6_send_response();
 * used for TIME-WAIT and request-sock acknowledgments.
 */
955 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
956 struct tcp_md5sig_key *key, u8 tclass)
958 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
/*
 * tcp_v6_timewait_ack - answer a segment that hit a TIME-WAIT socket,
 * echoing the timewait state's snd_nxt/rcv_nxt, scaled receive window,
 * cached timestamp and MD5 key.
 * NOTE(review): the trailing argument lines and tw release are elided.
 */
961 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
963 struct inet_timewait_sock *tw = inet_twsk(sk);
964 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
966 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
967 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
968 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
/*
 * tcp_v6_reqsk_send_ack - acknowledge a segment on behalf of a pending
 * request sock (SYN_RECV): seq = snt_isn + 1, ack = rcv_isn + 1, with
 * the request's window/timestamp and the listener's MD5 key (if any).
 */
974 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
975 struct request_sock *req)
977 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
978 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
/*
 * tcp_v6_hnd_req - resolve an incoming segment on a listening socket to
 * the socket that should process it: a matching request sock (via
 * tcp_check_req), an already-established child, or — with syncookies —
 * a socket reconstructed by cookie_v6_check().
 * NOTE(review): returns and the bh_lock/refcount handling are elided.
 */
982 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
984 struct request_sock *req, **prev;
985 const struct tcphdr *th = tcp_hdr(skb);
988 /* Find possible connection requests. */
989 req = inet6_csk_search_req(sk, &prev, th->source,
990 &ipv6_hdr(skb)->saddr,
991 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
993 return tcp_check_req(sk, skb, req, prev);
/* No request sock: maybe the connection is already established. */
995 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
996 &ipv6_hdr(skb)->saddr, th->source,
997 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1000 if (nsk->sk_state != TCP_TIME_WAIT) {
1004 inet_twsk_put(inet_twsk(nsk));
1008 #ifdef CONFIG_SYN_COOKIES
1010 sk = cookie_v6_check(sk, skb);
1015 /* FIXME: this is substantially similar to the ipv4 code.
1016 * Can some kind of merge be done? -- erics
/*
 * tcp_v6_conn_request - handle an incoming SYN on a listening socket.
 *
 * Allocates a request sock, parses TCP options (including the
 * experimental cookie-extension handshake of this kernel era), applies
 * syncookie / accept-queue pressure policies, picks an ISN, sends the
 * SYN+ACK and queues the request with the SYN-ACK timer.
 * Returns 0 (never sends a reset itself).
 * NOTE(review): many lines — drop labels, brace closures, some loop
 * bodies of the cookie recipe — are elided in this excerpt.
 */
1018 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1020 struct tcp_extend_values tmp_ext;
1021 struct tcp_options_received tmp_opt;
1022 const u8 *hash_location;
1023 struct request_sock *req;
1024 struct inet6_request_sock *treq;
1025 struct ipv6_pinfo *np = inet6_sk(sk);
1026 struct tcp_sock *tp = tcp_sk(sk);
1027 __u32 isn = TCP_SKB_CB(skb)->when;
1028 struct dst_entry *dst = NULL;
1030 bool want_cookie = false;
/* V4-mapped listeners get their SYNs from the IPv4 path. */
1032 if (skb->protocol == htons(ETH_P_IP))
1033 return tcp_v4_conn_request(sk, skb);
1035 if (!ipv6_unicast_destination(skb))
/* SYN-queue full: decide whether to fall back to syncookies. */
1038 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1039 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
/* Accept queue full with young requests pending: drop the SYN. */
1044 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1047 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1051 #ifdef CONFIG_TCP_MD5SIG
1052 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1055 tcp_clear_options(&tmp_opt);
1056 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1057 tmp_opt.user_mss = tp->rx_opt.user_mss;
1058 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
/* Experimental TCP cookie-extension negotiation (since removed
 * upstream): mixes addresses and the peer cookie into the bakery. */
1060 if (tmp_opt.cookie_plus > 0 &&
1061 tmp_opt.saw_tstamp &&
1062 !tp->rx_opt.cookie_out_never &&
1063 (sysctl_tcp_cookie_size > 0 ||
1064 (tp->cookie_values != NULL &&
1065 tp->cookie_values->cookie_desired > 0))) {
1068 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1069 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1071 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1074 /* Secret recipe starts with IP addresses */
1075 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1080 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1086 /* plus variable length Initiator Cookie */
1089 *c++ ^= *hash_location++;
1091 want_cookie = false; /* not our kind of cookie */
1092 tmp_ext.cookie_out_never = 0; /* false */
1093 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1094 } else if (!tp->rx_opt.cookie_in_always) {
1095 /* redundant indications, but ensure initialization. */
1096 tmp_ext.cookie_out_never = 1; /* true */
1097 tmp_ext.cookie_plus = 0;
1101 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
/* Syncookies cannot encode options without a timestamp. */
1103 if (want_cookie && !tmp_opt.saw_tstamp)
1104 tcp_clear_options(&tmp_opt);
1106 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1107 tcp_openreq_init(req, &tmp_opt, skb);
1109 treq = inet6_rsk(req);
1110 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1111 treq->loc_addr = ipv6_hdr(skb)->daddr;
1112 if (!want_cookie || tmp_opt.tstamp_ok)
1113 TCP_ECN_create_request(req, skb);
1115 treq->iif = sk->sk_bound_dev_if;
1117 /* So that link locals have meaning */
1118 if (!sk->sk_bound_dev_if &&
1119 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1120 treq->iif = inet6_iif(skb);
/* Keep the SYN skb if the child will need its IPv6 packet options. */
1123 if (ipv6_opt_accepted(sk, skb) ||
1124 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1125 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1126 atomic_inc(&skb->users);
1127 treq->pktopts = skb;
1131 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1132 req->cookie_ts = tmp_opt.tstamp_ok;
1136 /* VJ's idea. We save last timestamp seen
1137 * from the destination in peer table, when entering
1138 * state TIME-WAIT, and check against it before
1139 * accepting new connection request.
1141 * If "isn" is not zero, this request hit alive
1142 * timewait bucket, so that all the necessary checks
1143 * are made in the function processing timewait state.
1145 if (tmp_opt.saw_tstamp &&
1146 tcp_death_row.sysctl_tw_recycle &&
1147 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1148 if (!tcp_peer_is_proven(req, dst, true)) {
1149 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1150 goto drop_and_release;
1153 /* Kill the following clause, if you dislike this way. */
1154 else if (!sysctl_tcp_syncookies &&
1155 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1156 (sysctl_max_syn_backlog >> 2)) &&
1157 !tcp_peer_is_proven(req, dst, false)) {
1158 /* Without syncookies last quarter of
1159 * backlog is filled with destinations,
1160 * proven to be alive.
1161 * It means that we continue to communicate
1162 * to destinations, already remembered
1163 * to the moment of synflood.
1165 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1166 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1167 goto drop_and_release;
1170 isn = tcp_v6_init_sequence(skb);
1173 tcp_rsk(req)->snt_isn = isn;
1174 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1176 if (security_inet_conn_request(sk, skb, req))
1177 goto drop_and_release;
1179 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1180 (struct request_values *)&tmp_ext,
1181 skb_get_queue_mapping(skb)) ||
/* Non-cookie requests are queued awaiting the handshake ACK. */
1185 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1193 return 0; /* don't send reset */
/*
 * tcp_v6_syn_recv_sock - create the child socket when the final ACK of
 * the 3-way handshake arrives.
 *
 * For v4-mapped requests, delegates to tcp_v4_syn_recv_sock() and then
 * retrofits the child with mapped IPv6 state and op tables.  For native
 * IPv6, clones the listener via tcp_create_openreq_child(), copies the
 * request's addresses/options, sets up MSS/route/MD5 key and hashes the
 * child into the established table.
 * NOTE(review): NULL checks, gotos, some branch structure and closing
 * braces are elided in this excerpt.
 */
1196 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1197 struct request_sock *req,
1198 struct dst_entry *dst)
1200 struct inet6_request_sock *treq;
1201 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1202 struct tcp6_sock *newtcp6sk;
1203 struct inet_sock *newinet;
1204 struct tcp_sock *newtp;
1206 #ifdef CONFIG_TCP_MD5SIG
1207 struct tcp_md5sig_key *key;
/* --- v4-mapped path: let IPv4 build the child, then dress it up. --- */
1211 if (skb->protocol == htons(ETH_P_IP)) {
1216 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1221 newtcp6sk = (struct tcp6_sock *)newsk;
1222 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1224 newinet = inet_sk(newsk);
1225 newnp = inet6_sk(newsk);
1226 newtp = tcp_sk(newsk);
1228 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1230 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1232 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1234 newnp->rcv_saddr = newnp->saddr;
1236 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1237 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1238 #ifdef CONFIG_TCP_MD5SIG
1239 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1242 newnp->ipv6_ac_list = NULL;
1243 newnp->ipv6_fl_list = NULL;
1244 newnp->pktoptions = NULL;
1246 newnp->mcast_oif = inet6_iif(skb);
1247 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1248 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1251 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1252 * here, tcp_create_openreq_child now does this for us, see the comment in
1253 * that function for the gory details. -acme
1256 /* It is tricky place. Until this moment IPv4 tcp
1257 worked with IPv6 icsk.icsk_af_ops.
1260 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path. --- */
1265 treq = inet6_rsk(req);
1267 if (sk_acceptq_is_full(sk))
1271 dst = inet6_csk_route_req(sk, &fl6, req);
1276 newsk = tcp_create_openreq_child(sk, req, skb);
1281 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1282 * count here, tcp_create_openreq_child now does this for us, see the
1283 * comment in that function for the gory details. -acme
1286 newsk->sk_gso_type = SKB_GSO_TCPV6;
1287 __ip6_dst_store(newsk, dst, NULL, NULL);
1289 newtcp6sk = (struct tcp6_sock *)newsk;
1290 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1292 newtp = tcp_sk(newsk);
1293 newinet = inet_sk(newsk);
1294 newnp = inet6_sk(newsk);
1296 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1298 newnp->daddr = treq->rmt_addr;
1299 newnp->saddr = treq->loc_addr;
1300 newnp->rcv_saddr = treq->loc_addr;
1301 newsk->sk_bound_dev_if = treq->iif;
1303 /* Now IPv6 options...
1305 First: no IPv4 options.
1307 newinet->inet_opt = NULL;
1308 newnp->ipv6_ac_list = NULL;
1309 newnp->ipv6_fl_list = NULL;
1312 newnp->rxopt.all = np->rxopt.all;
1314 /* Clone pktoptions received with SYN */
1315 newnp->pktoptions = NULL;
1316 if (treq->pktopts != NULL) {
1317 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1318 consume_skb(treq->pktopts);
1319 treq->pktopts = NULL;
1320 if (newnp->pktoptions)
1321 skb_set_owner_r(newnp->pktoptions, newsk);
1324 newnp->mcast_oif = inet6_iif(skb);
1325 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1326 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1328 /* Clone native IPv6 options from listening socket (if any)
1330 Yes, keeping reference count would be much more clever,
1331 but we make one more one thing there: reattach optmem
1335 newnp->opt = ipv6_dup_options(newsk, np->opt);
1337 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1339 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1340 newnp->opt->opt_flen);
1342 tcp_mtup_init(newsk);
1343 tcp_sync_mss(newsk, dst_mtu(dst));
1344 newtp->advmss = dst_metric_advmss(dst);
1345 if (tcp_sk(sk)->rx_opt.user_mss &&
1346 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1347 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1349 tcp_initialize_rcv_mss(newsk);
/* Seed the RTT estimate from the SYN-ACK round trip. */
1350 if (tcp_rsk(req)->snt_synack)
1351 tcp_valid_rtt_meas(newsk,
1352 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1353 newtp->total_retrans = req->retrans;
1355 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1356 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1358 #ifdef CONFIG_TCP_MD5SIG
1359 /* Copy over the MD5 key from the original socket */
1360 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1361 /* We're using one, so create a matching key
1362 * on the newsk structure. If we fail to get
1363 * memory, then we end up not copying the key
1366 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1367 AF_INET6, key->key, key->keylen, GFP_ATOMIC);
1371 if (__inet_inherit_port(sk, newsk) < 0) {
1375 __inet6_hash(newsk, NULL);
/* Failure bookkeeping: accept-queue overflow vs. general drop. */
1380 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1384 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
/* Initialize/verify the TCP checksum of an incoming IPv6 segment.
 * Returns 0 when the checksum is known good; otherwise leaves skb->csum
 * primed (pseudo-header folded in) for deferred software verification.
 * NOTE(review): interior lines of this function are missing from this
 * view; comments describe only the visible statements. */
1388 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
/* Hardware supplied a full checksum: fold in the IPv6 pseudo-header;
 * a zero result proves the segment is intact. */
1390 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1391 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1392 &ipv6_hdr(skb)->daddr, skb->csum)) {
1393 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* No usable hardware checksum: seed skb->csum with the pseudo-header
 * checksum so a later pass can complete verification incrementally. */
1398 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1399 &ipv6_hdr(skb)->saddr,
1400 &ipv6_hdr(skb)->daddr, 0));
/* Small packets are cheap to verify right away rather than deferring. */
1402 if (skb->len <= 76) {
1403 return __skb_checksum_complete(skb);
1408 /* The socket must have it's spinlock held when we get
1411 * We have a potential double-lock case here, so even when
1412 * doing backlog processing we use the BH locking scheme.
1413 * This is because we cannot sleep with the original spinlock
/* Per-socket receive path for IPv6 TCP: dispatches the segment by socket
 * state (established fast path, listen, or state machine), then latches
 * IPV6_PKTOPTIONS from the last in-order segment. Returns 0 on success;
 * error paths send a RST and/or drop. NOTE(review): many interior lines
 * (labels, discard paths) are not visible in this chunk. */
1416 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1418 struct ipv6_pinfo *np = inet6_sk(sk);
1419 struct tcp_sock *tp;
1420 struct sk_buff *opt_skb = NULL;
1422 /* Imagine: socket is IPv6. IPv4 packet arrives,
1423 goes to IPv4 receive handler and backlogged.
1424 From backlog it always goes here. Kerboom...
1425 Fortunately, tcp_rcv_established and rcv_established
1426 handle them correctly, but it is not case with
1427 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* Mapped IPv4 traffic on an IPv6 socket is handed straight to the
 * IPv4 receive routine (see ANK's note above). */
1430 if (skb->protocol == htons(ETH_P_IP))
1431 return tcp_v4_do_rcv(sk, skb);
1433 #ifdef CONFIG_TCP_MD5SIG
/* Drop segments failing the configured TCP-MD5 (RFC 2385) check. */
1434 if (tcp_v6_inbound_md5_hash (sk, skb))
/* Socket filter (BPF) gets a chance to reject the segment. */
1438 if (sk_filter(sk, skb))
1442 * socket locking is here for SMP purposes as backlog rcv
1443 * is currently called with bh processing disabled.
1446 /* Do Stevens' IPV6_PKTOPTIONS.
1448 Yes, guys, it is the only place in our code, where we
1449 may make it not affecting IPv4.
1450 The rest of code is protocol independent,
1451 and I do not like idea to uglify IPv4.
1453 Actually, all the idea behind IPV6_PKTOPTIONS
1454 looks not very well thought. For now we latch
1455 options, received in the last packet, enqueued
1456 by tcp. Feel free to propose better solution.
/* Keep a clone so ancillary data survives even if tcp consumes skb. */
1460 opt_skb = skb_clone(skb, GFP_ATOMIC);
1462 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1463 sock_rps_save_rxhash(sk, skb);
1464 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1467 goto ipv6_pktoptions;
/* Slow path: sanity-check length/checksum before state processing. */
1471 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1474 if (sk->sk_state == TCP_LISTEN) {
/* May return the listener itself, a freshly created child socket,
 * or NULL (segment already consumed). */
1475 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1480 * Queue it on the new socket if the new socket is active,
1481 * otherwise we just shortcircuit this and continue with
1485 sock_rps_save_rxhash(nsk, skb);
1486 if (tcp_child_process(sk, nsk, skb))
1489 __kfree_skb(opt_skb);
1493 sock_rps_save_rxhash(sk, skb);
/* Non-established, non-listen states go through the full state machine. */
1495 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1498 goto ipv6_pktoptions;
1502 tcp_v6_send_reset(sk, skb);
1505 __kfree_skb(opt_skb);
1509 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1514 /* Do you ask, what is it?
1516 1. skb was enqueued by tcp.
1517 2. skb is added to tail of read queue, rather than out of order.
1518 3. socket is not in passive state.
1519 4. Finally, it really contains options, which user wants to receive.
1522 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1523 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1524 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1525 np->mcast_oif = inet6_iif(opt_skb);
1526 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1527 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1528 if (np->rxopt.bits.rxtclass)
/* NOTE(review): this reads ipv6_hdr(skb) while the two branches above
 * read from opt_skb; skb may already have been consumed by the tcp
 * receive routines at this label. Looks like it should use opt_skb —
 * confirm against upstream history before changing. */
1529 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1530 if (ipv6_opt_accepted(sk, opt_skb)) {
/* Hand ownership of the latched options to the socket; free any
 * previously latched skb returned by xchg(). */
1531 skb_set_owner_r(opt_skb, sk);
1532 opt_skb = xchg(&np->pktoptions, opt_skb);
1534 __kfree_skb(opt_skb);
1535 opt_skb = xchg(&np->pktoptions, NULL);
/* Protocol handler entry point for incoming IPv6 TCP segments (installed
 * via tcpv6_protocol.handler). Validates the header, fills the TCP
 * control block, looks up the owning socket and either processes the
 * segment directly, prequeues it, or backlogs it. Also handles the
 * no-socket and TIME_WAIT cases. NOTE(review): several interior lines
 * (labels such as no_tcp_socket / do_time_wait, early "goto discard_it"
 * targets) are missing from this view. */
1543 static int tcp_v6_rcv(struct sk_buff *skb)
1545 const struct tcphdr *th;
1546 const struct ipv6hdr *hdr;
1549 struct net *net = dev_net(skb->dev);
/* Only packets addressed to this host are processed. */
1551 if (skb->pkt_type != PACKET_HOST)
1555 * Count it even if it's bad.
1557 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
/* Ensure the basic TCP header is in linear skb data. */
1559 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* data offset smaller than the minimum header is malformed. */
1564 if (th->doff < sizeof(struct tcphdr)/4)
1566 if (!pskb_may_pull(skb, th->doff*4))
1569 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
/* Populate the per-skb TCP control block used by the receive path. */
1573 hdr = ipv6_hdr(skb);
1574 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence unit each. */
1575 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1576 skb->len - th->doff*4);
1577 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1578 TCP_SKB_CB(skb)->when = 0;
1579 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1580 TCP_SKB_CB(skb)->sacked = 0;
1582 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1587 if (sk->sk_state == TCP_TIME_WAIT)
/* Generalized-TTL-security (min_hopcount): drop segments whose hop
 * limit is below the socket's configured floor. */
1590 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1591 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1592 goto discard_and_relse;
1595 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1596 goto discard_and_relse;
1598 if (sk_filter(sk, skb))
1599 goto discard_and_relse;
1603 bh_lock_sock_nested(sk);
/* Socket not owned by a user context: process now (possibly via the
 * NET_DMA copy-offload path or the prequeue); otherwise backlog it. */
1605 if (!sock_owned_by_user(sk)) {
1606 #ifdef CONFIG_NET_DMA
1607 struct tcp_sock *tp = tcp_sk(sk);
1608 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1609 tp->ucopy.dma_chan = net_dma_find_channel();
1610 if (tp->ucopy.dma_chan)
1611 ret = tcp_v6_do_rcv(sk, skb);
1615 if (!tcp_prequeue(sk, skb))
1616 ret = tcp_v6_do_rcv(sk, skb);
/* Owned by user: queue to the backlog, bounded by rcvbuf+sndbuf. */
1618 } else if (unlikely(sk_add_backlog(sk, skb,
1619 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1621 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1622 goto discard_and_relse;
1627 return ret ? -1 : 0;
/* No-socket path: policy check, then RST any valid-looking segment. */
1630 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1633 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1635 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1637 tcp_v6_send_reset(NULL, skb);
/* TIME_WAIT path: re-validate, then let the timewait state machine
 * decide whether to SYN-restart, RST, ACK, or silently succeed. */
1654 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1655 inet_twsk_put(inet_twsk(sk));
1659 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1660 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1661 inet_twsk_put(inet_twsk(sk));
1665 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* A new SYN for this 4-tuple: find a current listener and, if one
 * exists, retire the timewait socket so the connection can restart. */
1670 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1671 &ipv6_hdr(skb)->daddr,
1672 ntohs(th->dest), inet6_iif(skb));
1674 struct inet_timewait_sock *tw = inet_twsk(sk);
1675 inet_twsk_deschedule(tw, &tcp_death_row);
1680 /* Fall through to ACK */
1683 tcp_v6_timewait_ack(sk, skb);
1687 case TCP_TW_SUCCESS:;
/* Timewait-socket operations for IPv6 TCP: object size plus the
 * uniqueness check and destructor shared with IPv4 TCP. */
1692 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1693 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1694 .twsk_unique = tcp_twsk_unique,
1695 .twsk_destructor= tcp_twsk_destructor,
/* Address-family-specific connection-socket operations for native
 * (non-mapped) IPv6 TCP sockets. */
1698 static const struct inet_connection_sock_af_ops ipv6_specific = {
1699 .queue_xmit = inet6_csk_xmit,
1700 .send_check = tcp_v6_send_check,
1701 .rebuild_header = inet6_sk_rebuild_header,
1702 .conn_request = tcp_v6_conn_request,
1703 .syn_recv_sock = tcp_v6_syn_recv_sock,
1704 .net_header_len = sizeof(struct ipv6hdr),
1705 .net_frag_header_len = sizeof(struct frag_hdr),
1706 .setsockopt = ipv6_setsockopt,
1707 .getsockopt = ipv6_getsockopt,
1708 .addr2sockaddr = inet6_csk_addr2sockaddr,
1709 .sockaddr_len = sizeof(struct sockaddr_in6),
1710 .bind_conflict = inet6_csk_bind_conflict,
1711 #ifdef CONFIG_COMPAT
1712 .compat_setsockopt = compat_ipv6_setsockopt,
1713 .compat_getsockopt = compat_ipv6_getsockopt,
1717 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) helper ops for native IPv6 sockets. */
1718 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1719 .md5_lookup = tcp_v6_md5_lookup,
1720 .calc_md5_hash = tcp_v6_md5_hash_skb,
1721 .md5_parse = tcp_v6_parse_md5_keys,
1726 * TCP over IPv4 via INET6 API
/* AF ops used when an IPv6 socket falls back to IPv4 (v4-mapped
 * addresses): transmit/header routines come from the IPv4 stack while
 * sockopt/addressing stay IPv6. */
1729 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1730 .queue_xmit = ip_queue_xmit,
1731 .send_check = tcp_v4_send_check,
1732 .rebuild_header = inet_sk_rebuild_header,
1733 .conn_request = tcp_v6_conn_request,
1734 .syn_recv_sock = tcp_v6_syn_recv_sock,
1735 .net_header_len = sizeof(struct iphdr),
1736 .setsockopt = ipv6_setsockopt,
1737 .getsockopt = ipv6_getsockopt,
1738 .addr2sockaddr = inet6_csk_addr2sockaddr,
1739 .sockaddr_len = sizeof(struct sockaddr_in6),
1740 .bind_conflict = inet6_csk_bind_conflict,
1741 #ifdef CONFIG_COMPAT
1742 .compat_setsockopt = compat_ipv6_setsockopt,
1743 .compat_getsockopt = compat_ipv6_getsockopt,
1747 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 helper ops for v4-mapped sockets: hashing uses the IPv4
 * routines, key parsing stays with the IPv6 parser. */
1748 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1749 .md5_lookup = tcp_v4_md5_lookup,
1750 .calc_md5_hash = tcp_v4_md5_hash_skb,
1751 .md5_parse = tcp_v6_parse_md5_keys,
1755 /* NOTE: A lot of things set to zero explicitly by call to
1756 * sk_alloc() so need not be done here.
/* Per-socket init hook for tcpv6_prot: installs the IPv6-specific
 * connection ops (and MD5 ops when configured). */
1758 static int tcp_v6_init_sock(struct sock *sk)
1760 struct inet_connection_sock *icsk = inet_csk(sk);
1764 icsk->icsk_af_ops = &ipv6_specific;
1766 #ifdef CONFIG_TCP_MD5SIG
1767 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Destroy hook: run the shared IPv4 TCP teardown first, then release
 * the IPv6-specific socket state. */
1773 static void tcp_v6_destroy_sock(struct sock *sk)
1775 tcp_v4_destroy_sock(sk);
1776 inet6_destroy_sock(sk);
1779 #ifdef CONFIG_PROC_FS
1780 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 row for a pending connection request
 * (SYN_RECV): addresses/ports in hex plus request-timer state. */
1781 static void get_openreq6(struct seq_file *seq,
1782 const struct sock *sk, struct request_sock *req, int i, int uid)
/* Remaining lifetime of the request, in jiffies (may go negative). */
1784 int ttd = req->expires - jiffies;
1785 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1786 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1792 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1793 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1795 src->s6_addr32[0], src->s6_addr32[1],
1796 src->s6_addr32[2], src->s6_addr32[3],
1797 ntohs(inet_rsk(req)->loc_port),
1798 dest->s6_addr32[0], dest->s6_addr32[1],
1799 dest->s6_addr32[2], dest->s6_addr32[3],
1800 ntohs(inet_rsk(req)->rmt_port),
1802 0,0, /* could print option size, but that is af dependent. */
1803 1, /* timers active (only the expire timer) */
1804 jiffies_to_clock_t(ttd),
1807 0, /* non standard timer */
1808 0, /* open_requests have no inode */
/* Emit one /proc/net/tcp6 row for a full socket: addresses, queue
 * depths, timer state, and congestion info. */
1812 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1814 const struct in6_addr *dest, *src;
1817 unsigned long timer_expires;
1818 const struct inet_sock *inet = inet_sk(sp);
1819 const struct tcp_sock *tp = tcp_sk(sp);
1820 const struct inet_connection_sock *icsk = inet_csk(sp);
1821 const struct ipv6_pinfo *np = inet6_sk(sp);
1824 src = &np->rcv_saddr;
1825 destp = ntohs(inet->inet_dport);
1826 srcp = ntohs(inet->inet_sport);
/* Pick whichever timer is pending: retransmit, zero-window probe,
 * or the keepalive sk_timer; otherwise report "now". */
1828 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1830 timer_expires = icsk->icsk_timeout;
1831 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1833 timer_expires = icsk->icsk_timeout;
1834 } else if (timer_pending(&sp->sk_timer)) {
1836 timer_expires = sp->sk_timer.expires;
1839 timer_expires = jiffies;
1843 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1844 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1846 src->s6_addr32[0], src->s6_addr32[1],
1847 src->s6_addr32[2], src->s6_addr32[3], srcp,
1848 dest->s6_addr32[0], dest->s6_addr32[1],
1849 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx queue: unacked bytes; rx queue: backlog for listeners, unread
 * bytes otherwise. */
1851 tp->write_seq-tp->snd_una,
1852 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1854 jiffies_to_clock_t(timer_expires - jiffies),
1855 icsk->icsk_retransmits,
1857 icsk->icsk_probes_out,
1859 atomic_read(&sp->sk_refcnt), sp,
1860 jiffies_to_clock_t(icsk->icsk_rto),
1861 jiffies_to_clock_t(icsk->icsk_ack.ato),
1862 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* -1 flags "still in initial slow start" to readers of the row. */
1864 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/* Emit one /proc/net/tcp6 row for a TIME_WAIT socket: addresses plus
 * the remaining timewait lifetime; most columns are fixed zeros. */
1868 static void get_timewait6_sock(struct seq_file *seq,
1869 struct inet_timewait_sock *tw, int i)
1871 const struct in6_addr *dest, *src;
1873 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
/* Remaining timewait lifetime in jiffies (may go negative). */
1874 int ttd = tw->tw_ttd - jiffies;
1879 dest = &tw6->tw_v6_daddr;
1880 src = &tw6->tw_v6_rcv_saddr;
1881 destp = ntohs(tw->tw_dport);
1882 srcp = ntohs(tw->tw_sport);
1885 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1886 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1888 src->s6_addr32[0], src->s6_addr32[1],
1889 src->s6_addr32[2], src->s6_addr32[3], srcp,
1890 dest->s6_addr32[0], dest->s6_addr32[1],
1891 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1892 tw->tw_substate, 0, 0,
1893 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1894 atomic_read(&tw->tw_refcnt), tw);
/* seq_file show callback for /proc/net/tcp6: prints the header row for
 * SEQ_START_TOKEN, then dispatches to the per-state row formatter. */
1897 static int tcp6_seq_show(struct seq_file *seq, void *v)
1899 struct tcp_iter_state *st;
1901 if (v == SEQ_START_TOKEN) {
1906 "st tx_queue rx_queue tr tm->when retrnsmt"
1907 " uid timeout inode\n");
1912 switch (st->state) {
1913 case TCP_SEQ_STATE_LISTENING:
1914 case TCP_SEQ_STATE_ESTABLISHED:
1915 get_tcp6_sock(seq, v, st->num);
1917 case TCP_SEQ_STATE_OPENREQ:
1918 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1920 case TCP_SEQ_STATE_TIME_WAIT:
1921 get_timewait6_sock(seq, v, st->num);
/* file_operations backing the /proc/net/tcp6 seq_file. */
1928 static const struct file_operations tcp6_afinfo_seq_fops = {
1929 .owner = THIS_MODULE,
1930 .open = tcp_seq_open,
1932 .llseek = seq_lseek,
1933 .release = seq_release_net
/* Registration descriptor tying the tcp6 seq_file ops to its show
 * callback for tcp_proc_register(). */
1936 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1939 .seq_fops = &tcp6_afinfo_seq_fops,
1941 .show = tcp6_seq_show,
/* Per-netns registration of /proc/net/tcp6. Returns 0 or -errno. */
1945 int __net_init tcp6_proc_init(struct net *net)
1947 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Per-netns removal of /proc/net/tcp6. */
1950 void tcp6_proc_exit(struct net *net)
1952 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* The struct proto for AF_INET6 SOCK_STREAM sockets: mostly shared TCP
 * routines with IPv6-specific init/destroy/backlog/hash hooks. */
1956 struct proto tcpv6_prot = {
1958 .owner = THIS_MODULE,
1960 .connect = tcp_v6_connect,
1961 .disconnect = tcp_disconnect,
1962 .accept = inet_csk_accept,
1964 .init = tcp_v6_init_sock,
1965 .destroy = tcp_v6_destroy_sock,
1966 .shutdown = tcp_shutdown,
1967 .setsockopt = tcp_setsockopt,
1968 .getsockopt = tcp_getsockopt,
1969 .recvmsg = tcp_recvmsg,
1970 .sendmsg = tcp_sendmsg,
1971 .sendpage = tcp_sendpage,
1972 .backlog_rcv = tcp_v6_do_rcv,
1973 .release_cb = tcp_release_cb,
1974 .hash = tcp_v6_hash,
1975 .unhash = inet_unhash,
1976 .get_port = inet_csk_get_port,
1977 .enter_memory_pressure = tcp_enter_memory_pressure,
/* Memory accounting is shared with IPv4 TCP (same global counters). */
1978 .sockets_allocated = &tcp_sockets_allocated,
1979 .memory_allocated = &tcp_memory_allocated,
1980 .memory_pressure = &tcp_memory_pressure,
1981 .orphan_count = &tcp_orphan_count,
1982 .sysctl_wmem = sysctl_tcp_wmem,
1983 .sysctl_rmem = sysctl_tcp_rmem,
1984 .max_header = MAX_TCP_HEADER,
1985 .obj_size = sizeof(struct tcp6_sock),
1986 .slab_flags = SLAB_DESTROY_BY_RCU,
1987 .twsk_prot = &tcp6_timewait_sock_ops,
1988 .rsk_prot = &tcp6_request_sock_ops,
1989 .h.hashinfo = &tcp_hashinfo,
1990 .no_autobind = true,
1991 #ifdef CONFIG_COMPAT
1992 .compat_setsockopt = compat_tcp_setsockopt,
1993 .compat_getsockopt = compat_tcp_getsockopt,
1995 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
1996 .proto_cgroup = tcp_proto_cgroup,
/* inet6 protocol descriptor for IPPROTO_TCP: receive/error handlers
 * plus GSO/GRO offload callbacks. */
2000 static const struct inet6_protocol tcpv6_protocol = {
2001 .handler = tcp_v6_rcv,
2002 .err_handler = tcp_v6_err,
2003 .gso_send_check = tcp_v6_gso_send_check,
2004 .gso_segment = tcp_tso_segment,
2005 .gro_receive = tcp6_gro_receive,
2006 .gro_complete = tcp6_gro_complete,
2007 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* protosw entry mapping AF_INET6/SOCK_STREAM/IPPROTO_TCP onto
 * tcpv6_prot and the inet6 stream socket ops. */
2010 static struct inet_protosw tcpv6_protosw = {
2011 .type = SOCK_STREAM,
2012 .protocol = IPPROTO_TCP,
2013 .prot = &tcpv6_prot,
2014 .ops = &inet6_stream_ops,
2016 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the kernel control socket used for sending
 * RSTs/ACKs on behalf of this namespace. */
2020 static int __net_init tcpv6_net_init(struct net *net)
2022 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2023 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: destroy the namespace's TCP control socket. */
2026 static void __net_exit tcpv6_net_exit(struct net *net)
2028 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns exit: purge AF_INET6 timewait sockets for all
 * namespaces going away together. */
2031 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2033 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* pernet_operations wiring the per-namespace init/exit hooks above. */
2036 static struct pernet_operations tcpv6_net_ops = {
2037 .init = tcpv6_net_init,
2038 .exit = tcpv6_net_exit,
2039 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register the inet6 protocol handler, the protosw entry,
 * and the pernet ops, unwinding in reverse order on failure (the
 * goto-cleanup labels below). Returns 0 or -errno. */
2042 int __init tcpv6_init(void)
2046 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2050 /* register inet6 protocol */
2051 ret = inet6_register_protosw(&tcpv6_protosw);
2053 goto out_tcpv6_protocol;
2055 ret = register_pernet_subsys(&tcpv6_net_ops);
2057 goto out_tcpv6_protosw;
/* Error unwind: undo registrations in reverse order. */
2062 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2064 inet6_unregister_protosw(&tcpv6_protosw);
2068 void tcpv6_exit(void)
2070 unregister_pernet_subsys(&tcpv6_net_ops);
2071 inet6_unregister_protosw(&tcpv6_protosw);
2072 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);