// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

/* Helper returning the inet6 part (struct ipv6_pinfo) of a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
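
/*
 * Illustration (editor's sketch, not part of the original source): the
 * offset arithmetic above works because struct tcp6_sock places its
 * ipv6_pinfo last, simplified from include/linux/ipv6.h:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock	  tcp;
 *		struct ipv6_pinfo inet6;
 *	};
 *
 * so "(u8 *)sk + sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo)"
 * is exactly &((struct tcp6_sock *)sk)->inet6, computed without loading
 * the inet_sk(sk)->pinet6 pointer.
 */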

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that
	 * are outside of the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
							 sk->sk_v6_daddr.s6_addr32,
							 inet->inet_sport,
							 inet->inet_dport);
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
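
/*
 * Usage sketch (hypothetical userspace code, not part of this file):
 * connecting an AF_INET6 socket to a v4-mapped address exercises the
 * IPV6_ADDR_MAPPED branch above, which hands the socket over to
 * tcp_v4_connect():
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * With IPV6_V6ONLY set on the socket, the same connect() fails with
 * ENETUNREACH due to the __ipv6_only_sock() check.
 */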

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
			       sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}
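
/*
 * Usage sketch (hypothetical userspace code): installing an MD5 key for
 * a single IPv6 peer via the plain TCP_MD5SIG option parsed above:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	sin6->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &sin6->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * TCP_MD5SIG_EXT with tcpm_flags = TCP_MD5SIG_FLAG_PREFIX and a
 * tcpm_prefixlen keys a whole prefix instead of a single address.
 */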

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
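
/*
 * For reference, the pseudo-header hashed above (mirroring the kernel's
 * struct tcp6_pseudohdr definition) covers the addresses, the segment
 * length and the protocol number, per RFC 2460 section 8.1:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;
 *		__be32		protocol;
 *	};
 *
 * The TCP header that follows it is hashed with its checksum field
 * zeroed, since the real checksum is only filled in later.
 */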

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}
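
/*
 * Summary of the checks above (no new behavior, just the four cases):
 *
 *	expected key	MD5 option in segment	result
 *	no		no			accept (return false)
 *	yes		no			drop (TCPMD5NOTFOUND)
 *	no		yes			drop (TCPMD5UNEXPECTED)
 *	yes		yes			drop unless hashes match
 */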

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even if it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped
	 * our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * via the source port, then look up the MD5 key through it.
		 * We do not lose security here: the incoming packet is
		 * verified against the MD5 hash of the key we find, and no
		 * RST is generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source),
					   tcp_v6_iif_l3_slave(skb),
					   tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}
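
/*
 * Worked example for the RFC 7323 rule above (illustrative numbers
 * only): with rcv_wscale = 7 and a 131072-byte receive window, the
 * 16-bit window field carries 131072 >> 7 = 1024, and the peer scales
 * it back up by the same factor.
 */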

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment the IPv4 tcp
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives, goes
	   to the IPv4 receive handler and is backlogged.
	   From the backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   may make it not affect IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   the options received in the last packet enqueued
	   by tcp. Feel free to propose a better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* You may ask, what is this?

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, rather than out of order.
	   3. the socket is not in a passive state.
	   4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

1454 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1455                            const struct tcphdr *th)
1456 {
1457         /* This is tricky: we move the IP6CB into its new location inside
1458          * TCP_SKB_CB(). This must be done after xfrm6_policy_check(), because
1459          * _decode_session6() still uses IP6CB().
1460          * The barrier() keeps the compiler from playing aliasing games.
1461          */
1462         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1463                 sizeof(struct inet6_skb_parm));
1464         barrier();
1465
1466         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1467         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1468                                     skb->len - th->doff*4);
1469         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1470         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1471         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1472         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1473         TCP_SKB_CB(skb)->sacked = 0;
1474         TCP_SKB_CB(skb)->has_rxtstamp =
1475                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1476 }
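
/* Worked example of the end_seq arithmetic above (illustrative values):
 * a segment with seq = 1000, doff = 8 (32 header bytes), skb->len = 132
 * (100 payload bytes) and FIN set yields
 *
 *	end_seq = 1000 + 0 (syn) + 1 (fin) + 132 - 32 = 1101
 *
 * SYN and FIN each consume one unit of sequence space on top of the payload,
 * which is why pure SYN and FIN segments can be acknowledged at all.
 */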
1477
1478 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1479 {
1480         struct sk_buff *skb_to_free;
1481         int sdif = inet6_sdif(skb);
1482         const struct tcphdr *th;
1483         const struct ipv6hdr *hdr;
1484         bool refcounted;
1485         struct sock *sk;
1486         int ret;
1487         struct net *net = dev_net(skb->dev);
1488
1489         if (skb->pkt_type != PACKET_HOST)
1490                 goto discard_it;
1491
1492         /*
1493          *      Count it even if it's bad.
1494          */
1495         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1496
1497         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1498                 goto discard_it;
1499
1500         th = (const struct tcphdr *)skb->data;
1501
1502         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1503                 goto bad_packet;
1504         if (!pskb_may_pull(skb, th->doff*4))
1505                 goto discard_it;
1506
1507         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1508                 goto csum_error;
1509
1510         th = (const struct tcphdr *)skb->data;
1511         hdr = ipv6_hdr(skb);
1512
1513 lookup:
1514         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1515                                 th->source, th->dest, inet6_iif(skb), sdif,
1516                                 &refcounted);
1517         if (!sk)
1518                 goto no_tcp_socket;
1519
1520 process:
1521         if (sk->sk_state == TCP_TIME_WAIT)
1522                 goto do_time_wait;
1523
1524         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1525                 struct request_sock *req = inet_reqsk(sk);
1526                 bool req_stolen = false;
1527                 struct sock *nsk;
1528
1529                 sk = req->rsk_listener;
1530                 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1531                         sk_drops_add(sk, skb);
1532                         reqsk_put(req);
1533                         goto discard_it;
1534                 }
1535                 if (tcp_checksum_complete(skb)) {
1536                         reqsk_put(req);
1537                         goto csum_error;
1538                 }
1539                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1540                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1541                         goto lookup;
1542                 }
1543                 sock_hold(sk);
1544                 refcounted = true;
1545                 nsk = NULL;
1546                 if (!tcp_filter(sk, skb)) {
1547                         th = (const struct tcphdr *)skb->data;
1548                         hdr = ipv6_hdr(skb);
1549                         tcp_v6_fill_cb(skb, hdr, th);
1550                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1551                 }
1552                 if (!nsk) {
1553                         reqsk_put(req);
1554                         if (req_stolen) {
1555                                 /* Another CPU got exclusive access to req
1556                                  * and created a full-blown socket.
1557                                  * Try to feed this packet to that socket
1558                                  * instead of discarding it.
1559                                  */
1560                                 tcp_v6_restore_cb(skb);
1561                                 sock_put(sk);
1562                                 goto lookup;
1563                         }
1564                         goto discard_and_relse;
1565                 }
1566                 if (nsk == sk) {
1567                         reqsk_put(req);
1568                         tcp_v6_restore_cb(skb);
1569                 } else if (tcp_child_process(sk, nsk, skb)) {
1570                         tcp_v6_send_reset(nsk, skb);
1571                         goto discard_and_relse;
1572                 } else {
1573                         sock_put(sk);
1574                         return 0;
1575                 }
1576         }
1577         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1578                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1579                 goto discard_and_relse;
1580         }
1581
1582         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1583                 goto discard_and_relse;
1584
1585         if (tcp_v6_inbound_md5_hash(sk, skb))
1586                 goto discard_and_relse;
1587
1588         if (tcp_filter(sk, skb))
1589                 goto discard_and_relse;
1590         th = (const struct tcphdr *)skb->data;
1591         hdr = ipv6_hdr(skb);
1592         tcp_v6_fill_cb(skb, hdr, th);
1593
1594         skb->dev = NULL;
1595
1596         if (sk->sk_state == TCP_LISTEN) {
1597                 ret = tcp_v6_do_rcv(sk, skb);
1598                 goto put_and_return;
1599         }
1600
1601         sk_incoming_cpu_update(sk);
1602
1603         bh_lock_sock_nested(sk);
1604         tcp_segs_in(tcp_sk(sk), skb);
1605         ret = 0;
1606         if (!sock_owned_by_user(sk)) {
1607                 skb_to_free = sk->sk_rx_skb_cache;
1608                 sk->sk_rx_skb_cache = NULL;
1609                 ret = tcp_v6_do_rcv(sk, skb);
1610         } else {
1611                 if (tcp_add_backlog(sk, skb))
1612                         goto discard_and_relse;
1613                 skb_to_free = NULL;
1614         }
1615         bh_unlock_sock(sk);
1616         if (skb_to_free)
1617                 __kfree_skb(skb_to_free);
1618 put_and_return:
1619         if (refcounted)
1620                 sock_put(sk);
1621         return ret ? -1 : 0;
1622
1623 no_tcp_socket:
1624         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1625                 goto discard_it;
1626
1627         tcp_v6_fill_cb(skb, hdr, th);
1628
1629         if (tcp_checksum_complete(skb)) {
1630 csum_error:
1631                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1632 bad_packet:
1633                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1634         } else {
1635                 tcp_v6_send_reset(NULL, skb);
1636         }
1637
1638 discard_it:
1639         kfree_skb(skb);
1640         return 0;
1641
1642 discard_and_relse:
1643         sk_drops_add(sk, skb);
1644         if (refcounted)
1645                 sock_put(sk);
1646         goto discard_it;
1647
1648 do_time_wait:
1649         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1650                 inet_twsk_put(inet_twsk(sk));
1651                 goto discard_it;
1652         }
1653
1654         tcp_v6_fill_cb(skb, hdr, th);
1655
1656         if (tcp_checksum_complete(skb)) {
1657                 inet_twsk_put(inet_twsk(sk));
1658                 goto csum_error;
1659         }
1660
1661         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1662         case TCP_TW_SYN:
1663         {
1664                 struct sock *sk2;
1665
1666                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1667                                             skb, __tcp_hdrlen(th),
1668                                             &ipv6_hdr(skb)->saddr, th->source,
1669                                             &ipv6_hdr(skb)->daddr,
1670                                             ntohs(th->dest),
1671                                             tcp_v6_iif_l3_slave(skb),
1672                                             sdif);
1673                 if (sk2) {
1674                         struct inet_timewait_sock *tw = inet_twsk(sk);
1675                         inet_twsk_deschedule_put(tw);
1676                         sk = sk2;
1677                         tcp_v6_restore_cb(skb);
1678                         refcounted = false;
1679                         goto process;
1680                 }
1681         }
1682                 /* No listener found: fall through and ACK, as for TCP_TW_ACK. */
1683                 /* fall through */
1684         case TCP_TW_ACK:
1685                 tcp_v6_timewait_ack(sk, skb);
1686                 break;
1687         case TCP_TW_RST:
1688                 tcp_v6_send_reset(sk, skb);
1689                 inet_twsk_deschedule_put(inet_twsk(sk));
1690                 goto discard_it;
1691         case TCP_TW_SUCCESS:
1692                 ;
1693         }
1694         goto discard_it;
1695 }
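
/* Editorial sketch of the IPv6 pseudo-header folded in by ip6_compute_pseudo()
 * via skb_checksum_init() above: source address, destination address, upper
 * layer length and next-header value are summed in 1's-complement (RFC 8200).
 * A simplified, hypothetical helper (no csum_fold(), illustration only):
 *
 *	static u32 pseudo_hdr_sum(const struct in6_addr *saddr,
 *				  const struct in6_addr *daddr,
 *				  u32 len, u8 proto)
 *	{
 *		u64 sum = 0;
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			sum += (__force u32)saddr->s6_addr32[i];
 *		for (i = 0; i < 4; i++)
 *			sum += (__force u32)daddr->s6_addr32[i];
 *		sum += (__force u32)htonl(len) + (__force u32)htonl(proto);
 *		while (sum >> 32)			// fold carries
 *			sum = (sum & 0xffffffffULL) + (sum >> 32);
 *		return (u32)sum;
 *	}
 */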
1696
1697 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1698 {
1699         const struct ipv6hdr *hdr;
1700         const struct tcphdr *th;
1701         struct sock *sk;
1702
1703         if (skb->pkt_type != PACKET_HOST)
1704                 return;
1705
1706         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1707                 return;
1708
1709         hdr = ipv6_hdr(skb);
1710         th = tcp_hdr(skb);
1711
1712         if (th->doff < sizeof(struct tcphdr) / 4)
1713                 return;
1714
1715         /* Note: we use inet6_iif() here, not tcp_v6_iif(). */
1716         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1717                                         &hdr->saddr, th->source,
1718                                         &hdr->daddr, ntohs(th->dest),
1719                                         inet6_iif(skb), inet6_sdif(skb));
1720         if (sk) {
1721                 skb->sk = sk;
1722                 skb->destructor = sock_edemux;
1723                 if (sk_fullsock(sk)) {
1724                         struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1725
1726                         if (dst)
1727                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1728                         if (dst &&
1729                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1730                                 skb_dst_set_noref(skb, dst);
1731                 }
1732         }
1733 }
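
/* Editorial note on the lockless READ_ONCE(sk->sk_rx_dst) above: early demux
 * runs without the socket lock, while the cached route is cleared under the
 * lock (see the WRITE_ONCE(sk->sk_rx_dst, NULL) in tcp_v6_do_rcv()). The
 * pattern in miniature, under that pairing assumption:
 *
 *	// writer, socket locked:
 *	WRITE_ONCE(sk->sk_rx_dst, NULL);
 *
 *	// lockless reader (this function):
 *	struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 *	if (dst)
 *		dst = dst_check(dst, cookie);
 *
 * The annotations keep the compiler from tearing or refetching the pointer
 * and document the race for tools such as KCSAN.
 */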
1734
1735 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1736         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1737         .twsk_unique    = tcp_twsk_unique,
1738         .twsk_destructor = tcp_twsk_destructor,
1739 };
1740
1741 static const struct inet_connection_sock_af_ops ipv6_specific = {
1742         .queue_xmit        = inet6_csk_xmit,
1743         .send_check        = tcp_v6_send_check,
1744         .rebuild_header    = inet6_sk_rebuild_header,
1745         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1746         .conn_request      = tcp_v6_conn_request,
1747         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1748         .net_header_len    = sizeof(struct ipv6hdr),
1749         .net_frag_header_len = sizeof(struct frag_hdr),
1750         .setsockopt        = ipv6_setsockopt,
1751         .getsockopt        = ipv6_getsockopt,
1752         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1753         .sockaddr_len      = sizeof(struct sockaddr_in6),
1754 #ifdef CONFIG_COMPAT
1755         .compat_setsockopt = compat_ipv6_setsockopt,
1756         .compat_getsockopt = compat_ipv6_getsockopt,
1757 #endif
1758         .mtu_reduced       = tcp_v6_mtu_reduced,
1759 };
1760
1761 #ifdef CONFIG_TCP_MD5SIG
1762 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1763         .md5_lookup     =       tcp_v6_md5_lookup,
1764         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1765         .md5_parse      =       tcp_v6_parse_md5_keys,
1766 };
1767 #endif
1768
1769 /*
1770  *      TCP over IPv4 via INET6 API
1771  */
1772 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1773         .queue_xmit        = ip_queue_xmit,
1774         .send_check        = tcp_v4_send_check,
1775         .rebuild_header    = inet_sk_rebuild_header,
1776         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1777         .conn_request      = tcp_v6_conn_request,
1778         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1779         .net_header_len    = sizeof(struct iphdr),
1780         .setsockopt        = ipv6_setsockopt,
1781         .getsockopt        = ipv6_getsockopt,
1782         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1783         .sockaddr_len      = sizeof(struct sockaddr_in6),
1784 #ifdef CONFIG_COMPAT
1785         .compat_setsockopt = compat_ipv6_setsockopt,
1786         .compat_getsockopt = compat_ipv6_getsockopt,
1787 #endif
1788         .mtu_reduced       = tcp_v4_mtu_reduced,
1789 };
1790
1791 #ifdef CONFIG_TCP_MD5SIG
1792 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1793         .md5_lookup     =       tcp_v4_md5_lookup,
1794         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1795         .md5_parse      =       tcp_v6_parse_md5_keys,
1796 };
1797 #endif
1798
1799 /* NOTE: a lot of things are set to zero explicitly by the call to
1800  *       sk_alloc(), so they need not be done here.
1801  */
1802 static int tcp_v6_init_sock(struct sock *sk)
1803 {
1804         struct inet_connection_sock *icsk = inet_csk(sk);
1805
1806         tcp_init_sock(sk);
1807
1808         icsk->icsk_af_ops = &ipv6_specific;
1809
1810 #ifdef CONFIG_TCP_MD5SIG
1811         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1812 #endif
1813
1814         return 0;
1815 }
1816
1817 static void tcp_v6_destroy_sock(struct sock *sk)
1818 {
1819         tcp_v4_destroy_sock(sk);
1820         inet6_destroy_sock(sk);
1821 }
1822
1823 #ifdef CONFIG_PROC_FS
1824 /* Proc filesystem TCPv6 sock list dumping. */
1825 static void get_openreq6(struct seq_file *seq,
1826                          const struct request_sock *req, int i)
1827 {
1828         long ttd = req->rsk_timer.expires - jiffies;
1829         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1830         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1831
1832         if (ttd < 0)
1833                 ttd = 0;
1834
1835         seq_printf(seq,
1836                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1837                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1838                    i,
1839                    src->s6_addr32[0], src->s6_addr32[1],
1840                    src->s6_addr32[2], src->s6_addr32[3],
1841                    inet_rsk(req)->ir_num,
1842                    dest->s6_addr32[0], dest->s6_addr32[1],
1843                    dest->s6_addr32[2], dest->s6_addr32[3],
1844                    ntohs(inet_rsk(req)->ir_rmt_port),
1845                    TCP_SYN_RECV,
1846                    0, 0, /* could print option size, but that is af dependent. */
1847                    1,   /* timers active (only the expire timer) */
1848                    jiffies_to_clock_t(ttd),
1849                    req->num_timeout,
1850                    from_kuid_munged(seq_user_ns(seq),
1851                                     sock_i_uid(req->rsk_listener)),
1852                    0,  /* non standard timer */
1853                    0, /* open_requests have no inode */
1854                    0, req);
1855 }
1856
1857 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1858 {
1859         const struct in6_addr *dest, *src;
1860         __u16 destp, srcp;
1861         int timer_active;
1862         unsigned long timer_expires;
1863         const struct inet_sock *inet = inet_sk(sp);
1864         const struct tcp_sock *tp = tcp_sk(sp);
1865         const struct inet_connection_sock *icsk = inet_csk(sp);
1866         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1867         int rx_queue;
1868         int state;
1869
1870         dest  = &sp->sk_v6_daddr;
1871         src   = &sp->sk_v6_rcv_saddr;
1872         destp = ntohs(inet->inet_dport);
1873         srcp  = ntohs(inet->inet_sport);
1874
1875         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1876             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1877             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1878                 timer_active    = 1;
1879                 timer_expires   = icsk->icsk_timeout;
1880         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1881                 timer_active    = 4;
1882                 timer_expires   = icsk->icsk_timeout;
1883         } else if (timer_pending(&sp->sk_timer)) {
1884                 timer_active    = 2;
1885                 timer_expires   = sp->sk_timer.expires;
1886         } else {
1887                 timer_active    = 0;
1888                 timer_expires = jiffies;
1889         }
1890
1891         state = inet_sk_state_load(sp);
1892         if (state == TCP_LISTEN)
1893                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
1894         else
1895                 /* Because we don't lock the socket,
1896                  * we might find a transient negative value.
1897                  */
1898                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1899                                       READ_ONCE(tp->copied_seq), 0);
1900
1901         seq_printf(seq,
1902                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1903                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1904                    i,
1905                    src->s6_addr32[0], src->s6_addr32[1],
1906                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1907                    dest->s6_addr32[0], dest->s6_addr32[1],
1908                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1909                    state,
1910                    READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una),
1911                    rx_queue,
1912                    timer_active,
1913                    jiffies_delta_to_clock_t(timer_expires - jiffies),
1914                    icsk->icsk_retransmits,
1915                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1916                    icsk->icsk_probes_out,
1917                    sock_i_ino(sp),
1918                    refcount_read(&sp->sk_refcnt), sp,
1919                    jiffies_to_clock_t(icsk->icsk_rto),
1920                    jiffies_to_clock_t(icsk->icsk_ack.ato),
1921                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1922                    tp->snd_cwnd,
1923                    state == TCP_LISTEN ?
1924                         fastopenq->max_qlen :
1925                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1926                    );
1927 }
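
/* Editorial sketch of the annotation pattern behind rx_queue above, in the
 * spirit of this change: tcp_recvmsg() advances tp->copied_seq under the
 * socket lock, while this procfs path reads it with no lock held at all.
 * Miniature version (illustrative, not verbatim kernel code):
 *
 *	// writer, socket locked (receive path):
 *	WRITE_ONCE(tp->copied_seq, tp->copied_seq + copied);
 *
 *	// lockless reader (this file):
 *	rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
 *			      READ_ONCE(tp->copied_seq), 0);
 *
 * The two loads are not atomic as a pair, so their difference can transiently
 * go negative; hence the clamp to 0.
 */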
1928
1929 static void get_timewait6_sock(struct seq_file *seq,
1930                                struct inet_timewait_sock *tw, int i)
1931 {
1932         long delta = tw->tw_timer.expires - jiffies;
1933         const struct in6_addr *dest, *src;
1934         __u16 destp, srcp;
1935
1936         dest = &tw->tw_v6_daddr;
1937         src  = &tw->tw_v6_rcv_saddr;
1938         destp = ntohs(tw->tw_dport);
1939         srcp  = ntohs(tw->tw_sport);
1940
1941         seq_printf(seq,
1942                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1943                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1944                    i,
1945                    src->s6_addr32[0], src->s6_addr32[1],
1946                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1947                    dest->s6_addr32[0], dest->s6_addr32[1],
1948                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1949                    tw->tw_substate, 0, 0,
1950                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1951                    refcount_read(&tw->tw_refcnt), tw);
1952 }
1953
1954 static int tcp6_seq_show(struct seq_file *seq, void *v)
1955 {
1956         struct tcp_iter_state *st;
1957         struct sock *sk = v;
1958
1959         if (v == SEQ_START_TOKEN) {
1960                 seq_puts(seq,
1961                          "  sl  "
1962                          "local_address                         "
1963                          "remote_address                        "
1964                          "st tx_queue rx_queue tr tm->when retrnsmt"
1965                          "   uid  timeout inode\n");
1966                 goto out;
1967         }
1968         st = seq->private;
1969
1970         if (sk->sk_state == TCP_TIME_WAIT)
1971                 get_timewait6_sock(seq, v, st->num);
1972         else if (sk->sk_state == TCP_NEW_SYN_RECV)
1973                 get_openreq6(seq, v, st->num);
1974         else
1975                 get_tcp6_sock(seq, v, st->num);
1976 out:
1977         return 0;
1978 }
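
/* Illustrative /proc/net/tcp6 entry as produced above (hypothetical values,
 * little-endian host; line wrapped and truncated here):
 *
 *	0: 00000000000000000000000001000000:0016
 *	   00000000000000000000000000000000:0000
 *	   0A 00000000:00000000 00:00000000 00000000     0        0 12345 ...
 *
 * Each address is the four raw s6_addr32 words printed with %08X, i.e. the
 * network-byte-order storage dumped as host-order integers, so ::1 does not
 * read like a textual IPv6 address. A hedged userspace parse of one endpoint:
 *
 *	unsigned int a[4], port;
 *	sscanf(field, "%8X%8X%8X%8X:%4X", &a[0], &a[1], &a[2], &a[3], &port);
 */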
1979
1980 static const struct seq_operations tcp6_seq_ops = {
1981         .show           = tcp6_seq_show,
1982         .start          = tcp_seq_start,
1983         .next           = tcp_seq_next,
1984         .stop           = tcp_seq_stop,
1985 };
1986
1987 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1988         .family         = AF_INET6,
1989 };
1990
1991 int __net_init tcp6_proc_init(struct net *net)
1992 {
1993         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1994                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
1995                 return -ENOMEM;
1996         return 0;
1997 }
1998
1999 void tcp6_proc_exit(struct net *net)
2000 {
2001         remove_proc_entry("tcp6", net->proc_net);
2002 }
2003 #endif
2004
2005 struct proto tcpv6_prot = {
2006         .name                   = "TCPv6",
2007         .owner                  = THIS_MODULE,
2008         .close                  = tcp_close,
2009         .pre_connect            = tcp_v6_pre_connect,
2010         .connect                = tcp_v6_connect,
2011         .disconnect             = tcp_disconnect,
2012         .accept                 = inet_csk_accept,
2013         .ioctl                  = tcp_ioctl,
2014         .init                   = tcp_v6_init_sock,
2015         .destroy                = tcp_v6_destroy_sock,
2016         .shutdown               = tcp_shutdown,
2017         .setsockopt             = tcp_setsockopt,
2018         .getsockopt             = tcp_getsockopt,
2019         .keepalive              = tcp_set_keepalive,
2020         .recvmsg                = tcp_recvmsg,
2021         .sendmsg                = tcp_sendmsg,
2022         .sendpage               = tcp_sendpage,
2023         .backlog_rcv            = tcp_v6_do_rcv,
2024         .release_cb             = tcp_release_cb,
2025         .hash                   = inet6_hash,
2026         .unhash                 = inet_unhash,
2027         .get_port               = inet_csk_get_port,
2028         .enter_memory_pressure  = tcp_enter_memory_pressure,
2029         .leave_memory_pressure  = tcp_leave_memory_pressure,
2030         .stream_memory_free     = tcp_stream_memory_free,
2031         .sockets_allocated      = &tcp_sockets_allocated,
2032         .memory_allocated       = &tcp_memory_allocated,
2033         .memory_pressure        = &tcp_memory_pressure,
2034         .orphan_count           = &tcp_orphan_count,
2035         .sysctl_mem             = sysctl_tcp_mem,
2036         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2037         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2038         .max_header             = MAX_TCP_HEADER,
2039         .obj_size               = sizeof(struct tcp6_sock),
2040         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2041         .twsk_prot              = &tcp6_timewait_sock_ops,
2042         .rsk_prot               = &tcp6_request_sock_ops,
2043         .h.hashinfo             = &tcp_hashinfo,
2044         .no_autobind            = true,
2045 #ifdef CONFIG_COMPAT
2046         .compat_setsockopt      = compat_tcp_setsockopt,
2047         .compat_getsockopt      = compat_tcp_getsockopt,
2048 #endif
2049         .diag_destroy           = tcp_abort,
2050 };
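
/* Editorial aside — the userspace view of this proto table: a
 * socket(AF_INET6, SOCK_STREAM, 0) is backed by tcpv6_prot, so e.g.
 * connect() ends up in tcp_v6_connect() through the .connect hook above.
 * Minimal sketch (assumes <sys/socket.h> and <netinet/in.h>):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port   = htons(8080),		// example port
 *		.sin6_addr   = IN6ADDR_LOOPBACK_INIT,
 *	};
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));	// -> tcp_v6_connect()
 */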
2051
2052 /* thinking of making this const? Don't.
2053  * early_demux can change based on sysctl.
2054  */
2055 static struct inet6_protocol tcpv6_protocol = {
2056         .early_demux    =       tcp_v6_early_demux,
2057         .early_demux_handler =  tcp_v6_early_demux,
2058         .handler        =       tcp_v6_rcv,
2059         .err_handler    =       tcp_v6_err,
2060         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2061 };
2062
2063 static struct inet_protosw tcpv6_protosw = {
2064         .type           =       SOCK_STREAM,
2065         .protocol       =       IPPROTO_TCP,
2066         .prot           =       &tcpv6_prot,
2067         .ops            =       &inet6_stream_ops,
2068         .flags          =       INET_PROTOSW_PERMANENT |
2069                                 INET_PROTOSW_ICSK,
2070 };
2071
2072 static int __net_init tcpv6_net_init(struct net *net)
2073 {
2074         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2075                                     SOCK_RAW, IPPROTO_TCP, net);
2076 }
2077
2078 static void __net_exit tcpv6_net_exit(struct net *net)
2079 {
2080         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2081 }
2082
2083 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2084 {
2085         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2086 }
2087
2088 static struct pernet_operations tcpv6_net_ops = {
2089         .init       = tcpv6_net_init,
2090         .exit       = tcpv6_net_exit,
2091         .exit_batch = tcpv6_net_exit_batch,
2092 };
2093
2094 int __init tcpv6_init(void)
2095 {
2096         int ret;
2097
2098         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2099         if (ret)
2100                 goto out;
2101
2102         /* register inet6 protocol */
2103         ret = inet6_register_protosw(&tcpv6_protosw);
2104         if (ret)
2105                 goto out_tcpv6_protocol;
2106
2107         ret = register_pernet_subsys(&tcpv6_net_ops);
2108         if (ret)
2109                 goto out_tcpv6_protosw;
2110 out:
2111         return ret;
2112
2113 out_tcpv6_protosw:
2114         inet6_unregister_protosw(&tcpv6_protosw);
2115 out_tcpv6_protocol:
2116         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2117         goto out;
2118 }
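
/* The error handling above is the usual kernel "goto ladder": each failure
 * label undoes, in reverse order, exactly the registrations that succeeded
 * before the failing step. Generic shape (illustrative):
 *
 *	ret = register_A();
 *	if (ret)
 *		goto out;
 *	ret = register_B();
 *	if (ret)
 *		goto out_A;
 *	return 0;
 *   out_A:
 *	unregister_A();
 *   out:
 *	return ret;
 */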
2119
2120 void tcpv6_exit(void)
2121 {
2122         unregister_pernet_subsys(&tcpv6_net_ops);
2123         inet6_unregister_protosw(&tcpv6_protosw);
2124         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2125 }