net/ipv6/tcp_ipv6.c (linux-block.git), as of commit "tcp: allocate tcp_death_row outside of struct netns_ipv4"
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
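/* Layout note (an editorial sketch, not a definition from this file): the
 * cast above relies on struct tcp6_sock keeping its struct ipv6_pinfo as
 * the last member, roughly:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;	// must stay last
 *	};
 *
 * so the pinfo lives at (u8 *)sk + sizeof(struct tcp6_sock) -
 * sizeof(struct ipv6_pinfo), with no pointer dereference needed.
 */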
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
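/* Editorial note: both helpers above feed a keyed hash (SipHash-based in
 * current kernels, see net/core/secure_seq.c) with the address pair, so
 * initial sequence numbers and timestamp offsets are unpredictable to
 * off-path attackers.
 */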
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent the BPF program called below from accessing bytes that
135          * are outside the bound specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
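/* Editorial sketch of tcp_v6_connect() below: validate the sockaddr,
 * resolve any flow label and link-local scope, special-case v4-mapped
 * destinations by falling back to tcp_v4_connect(), route the flow with
 * ip6_dst_lookup_flow(), select a source address, grab a local port via
 * inet6_hash_connect(), then emit the SYN through tcp_connect().
 */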
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct tcp_sock *tp = tcp_sk(sk);
154         struct in6_addr *saddr = NULL, *final_p, final;
155         struct ipv6_txoptions *opt;
156         struct flowi6 fl6;
157         struct dst_entry *dst;
158         int addr_type;
159         int err;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If interface is set while binding, indices
202                          * must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connecting to a link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (__ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         if (!saddr) {
290                 saddr = &fl6.saddr;
291                 sk->sk_v6_rcv_saddr = *saddr;
292         }
293
294         /* set the source address */
295         np->saddr = *saddr;
296         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297
298         sk->sk_gso_type = SKB_GSO_TCPV6;
299         ip6_dst_store(sk, dst, NULL, NULL);
300
301         icsk->icsk_ext_hdr_len = 0;
302         if (opt)
303                 icsk->icsk_ext_hdr_len = opt->opt_flen +
304                                          opt->opt_nflen;
305
306         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307
308         inet->inet_dport = usin->sin6_port;
309
310         tcp_set_state(sk, TCP_SYN_SENT);
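        /* Per the commit this file is shown at ("tcp: allocate tcp_death_row
         * outside of struct netns_ipv4"), tcp_death_row is now a pointer
         * hanging off netns_ipv4 rather than an embedded struct, hence the
         * plain assignment below.
         */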
311         tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
312         err = inet6_hash_connect(tcp_death_row, sk);
313         if (err)
314                 goto late_failure;
315
316         sk_set_txhash(sk);
317
318         if (likely(!tp->repair)) {
319                 if (!tp->write_seq)
320                         WRITE_ONCE(tp->write_seq,
321                                    secure_tcpv6_seq(np->saddr.s6_addr32,
322                                                     sk->sk_v6_daddr.s6_addr32,
323                                                     inet->inet_sport,
324                                                     inet->inet_dport));
325                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326                                                    np->saddr.s6_addr32,
327                                                    sk->sk_v6_daddr.s6_addr32);
328         }
329
330         if (tcp_fastopen_defer_connect(sk, &err))
331                 return err;
332         if (err)
333                 goto late_failure;
334
335         err = tcp_connect(sk);
336         if (err)
337                 goto late_failure;
338
339         return 0;
340
341 late_failure:
342         tcp_set_state(sk, TCP_CLOSE);
343 failure:
344         inet->inet_dport = 0;
345         sk->sk_route_caps = 0;
346         return err;
347 }
348
349 static void tcp_v6_mtu_reduced(struct sock *sk)
350 {
351         struct dst_entry *dst;
352         u32 mtu;
353
354         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355                 return;
356
357         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358
359         /* Drop requests trying to increase our current mss.
360          * Check done in __ip6_rt_update_pmtu() is too late.
361          */
362         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
363                 return;
364
365         dst = inet6_csk_update_pmtu(sk, mtu);
366         if (!dst)
367                 return;
368
369         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
370                 tcp_sync_mss(sk, dst_mtu(dst));
371                 tcp_simple_retransmit(sk);
372         }
373 }
374
375 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
376                 u8 type, u8 code, int offset, __be32 info)
377 {
378         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
379         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
380         struct net *net = dev_net(skb->dev);
381         struct request_sock *fastopen;
382         struct ipv6_pinfo *np;
383         struct tcp_sock *tp;
384         __u32 seq, snd_una;
385         struct sock *sk;
386         bool fatal;
387         int err;
388
389         sk = __inet6_lookup_established(net, &tcp_hashinfo,
390                                         &hdr->daddr, th->dest,
391                                         &hdr->saddr, ntohs(th->source),
392                                         skb->dev->ifindex, inet6_sdif(skb));
393
394         if (!sk) {
395                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
396                                   ICMP6_MIB_INERRORS);
397                 return -ENOENT;
398         }
399
400         if (sk->sk_state == TCP_TIME_WAIT) {
401                 inet_twsk_put(inet_twsk(sk));
402                 return 0;
403         }
404         seq = ntohl(th->seq);
405         fatal = icmpv6_err_convert(type, code, &err);
406         if (sk->sk_state == TCP_NEW_SYN_RECV) {
407                 tcp_req_err(sk, seq, fatal);
408                 return 0;
409         }
410
411         bh_lock_sock(sk);
412         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
413                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
414
415         if (sk->sk_state == TCP_CLOSE)
416                 goto out;
417
418         if (static_branch_unlikely(&ip6_min_hopcount)) {
419                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
420                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
421                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422                         goto out;
423                 }
424         }
425
426         tp = tcp_sk(sk);
427         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
428         fastopen = rcu_dereference(tp->fastopen_rsk);
429         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
430         if (sk->sk_state != TCP_LISTEN &&
431             !between(seq, snd_una, tp->snd_nxt)) {
432                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433                 goto out;
434         }
435
436         np = tcp_inet6_sk(sk);
437
438         if (type == NDISC_REDIRECT) {
439                 if (!sock_owned_by_user(sk)) {
440                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441
442                         if (dst)
443                                 dst->ops->redirect(dst, sk, skb);
444                 }
445                 goto out;
446         }
447
448         if (type == ICMPV6_PKT_TOOBIG) {
449                 u32 mtu = ntohl(info);
450
451                 /* We are not interested in TCP_LISTEN and open_requests
452                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
453                  * they should go through unfragmented).
454                  */
455                 if (sk->sk_state == TCP_LISTEN)
456                         goto out;
457
458                 if (!ip6_sk_accept_pmtu(sk))
459                         goto out;
460
461                 if (mtu < IPV6_MIN_MTU)
462                         goto out;
463
464                 WRITE_ONCE(tp->mtu_info, mtu);
465
466                 if (!sock_owned_by_user(sk))
467                         tcp_v6_mtu_reduced(sk);
468                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469                                            &sk->sk_tsq_flags))
470                         sock_hold(sk);
471                 goto out;
472         }
473
474
475         /* Might be for a request_sock */
476         switch (sk->sk_state) {
477         case TCP_SYN_SENT:
478         case TCP_SYN_RECV:
479                 /* Only in fast or simultaneous open. If a fast open socket is
480                  * already accepted it is treated as a connected one below.
481                  */
482                 if (fastopen && !fastopen->sk)
483                         break;
484
485                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486
487                 if (!sock_owned_by_user(sk)) {
488                         sk->sk_err = err;
489                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
490
491                         tcp_done(sk);
492                 } else
493                         sk->sk_err_soft = err;
494                 goto out;
495         case TCP_LISTEN:
496                 break;
497         default:
498                 /* check if this ICMP message allows revert of backoff.
499                  * (see RFC 6069)
500                  */
501                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502                     code == ICMPV6_NOROUTE)
503                         tcp_ld_RTO_revert(sk, seq);
504         }
505
506         if (!sock_owned_by_user(sk) && np->recverr) {
507                 sk->sk_err = err;
508                 sk_error_report(sk);
509         } else
510                 sk->sk_err_soft = err;
511
512 out:
513         bh_unlock_sock(sk);
514         sock_put(sk);
515         return 0;
516 }
517
518
519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520                               struct flowi *fl,
521                               struct request_sock *req,
522                               struct tcp_fastopen_cookie *foc,
523                               enum tcp_synack_type synack_type,
524                               struct sk_buff *syn_skb)
525 {
526         struct inet_request_sock *ireq = inet_rsk(req);
527         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528         struct ipv6_txoptions *opt;
529         struct flowi6 *fl6 = &fl->u.ip6;
530         struct sk_buff *skb;
531         int err = -ENOMEM;
532         u8 tclass;
533
534         /* First, grab a route. */
535         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536                                                IPPROTO_TCP)) == NULL)
537                 goto done;
538
539         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540
541         if (skb) {
542                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543                                     &ireq->ir_v6_rmt_addr);
544
545                 fl6->daddr = ireq->ir_v6_rmt_addr;
546                 if (np->repflow && ireq->pktopts)
547                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548
549                 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
550                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551                                 (np->tclass & INET_ECN_MASK) :
552                                 np->tclass;
553
554                 if (!INET_ECN_is_capable(tclass) &&
555                     tcp_bpf_ca_needs_ecn((struct sock *)req))
556                         tclass |= INET_ECN_ECT_0;
557
558                 rcu_read_lock();
559                 opt = ireq->ipv6_opt;
560                 if (!opt)
561                         opt = rcu_dereference(np->opt);
562                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563                                tclass, sk->sk_priority);
564                 rcu_read_unlock();
565                 err = net_xmit_eval(err);
566         }
567
568 done:
569         return err;
570 }
571
572
573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575         kfree(inet_rsk(req)->ipv6_opt);
576         consume_skb(inet_rsk(req)->pktopts);
577 }
578
579 #ifdef CONFIG_TCP_MD5SIG
580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581                                                    const struct in6_addr *addr,
582                                                    int l3index)
583 {
584         return tcp_md5_do_lookup(sk, l3index,
585                                  (union tcp_md5_addr *)addr, AF_INET6);
586 }
587
588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589                                                 const struct sock *addr_sk)
590 {
591         int l3index;
592
593         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594                                                  addr_sk->sk_bound_dev_if);
595         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596                                     l3index);
597 }
598
599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600                                  sockptr_t optval, int optlen)
601 {
602         struct tcp_md5sig cmd;
603         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604         int l3index = 0;
605         u8 prefixlen;
606         u8 flags;
607
608         if (optlen < sizeof(cmd))
609                 return -EINVAL;
610
611         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612                 return -EFAULT;
613
614         if (sin6->sin6_family != AF_INET6)
615                 return -EINVAL;
616
617         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
618
619         if (optname == TCP_MD5SIG_EXT &&
620             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
621                 prefixlen = cmd.tcpm_prefixlen;
622                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
623                                         prefixlen > 32))
624                         return -EINVAL;
625         } else {
626                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
627         }
628
629         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
630             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
631                 struct net_device *dev;
632
633                 rcu_read_lock();
634                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
635                 if (dev && netif_is_l3_master(dev))
636                         l3index = dev->ifindex;
637                 rcu_read_unlock();
638
639                 /* ok to reference set/not set outside of rcu;
640                  * right now device MUST be an L3 master
641                  */
642                 if (!dev || !l3index)
643                         return -EINVAL;
644         }
645
646         if (!cmd.tcpm_keylen) {
647                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
648                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
649                                               AF_INET, prefixlen,
650                                               l3index, flags);
651                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
652                                       AF_INET6, prefixlen, l3index, flags);
653         }
654
655         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
656                 return -EINVAL;
657
658         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
659                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
660                                       AF_INET, prefixlen, l3index, flags,
661                                       cmd.tcpm_key, cmd.tcpm_keylen,
662                                       GFP_KERNEL);
663
664         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
665                               AF_INET6, prefixlen, l3index, flags,
666                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
667 }
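/* Userspace usage sketch (a hedged example, not from this file): installing
 * an MD5 key for an IPv6 peer on socket fd via the option parsed above.
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 8 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);	// example peer
 *	memcpy(md5.tcpm_key, "secret01", 8);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
 *		perror("TCP_MD5SIG");
 */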
668
669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
670                                    const struct in6_addr *daddr,
671                                    const struct in6_addr *saddr,
672                                    const struct tcphdr *th, int nbytes)
673 {
674         struct tcp6_pseudohdr *bp;
675         struct scatterlist sg;
676         struct tcphdr *_th;
677
678         bp = hp->scratch;
679         /* 1. TCP pseudo-header (RFC2460) */
680         bp->saddr = *saddr;
681         bp->daddr = *daddr;
682         bp->protocol = cpu_to_be32(IPPROTO_TCP);
683         bp->len = cpu_to_be32(nbytes);
684
685         _th = (struct tcphdr *)(bp + 1);
686         memcpy(_th, th, sizeof(*th));
687         _th->check = 0;
688
689         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
690         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
691                                 sizeof(*bp) + sizeof(*th));
692         return crypto_ahash_update(hp->md5_req);
693 }
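/* What gets hashed above, as a byte-layout sketch (struct tcp6_pseudohdr
 * mirrors the RFC 2460 pseudo-header): saddr (16 bytes), daddr (16),
 * len (4), protocol (4, IPPROTO_TCP), followed by the TCP header with its
 * checksum field zeroed.
 */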
694
695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
696                                const struct in6_addr *daddr, struct in6_addr *saddr,
697                                const struct tcphdr *th)
698 {
699         struct tcp_md5sig_pool *hp;
700         struct ahash_request *req;
701
702         hp = tcp_get_md5sig_pool();
703         if (!hp)
704                 goto clear_hash_noput;
705         req = hp->md5_req;
706
707         if (crypto_ahash_init(req))
708                 goto clear_hash;
709         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
710                 goto clear_hash;
711         if (tcp_md5_hash_key(hp, key))
712                 goto clear_hash;
713         ahash_request_set_crypt(req, NULL, md5_hash, 0);
714         if (crypto_ahash_final(req))
715                 goto clear_hash;
716
717         tcp_put_md5sig_pool();
718         return 0;
719
720 clear_hash:
721         tcp_put_md5sig_pool();
722 clear_hash_noput:
723         memset(md5_hash, 0, 16);
724         return 1;
725 }
726
727 static int tcp_v6_md5_hash_skb(char *md5_hash,
728                                const struct tcp_md5sig_key *key,
729                                const struct sock *sk,
730                                const struct sk_buff *skb)
731 {
732         const struct in6_addr *saddr, *daddr;
733         struct tcp_md5sig_pool *hp;
734         struct ahash_request *req;
735         const struct tcphdr *th = tcp_hdr(skb);
736
737         if (sk) { /* valid for establish/request sockets */
738                 saddr = &sk->sk_v6_rcv_saddr;
739                 daddr = &sk->sk_v6_daddr;
740         } else {
741                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
742                 saddr = &ip6h->saddr;
743                 daddr = &ip6h->daddr;
744         }
745
746         hp = tcp_get_md5sig_pool();
747         if (!hp)
748                 goto clear_hash_noput;
749         req = hp->md5_req;
750
751         if (crypto_ahash_init(req))
752                 goto clear_hash;
753
754         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
755                 goto clear_hash;
756         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
757                 goto clear_hash;
758         if (tcp_md5_hash_key(hp, key))
759                 goto clear_hash;
760         ahash_request_set_crypt(req, NULL, md5_hash, 0);
761         if (crypto_ahash_final(req))
762                 goto clear_hash;
763
764         tcp_put_md5sig_pool();
765         return 0;
766
767 clear_hash:
768         tcp_put_md5sig_pool();
769 clear_hash_noput:
770         memset(md5_hash, 0, 16);
771         return 1;
772 }
773
774 #endif
775
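/* Drop-decision helper: the function below returns true when the segment
 * must be dropped (expected signature missing, unexpected signature
 * present, or signature mismatch) and false when it may proceed.
 */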
776 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
777                                     const struct sk_buff *skb,
778                                     int dif, int sdif)
779 {
780 #ifdef CONFIG_TCP_MD5SIG
781         const __u8 *hash_location = NULL;
782         struct tcp_md5sig_key *hash_expected;
783         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
784         const struct tcphdr *th = tcp_hdr(skb);
785         int genhash, l3index;
786         u8 newhash[16];
787
788         /* sdif set, means packet ingressed via a device
789          * in an L3 domain and dif is set to the l3mdev
790          */
791         l3index = sdif ? dif : 0;
792
793         hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
794         hash_location = tcp_parse_md5sig_option(th);
795
796         /* We've parsed the options - do we have a hash? */
797         if (!hash_expected && !hash_location)
798                 return false;
799
800         if (hash_expected && !hash_location) {
801                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
802                 return true;
803         }
804
805         if (!hash_expected && hash_location) {
806                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
807                 return true;
808         }
809
810         /* check the signature */
811         genhash = tcp_v6_md5_hash_skb(newhash,
812                                       hash_expected,
813                                       NULL, skb);
814
815         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
816                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
817                 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
818                                      genhash ? "failed" : "mismatch",
819                                      &ip6h->saddr, ntohs(th->source),
820                                      &ip6h->daddr, ntohs(th->dest), l3index);
821                 return true;
822         }
823 #endif
824         return false;
825 }
826
827 static void tcp_v6_init_req(struct request_sock *req,
828                             const struct sock *sk_listener,
829                             struct sk_buff *skb)
830 {
831         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
832         struct inet_request_sock *ireq = inet_rsk(req);
833         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
834
835         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
836         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
837
838         /* So that link locals have meaning */
839         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
840             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
841                 ireq->ir_iif = tcp_v6_iif(skb);
842
843         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
844             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
845              np->rxopt.bits.rxinfo ||
846              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
847              np->rxopt.bits.rxohlim || np->repflow)) {
848                 refcount_inc(&skb->users);
849                 ireq->pktopts = skb;
850         }
851 }
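/* Note on the latch above: bumping skb->users keeps the SYN around as
 * ireq->pktopts so IPV6_PKTOPTIONS ancillary data can later be rebuilt
 * for the child socket (see the pktoptions clone in
 * tcp_v6_syn_recv_sock()).
 */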
852
853 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
854                                           struct sk_buff *skb,
855                                           struct flowi *fl,
856                                           struct request_sock *req)
857 {
858         tcp_v6_init_req(req, sk, skb);
859
860         if (security_inet_conn_request(sk, skb, req))
861                 return NULL;
862
863         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
864 }
865
866 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
867         .family         =       AF_INET6,
868         .obj_size       =       sizeof(struct tcp6_request_sock),
869         .rtx_syn_ack    =       tcp_rtx_synack,
870         .send_ack       =       tcp_v6_reqsk_send_ack,
871         .destructor     =       tcp_v6_reqsk_destructor,
872         .send_reset     =       tcp_v6_send_reset,
873         .syn_ack_timeout =      tcp_syn_ack_timeout,
874 };
875
876 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
877         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
878                                 sizeof(struct ipv6hdr),
879 #ifdef CONFIG_TCP_MD5SIG
880         .req_md5_lookup =       tcp_v6_md5_lookup,
881         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
882 #endif
883 #ifdef CONFIG_SYN_COOKIES
884         .cookie_init_seq =      cookie_v6_init_sequence,
885 #endif
886         .route_req      =       tcp_v6_route_req,
887         .init_seq       =       tcp_v6_init_seq,
888         .init_ts_off    =       tcp_v6_init_ts_off,
889         .send_synack    =       tcp_v6_send_synack,
890 };
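/* These two ops tables plug the IPv6 specifics (routing, ISN/ts-offset
 * generation, SYNACK transmission, MD5 lookup) into the protocol-
 * independent tcp_conn_request() path used by tcp_v6_conn_request()
 * further below.
 */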
891
892 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
893                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
894                                  int oif, struct tcp_md5sig_key *key, int rst,
895                                  u8 tclass, __be32 label, u32 priority)
896 {
897         const struct tcphdr *th = tcp_hdr(skb);
898         struct tcphdr *t1;
899         struct sk_buff *buff;
900         struct flowi6 fl6;
901         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
902         struct sock *ctl_sk = net->ipv6.tcp_sk;
903         unsigned int tot_len = sizeof(struct tcphdr);
904         __be32 mrst = 0, *topt;
905         struct dst_entry *dst;
906         __u32 mark = 0;
907
908         if (tsecr)
909                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
910 #ifdef CONFIG_TCP_MD5SIG
911         if (key)
912                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
913 #endif
914
915 #ifdef CONFIG_MPTCP
916         if (rst && !key) {
917                 mrst = mptcp_reset_option(skb);
918
919                 if (mrst)
920                         tot_len += sizeof(__be32);
921         }
922 #endif
923
924         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
925                          GFP_ATOMIC);
926         if (!buff)
927                 return;
928
929         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
930
931         t1 = skb_push(buff, tot_len);
932         skb_reset_transport_header(buff);
933
934         /* Swap the send and the receive. */
935         memset(t1, 0, sizeof(*t1));
936         t1->dest = th->source;
937         t1->source = th->dest;
938         t1->doff = tot_len / 4;
939         t1->seq = htonl(seq);
940         t1->ack_seq = htonl(ack);
941         t1->ack = !rst || !th->ack;
942         t1->rst = rst;
943         t1->window = htons(win);
944
945         topt = (__be32 *)(t1 + 1);
946
947         if (tsecr) {
948                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
949                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
950                 *topt++ = htonl(tsval);
951                 *topt++ = htonl(tsecr);
952         }
953
954         if (mrst)
955                 *topt++ = mrst;
956
957 #ifdef CONFIG_TCP_MD5SIG
958         if (key) {
959                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
960                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
961                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
962                                     &ipv6_hdr(skb)->saddr,
963                                     &ipv6_hdr(skb)->daddr, t1);
964         }
965 #endif
966
967         memset(&fl6, 0, sizeof(fl6));
968         fl6.daddr = ipv6_hdr(skb)->saddr;
969         fl6.saddr = ipv6_hdr(skb)->daddr;
970         fl6.flowlabel = label;
971
972         buff->ip_summed = CHECKSUM_PARTIAL;
973
974         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
975
976         fl6.flowi6_proto = IPPROTO_TCP;
977         if (rt6_need_strict(&fl6.daddr) && !oif)
978                 fl6.flowi6_oif = tcp_v6_iif(skb);
979         else {
980                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
981                         oif = skb->skb_iif;
982
983                 fl6.flowi6_oif = oif;
984         }
985
986         if (sk) {
987                 if (sk->sk_state == TCP_TIME_WAIT) {
988                         mark = inet_twsk(sk)->tw_mark;
989                         /* autoflowlabel relies on buff->hash */
990                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
991                                      PKT_HASH_TYPE_L4);
992                 } else {
993                         mark = sk->sk_mark;
994                 }
995                 buff->tstamp = tcp_transmit_time(sk);
996         }
997         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
998         fl6.fl6_dport = t1->dest;
999         fl6.fl6_sport = t1->source;
1000         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
1001         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
1002
1003         /* Pass a socket to ip6_dst_lookup_flow() even when it is for a RST;
1004          * the underlying function will use it to retrieve the network
1005          * namespace.
1006          */
1007         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
1008         if (!IS_ERR(dst)) {
1009                 skb_dst_set(buff, dst);
1010                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1011                          tclass & ~INET_ECN_MASK, priority);
1012                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1013                 if (rst)
1014                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1015                 return;
1016         }
1017
1018         kfree_skb(buff);
1019 }
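/* tcp_v6_send_response() above is the shared backend for the stateless
 * replies below: rst=1 builds a reset (tcp_v6_send_reset), rst=0 a bare
 * ACK (tcp_v6_send_ack, used for timewait and request-socket ACKs).
 */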
1020
1021 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1022 {
1023         const struct tcphdr *th = tcp_hdr(skb);
1024         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1025         u32 seq = 0, ack_seq = 0;
1026         struct tcp_md5sig_key *key = NULL;
1027 #ifdef CONFIG_TCP_MD5SIG
1028         const __u8 *hash_location = NULL;
1029         unsigned char newhash[16];
1030         int genhash;
1031         struct sock *sk1 = NULL;
1032 #endif
1033         __be32 label = 0;
1034         u32 priority = 0;
1035         struct net *net;
1036         int oif = 0;
1037
1038         if (th->rst)
1039                 return;
1040
1041         /* If sk is not NULL, we did a successful lookup and the incoming
1042          * route had to be correct. prequeue might have dropped our dst.
1043          */
1044         if (!sk && !ipv6_unicast_destination(skb))
1045                 return;
1046
1047         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1048 #ifdef CONFIG_TCP_MD5SIG
1049         rcu_read_lock();
1050         hash_location = tcp_parse_md5sig_option(th);
1051         if (sk && sk_fullsock(sk)) {
1052                 int l3index;
1053
1054                 /* sdif set, means packet ingressed via a device
1055                  * in an L3 domain and inet_iif is set to it.
1056                  */
1057                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1058                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1059         } else if (hash_location) {
1060                 int dif = tcp_v6_iif_l3_slave(skb);
1061                 int sdif = tcp_v6_sdif(skb);
1062                 int l3index;
1063
1064                 /*
1065                  * The active side is lost. Try to find the listening socket
1066                  * through the source port, then find the md5 key through that
1067                  * listening socket. We are not loosening security here:
1068                  * the incoming packet is checked against the md5 hash of the
1069                  * key we find, and no RST is generated if the hash doesn't match.
1070                  */
1071                 sk1 = inet6_lookup_listener(net,
1072                                            &tcp_hashinfo, NULL, 0,
1073                                            &ipv6h->saddr,
1074                                            th->source, &ipv6h->daddr,
1075                                            ntohs(th->source), dif, sdif);
1076                 if (!sk1)
1077                         goto out;
1078
1079                 /* sdif set, means packet ingressed via a device
1080                  * in an L3 domain and dif is set to it.
1081                  */
1082                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1083
1084                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1085                 if (!key)
1086                         goto out;
1087
1088                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1089                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1090                         goto out;
1091         }
1092 #endif
1093
1094         if (th->ack)
1095                 seq = ntohl(th->ack_seq);
1096         else
1097                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1098                           (th->doff << 2);
1099
1100         if (sk) {
1101                 oif = sk->sk_bound_dev_if;
1102                 if (sk_fullsock(sk)) {
1103                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1104
1105                         trace_tcp_send_reset(sk, skb);
1106                         if (np->repflow)
1107                                 label = ip6_flowlabel(ipv6h);
1108                         priority = sk->sk_priority;
1109                 }
1110                 if (sk->sk_state == TCP_TIME_WAIT) {
1111                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1112                         priority = inet_twsk(sk)->tw_priority;
1113                 }
1114         } else {
1115                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1116                         label = ip6_flowlabel(ipv6h);
1117         }
1118
1119         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1120                              ipv6_get_dsfield(ipv6h), label, priority);
1121
1122 #ifdef CONFIG_TCP_MD5SIG
1123 out:
1124         rcu_read_unlock();
1125 #endif
1126 }
1127
1128 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1129                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1130                             struct tcp_md5sig_key *key, u8 tclass,
1131                             __be32 label, u32 priority)
1132 {
1133         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1134                              tclass, label, priority);
1135 }
1136
1137 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1138 {
1139         struct inet_timewait_sock *tw = inet_twsk(sk);
1140         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1141
1142         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1143                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1144                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1145                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1146                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1147
1148         inet_twsk_put(tw);
1149 }
1150
1151 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1152                                   struct request_sock *req)
1153 {
1154         int l3index;
1155
1156         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1157
1158         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1159          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1160          */
1161         /* RFC 7323 2.3
1162          * The window field (SEG.WND) of every outgoing segment, with the
1163          * exception of <SYN> segments, MUST be right-shifted by
1164          * Rcv.Wind.Shift bits:
1165          */
1166         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1167                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1168                         tcp_rsk(req)->rcv_nxt,
1169                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1170                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1171                         req->ts_recent, sk->sk_bound_dev_if,
1172                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1173                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1174 }
1175
1176
1177 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1178 {
1179 #ifdef CONFIG_SYN_COOKIES
1180         const struct tcphdr *th = tcp_hdr(skb);
1181
1182         if (!th->syn)
1183                 sk = cookie_v6_check(sk, skb);
1184 #endif
1185         return sk;
1186 }
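/* Syncookie hook: when a listener's SYN queue has overflowed, a later
 * non-SYN segment (the client's ACK) is validated by cookie_v6_check(),
 * which may synthesize the request and return the child socket instead
 * of sk.
 */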
1187
1188 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1189                          struct tcphdr *th, u32 *cookie)
1190 {
1191         u16 mss = 0;
1192 #ifdef CONFIG_SYN_COOKIES
1193         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1194                                     &tcp_request_sock_ipv6_ops, sk, th);
1195         if (mss) {
1196                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1197                 tcp_synq_overflow(sk);
1198         }
1199 #endif
1200         return mss;
1201 }
1202
1203 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1204 {
1205         if (skb->protocol == htons(ETH_P_IP))
1206                 return tcp_v4_conn_request(sk, skb);
1207
1208         if (!ipv6_unicast_destination(skb))
1209                 goto drop;
1210
1211         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1212                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1213                 return 0;
1214         }
1215
1216         return tcp_conn_request(&tcp6_request_sock_ops,
1217                                 &tcp_request_sock_ipv6_ops, sk, skb);
1218
1219 drop:
1220         tcp_listendrop(sk);
1221         return 0; /* don't send reset */
1222 }
1223
1224 static void tcp_v6_restore_cb(struct sk_buff *skb)
1225 {
1226         /* We need to move header back to the beginning if xfrm6_policy_check()
1227          * and tcp_v6_fill_cb() are going to be called again.
1228          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1229          */
1230         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1231                 sizeof(struct inet6_skb_parm));
1232 }
1233
1234 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1235                                          struct request_sock *req,
1236                                          struct dst_entry *dst,
1237                                          struct request_sock *req_unhash,
1238                                          bool *own_req)
1239 {
1240         struct inet_request_sock *ireq;
1241         struct ipv6_pinfo *newnp;
1242         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1243         struct ipv6_txoptions *opt;
1244         struct inet_sock *newinet;
1245         bool found_dup_sk = false;
1246         struct tcp_sock *newtp;
1247         struct sock *newsk;
1248 #ifdef CONFIG_TCP_MD5SIG
1249         struct tcp_md5sig_key *key;
1250         int l3index;
1251 #endif
1252         struct flowi6 fl6;
1253
1254         if (skb->protocol == htons(ETH_P_IP)) {
1255                 /*
1256                  *      v6 mapped
1257                  */
1258
1259                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1260                                              req_unhash, own_req);
1261
1262                 if (!newsk)
1263                         return NULL;
1264
1265                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1266
1267                 newnp = tcp_inet6_sk(newsk);
1268                 newtp = tcp_sk(newsk);
1269
1270                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1271
1272                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1273
1274                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1275                 if (sk_is_mptcp(newsk))
1276                         mptcpv6_handle_mapped(newsk, true);
1277                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1278 #ifdef CONFIG_TCP_MD5SIG
1279                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1280 #endif
1281
1282                 newnp->ipv6_mc_list = NULL;
1283                 newnp->ipv6_ac_list = NULL;
1284                 newnp->ipv6_fl_list = NULL;
1285                 newnp->pktoptions  = NULL;
1286                 newnp->opt         = NULL;
1287                 newnp->mcast_oif   = inet_iif(skb);
1288                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1289                 newnp->rcv_flowinfo = 0;
1290                 if (np->repflow)
1291                         newnp->flow_label = 0;
1292
1293                 /*
1294                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1295                  * here, tcp_create_openreq_child now does this for us, see the comment in
1296                  * that function for the gory details. -acme
1297                  */
1298
1299                 /* It is a tricky place. Until this moment IPv4 tcp
1300                    worked with the IPv6 icsk.icsk_af_ops.
1301                    Sync it now.
1302                  */
1303                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1304
1305                 return newsk;
1306         }
1307
1308         ireq = inet_rsk(req);
1309
1310         if (sk_acceptq_is_full(sk))
1311                 goto out_overflow;
1312
1313         if (!dst) {
1314                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1315                 if (!dst)
1316                         goto out;
1317         }
1318
1319         newsk = tcp_create_openreq_child(sk, req, skb);
1320         if (!newsk)
1321                 goto out_nonewsk;
1322
1323         /*
1324          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1325          * count here, tcp_create_openreq_child now does this for us, see the
1326          * comment in that function for the gory details. -acme
1327          */
1328
1329         newsk->sk_gso_type = SKB_GSO_TCPV6;
1330         ip6_dst_store(newsk, dst, NULL, NULL);
1331         inet6_sk_rx_dst_set(newsk, skb);
1332
1333         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1334
1335         newtp = tcp_sk(newsk);
1336         newinet = inet_sk(newsk);
1337         newnp = tcp_inet6_sk(newsk);
1338
1339         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1340
1341         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1342         newnp->saddr = ireq->ir_v6_loc_addr;
1343         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1344         newsk->sk_bound_dev_if = ireq->ir_iif;
1345
1346         /* Now IPv6 options...
1347
1348            First: no IPv4 options.
1349          */
1350         newinet->inet_opt = NULL;
1351         newnp->ipv6_mc_list = NULL;
1352         newnp->ipv6_ac_list = NULL;
1353         newnp->ipv6_fl_list = NULL;
1354
1355         /* Clone RX bits */
1356         newnp->rxopt.all = np->rxopt.all;
1357
1358         newnp->pktoptions = NULL;
1359         newnp->opt        = NULL;
1360         newnp->mcast_oif  = tcp_v6_iif(skb);
1361         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1362         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1363         if (np->repflow)
1364                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1365
1366         /* Set ToS of the new socket based upon the value of incoming SYN.
1367          * ECT bits are set later in tcp_init_transfer().
1368          */
1369         if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1370                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1371
1372         /* Clone native IPv6 options from listening socket (if any)
1373
1374            Yes, keeping a reference count would be much more clever,
1375            but we do one more thing here: reattach optmem
1376            to newsk.
1377          */
1378         opt = ireq->ipv6_opt;
1379         if (!opt)
1380                 opt = rcu_dereference(np->opt);
1381         if (opt) {
1382                 opt = ipv6_dup_options(newsk, opt);
1383                 RCU_INIT_POINTER(newnp->opt, opt);
1384         }
1385         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1386         if (opt)
1387                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1388                                                     opt->opt_flen;
1389
1390         tcp_ca_openreq_child(newsk, dst);
1391
1392         tcp_sync_mss(newsk, dst_mtu(dst));
1393         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1394
1395         tcp_initialize_rcv_mss(newsk);
1396
1397         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1398         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1399
1400 #ifdef CONFIG_TCP_MD5SIG
1401         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1402
1403         /* Copy over the MD5 key from the original socket */
1404         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1405         if (key) {
1406                 /* We're using one, so create a matching key
1407                  * on the newsk structure. If we fail to get
1408                  * memory, then we end up not copying the key
1409                  * across. Shucks.
1410                  */
1411                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1412                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1413                                sk_gfp_mask(sk, GFP_ATOMIC));
1414         }
1415 #endif
1416
1417         if (__inet_inherit_port(sk, newsk) < 0) {
1418                 inet_csk_prepare_forced_close(newsk);
1419                 tcp_done(newsk);
1420                 goto out;
1421         }
1422         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1423                                        &found_dup_sk);
1424         if (*own_req) {
1425                 tcp_move_syn(newtp, req);
1426
1427                 /* Clone pktoptions received with SYN, if we own the req */
1428                 if (ireq->pktopts) {
1429                         newnp->pktoptions = skb_clone(ireq->pktopts,
1430                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1431                         consume_skb(ireq->pktopts);
1432                         ireq->pktopts = NULL;
1433                         if (newnp->pktoptions) {
1434                                 tcp_v6_restore_cb(newnp->pktoptions);
1435                                 skb_set_owner_r(newnp->pktoptions, newsk);
1436                         }
1437                 }
1438         } else {
1439                 if (!req_unhash && found_dup_sk) {
1440                         /* This code path should be executed in the
1441                          * syncookie case only.
1442                          */
1443                         bh_unlock_sock(newsk);
1444                         sock_put(newsk);
1445                         newsk = NULL;
1446                 }
1447         }
1448
1449         return newsk;
1450
1451 out_overflow:
1452         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1453 out_nonewsk:
1454         dst_release(dst);
1455 out:
1456         tcp_listendrop(sk);
1457         return NULL;
1458 }
1459
1460 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1461                                                            u32));
1462 /* The socket must have its spinlock held when we get
1463  * here, unless it is a TCP_LISTEN socket.
1464  *
1465  * We have a potential double-lock case here, so even when
1466  * doing backlog processing we use the BH locking scheme.
1467  * This is because we cannot sleep with the original spinlock
1468  * held.
1469  */
1470 INDIRECT_CALLABLE_SCOPE
1471 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1472 {
1473         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1474         struct sk_buff *opt_skb = NULL;
1475         struct tcp_sock *tp;
1476
1477         /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
1478            goes to the IPv4 receive handler and is backlogged.
1479            From the backlog it always ends up here. Kaboom...
1480            Fortunately, tcp_rcv_established() handles such packets
1481            correctly, but that is not the case with
1482            tcp_v6_hnd_req() and tcp_v6_send_reset().   --ANK
1483          */
1484
1485         if (skb->protocol == htons(ETH_P_IP))
1486                 return tcp_v4_do_rcv(sk, skb);
1487
1488         /*
1489          *      socket locking is here for SMP purposes as backlog rcv
1490          *      is currently called with bh processing disabled.
1491          */
1492
1493         /* Do Stevens' IPV6_PKTOPTIONS.
1494
1495            Yes, this is the only place in our code where we
1496            can do this without affecting IPv4.
1497            The rest of the code is protocol independent,
1498            and I do not like the idea of uglifying IPv4.
1499
1500            Actually, the whole idea behind IPV6_PKTOPTIONS
1501            does not look very well thought out. For now we latch
1502            the options received with the last packet enqueued
1503            by TCP. Feel free to propose a better solution.
1504                                                --ANK (980728)
1505          */
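        /* Illustrative userspace sketch (not part of this file): the
         * rxopt bits tested below are switched on via setsockopt(), e.g.:
         *
         *      int on = 1;
         *
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
         */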
1506         if (np->rxopt.all)
1507                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1508
1509         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1510                 struct dst_entry *dst;
1511
1512                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1513                                                 lockdep_sock_is_held(sk));
1514
1515                 sock_rps_save_rxhash(sk, skb);
1516                 sk_mark_napi_id(sk, skb);
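                /* Validate the cached RX dst: if the packet arrived on a
                 * different interface or the route has been invalidated,
                 * drop the cache and fall back to a full route lookup.
                 */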
1517                 if (dst) {
1518                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1519                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1520                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1521                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1522                                 dst_release(dst);
1523                         }
1524                 }
1525
1526                 tcp_rcv_established(sk, skb);
1527                 if (opt_skb)
1528                         goto ipv6_pktoptions;
1529                 return 0;
1530         }
1531
1532         if (tcp_checksum_complete(skb))
1533                 goto csum_err;
1534
1535         if (sk->sk_state == TCP_LISTEN) {
1536                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1537
1538                 if (!nsk)
1539                         goto discard;
1540
1541                 if (nsk != sk) {
1542                         if (tcp_child_process(sk, nsk, skb))
1543                                 goto reset;
1544                         if (opt_skb)
1545                                 __kfree_skb(opt_skb);
1546                         return 0;
1547                 }
1548         } else
1549                 sock_rps_save_rxhash(sk, skb);
1550
1551         if (tcp_rcv_state_process(sk, skb))
1552                 goto reset;
1553         if (opt_skb)
1554                 goto ipv6_pktoptions;
1555         return 0;
1556
1557 reset:
1558         tcp_v6_send_reset(sk, skb);
1559 discard:
1560         if (opt_skb)
1561                 __kfree_skb(opt_skb);
1562         kfree_skb(skb);
1563         return 0;
1564 csum_err:
1565         trace_tcp_bad_csum(skb);
1566         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1567         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1568         goto discard;
1569
1571 ipv6_pktoptions:
1572         /* What are we checking here?
1573
1574            1. The skb was enqueued by TCP.
1575            2. The skb was added to the tail of the read queue, not out of order.
1576            3. The socket is not in a passive state.
1577            4. Finally, it really contains options the user wants to receive.
1578          */
1579         tp = tcp_sk(sk);
1580         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1581             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1582                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1583                         np->mcast_oif = tcp_v6_iif(opt_skb);
1584                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1585                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1586                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1587                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1588                 if (np->repflow)
1589                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1590                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1591                         skb_set_owner_r(opt_skb, sk);
1592                         tcp_v6_restore_cb(opt_skb);
1593                         opt_skb = xchg(&np->pktoptions, opt_skb);
1594                 } else {
1595                         __kfree_skb(opt_skb);
1596                         opt_skb = xchg(&np->pktoptions, NULL);
1597                 }
1598         }
1599
1600         consume_skb(opt_skb);
1601         return 0;
1602 }
1603
1604 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1605                            const struct tcphdr *th)
1606 {
1607         /* This is tricky: we move IP6CB to its correct location inside
1608          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1609          * _decode_session6() uses IP6CB().
1610          * barrier() makes sure the compiler won't play aliasing games.
1611          */
1612         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1613                 sizeof(struct inet6_skb_parm));
1614         barrier();
1615
1616         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1617         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1618                                     skb->len - th->doff*4);
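        /* SYN and FIN each occupy one unit of sequence space, so e.g. a
         * pure SYN with no payload yields end_seq == seq + 1.
         */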
1619         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1620         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1621         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1622         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1623         TCP_SKB_CB(skb)->sacked = 0;
1624         TCP_SKB_CB(skb)->has_rxtstamp =
1625                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1626 }
1627
1628 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1629 {
1630         int sdif = inet6_sdif(skb);
1631         int dif = inet6_iif(skb);
1632         const struct tcphdr *th;
1633         const struct ipv6hdr *hdr;
1634         bool refcounted;
1635         struct sock *sk;
1636         int ret;
1637         struct net *net = dev_net(skb->dev);
1638
1639         if (skb->pkt_type != PACKET_HOST)
1640                 goto discard_it;
1641
1642         /*
1643          *      Count it even if it's bad.
1644          */
1645         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1646
1647         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1648                 goto discard_it;
1649
1650         th = (const struct tcphdr *)skb->data;
1651
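        /* th->doff is the header length in 32-bit words; anything below
         * sizeof(struct tcphdr) / 4 == 5 words cannot be a valid header.
         */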
1652         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1653                 goto bad_packet;
1654         if (!pskb_may_pull(skb, th->doff*4))
1655                 goto discard_it;
1656
1657         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1658                 goto csum_error;
1659
1660         th = (const struct tcphdr *)skb->data;
1661         hdr = ipv6_hdr(skb);
1662
1663 lookup:
1664         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1665                                 th->source, th->dest, inet6_iif(skb), sdif,
1666                                 &refcounted);
1667         if (!sk)
1668                 goto no_tcp_socket;
1669
1670 process:
1671         if (sk->sk_state == TCP_TIME_WAIT)
1672                 goto do_time_wait;
1673
1674         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1675                 struct request_sock *req = inet_reqsk(sk);
1676                 bool req_stolen = false;
1677                 struct sock *nsk;
1678
1679                 sk = req->rsk_listener;
1680                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1681                         sk_drops_add(sk, skb);
1682                         reqsk_put(req);
1683                         goto discard_it;
1684                 }
1685                 if (tcp_checksum_complete(skb)) {
1686                         reqsk_put(req);
1687                         goto csum_error;
1688                 }
1689                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1690                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1691                         if (!nsk) {
1692                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1693                                 goto lookup;
1694                         }
1695                         sk = nsk;
1696                         /* reuseport_migrate_sock() has already taken one
1697                          * sk_refcnt before returning.
1698                          */
1699                 } else {
1700                         sock_hold(sk);
1701                 }
1702                 refcounted = true;
1703                 nsk = NULL;
1704                 if (!tcp_filter(sk, skb)) {
1705                         th = (const struct tcphdr *)skb->data;
1706                         hdr = ipv6_hdr(skb);
1707                         tcp_v6_fill_cb(skb, hdr, th);
1708                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1709                 }
1710                 if (!nsk) {
1711                         reqsk_put(req);
1712                         if (req_stolen) {
1713                                 /* Another CPU got exclusive access to req
1714                                  * and created a full-blown socket.
1715                                  * Try to feed this packet to that socket
1716                                  * instead of discarding it.
1717                                  */
1718                                 tcp_v6_restore_cb(skb);
1719                                 sock_put(sk);
1720                                 goto lookup;
1721                         }
1722                         goto discard_and_relse;
1723                 }
1724                 if (nsk == sk) {
1725                         reqsk_put(req);
1726                         tcp_v6_restore_cb(skb);
1727                 } else if (tcp_child_process(sk, nsk, skb)) {
1728                         tcp_v6_send_reset(nsk, skb);
1729                         goto discard_and_relse;
1730                 } else {
1731                         sock_put(sk);
1732                         return 0;
1733                 }
1734         }
1735
1736         if (static_branch_unlikely(&ip6_min_hopcount)) {
1737                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1738                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1739                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1740                         goto discard_and_relse;
1741                 }
1742         }
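        /* Illustrative userspace sketch (not part of this file): the
         * min_hopcount check above provides RFC 5082-style generalized
         * TTL security; a peer expected to be one hop away can demand:
         *
         *      int min_hops = 255;
         *
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *                 &min_hops, sizeof(min_hops));
         */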
1743
1744         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1745                 goto discard_and_relse;
1746
1747         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1748                 goto discard_and_relse;
1749
1750         if (tcp_filter(sk, skb))
1751                 goto discard_and_relse;
1752         th = (const struct tcphdr *)skb->data;
1753         hdr = ipv6_hdr(skb);
1754         tcp_v6_fill_cb(skb, hdr, th);
1755
1756         skb->dev = NULL;
1757
1758         if (sk->sk_state == TCP_LISTEN) {
1759                 ret = tcp_v6_do_rcv(sk, skb);
1760                 goto put_and_return;
1761         }
1762
1763         sk_incoming_cpu_update(sk);
1764
1765         sk_defer_free_flush(sk);
1766         bh_lock_sock_nested(sk);
1767         tcp_segs_in(tcp_sk(sk), skb);
1768         ret = 0;
1769         if (!sock_owned_by_user(sk)) {
1770                 ret = tcp_v6_do_rcv(sk, skb);
1771         } else {
1772                 if (tcp_add_backlog(sk, skb))
1773                         goto discard_and_relse;
1774         }
1775         bh_unlock_sock(sk);
1776 put_and_return:
1777         if (refcounted)
1778                 sock_put(sk);
1779         return ret ? -1 : 0;
1780
1781 no_tcp_socket:
1782         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1783                 goto discard_it;
1784
1785         tcp_v6_fill_cb(skb, hdr, th);
1786
1787         if (tcp_checksum_complete(skb)) {
1788 csum_error:
1789                 trace_tcp_bad_csum(skb);
1790                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1791 bad_packet:
1792                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1793         } else {
1794                 tcp_v6_send_reset(NULL, skb);
1795         }
1796
1797 discard_it:
1798         kfree_skb(skb);
1799         return 0;
1800
1801 discard_and_relse:
1802         sk_drops_add(sk, skb);
1803         if (refcounted)
1804                 sock_put(sk);
1805         goto discard_it;
1806
1807 do_time_wait:
1808         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1809                 inet_twsk_put(inet_twsk(sk));
1810                 goto discard_it;
1811         }
1812
1813         tcp_v6_fill_cb(skb, hdr, th);
1814
1815         if (tcp_checksum_complete(skb)) {
1816                 inet_twsk_put(inet_twsk(sk));
1817                 goto csum_error;
1818         }
1819
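        /* tcp_timewait_state_process() decides what to do with a segment
         * hitting a timewait socket: accept a new SYN (TCP_TW_SYN),
         * re-send the final ACK (TCP_TW_ACK), answer with a reset
         * (TCP_TW_RST), or silently drop it (TCP_TW_SUCCESS).
         */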
1820         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1821         case TCP_TW_SYN:
1822         {
1823                 struct sock *sk2;
1824
1825                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1826                                             skb, __tcp_hdrlen(th),
1827                                             &ipv6_hdr(skb)->saddr, th->source,
1828                                             &ipv6_hdr(skb)->daddr,
1829                                             ntohs(th->dest),
1830                                             tcp_v6_iif_l3_slave(skb),
1831                                             sdif);
1832                 if (sk2) {
1833                         struct inet_timewait_sock *tw = inet_twsk(sk);
1834                         inet_twsk_deschedule_put(tw);
1835                         sk = sk2;
1836                         tcp_v6_restore_cb(skb);
1837                         refcounted = false;
1838                         goto process;
1839                 }
1840         }
1841                 /* to ACK */
1842                 fallthrough;
1843         case TCP_TW_ACK:
1844                 tcp_v6_timewait_ack(sk, skb);
1845                 break;
1846         case TCP_TW_RST:
1847                 tcp_v6_send_reset(sk, skb);
1848                 inet_twsk_deschedule_put(inet_twsk(sk));
1849                 goto discard_it;
1850         case TCP_TW_SUCCESS:
1851                 ;
1852         }
1853         goto discard_it;
1854 }
1855
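/* Early demux: look up the established socket while the packet is still in
 * the IP receive path, so the lookup in tcp_v6_rcv() can be skipped and the
 * socket's cached dst reused.
 */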
1856 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1857 {
1858         const struct ipv6hdr *hdr;
1859         const struct tcphdr *th;
1860         struct sock *sk;
1861
1862         if (skb->pkt_type != PACKET_HOST)
1863                 return;
1864
1865         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1866                 return;
1867
1868         hdr = ipv6_hdr(skb);
1869         th = tcp_hdr(skb);
1870
1871         if (th->doff < sizeof(struct tcphdr) / 4)
1872                 return;
1873
1874         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1875         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1876                                         &hdr->saddr, th->source,
1877                                         &hdr->daddr, ntohs(th->dest),
1878                                         inet6_iif(skb), inet6_sdif(skb));
1879         if (sk) {
1880                 skb->sk = sk;
1881                 skb->destructor = sock_edemux;
1882                 if (sk_fullsock(sk)) {
1883                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1884
1885                         if (dst)
1886                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1887                         if (dst &&
1888                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1889                                 skb_dst_set_noref(skb, dst);
1890                 }
1891         }
1892 }
1893
1894 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1895         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1896         .twsk_unique    = tcp_twsk_unique,
1897         .twsk_destructor = tcp_twsk_destructor,
1898 };
1899
1900 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1901 {
1902         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1903 }
1904
1905 const struct inet_connection_sock_af_ops ipv6_specific = {
1906         .queue_xmit        = inet6_csk_xmit,
1907         .send_check        = tcp_v6_send_check,
1908         .rebuild_header    = inet6_sk_rebuild_header,
1909         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1910         .conn_request      = tcp_v6_conn_request,
1911         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1912         .net_header_len    = sizeof(struct ipv6hdr),
1913         .net_frag_header_len = sizeof(struct frag_hdr),
1914         .setsockopt        = ipv6_setsockopt,
1915         .getsockopt        = ipv6_getsockopt,
1916         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1917         .sockaddr_len      = sizeof(struct sockaddr_in6),
1918         .mtu_reduced       = tcp_v6_mtu_reduced,
1919 };
1920
1921 #ifdef CONFIG_TCP_MD5SIG
1922 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1923         .md5_lookup     =       tcp_v6_md5_lookup,
1924         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1925         .md5_parse      =       tcp_v6_parse_md5_keys,
1926 };
1927 #endif
1928
1929 /*
1930  *      TCP over IPv4 via INET6 API
1931  */
1932 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1933         .queue_xmit        = ip_queue_xmit,
1934         .send_check        = tcp_v4_send_check,
1935         .rebuild_header    = inet_sk_rebuild_header,
1936         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1937         .conn_request      = tcp_v6_conn_request,
1938         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1939         .net_header_len    = sizeof(struct iphdr),
1940         .setsockopt        = ipv6_setsockopt,
1941         .getsockopt        = ipv6_getsockopt,
1942         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1943         .sockaddr_len      = sizeof(struct sockaddr_in6),
1944         .mtu_reduced       = tcp_v4_mtu_reduced,
1945 };
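/* Illustrative userspace sketch (not part of this file): ipv6_mapped takes
 * over when an AF_INET6 socket connects to a v4-mapped address, e.g.:
 *
 *      struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
 *                                  .sin6_port = htons(80) };
 *
 *      inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);
 *      connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */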
1946
1947 #ifdef CONFIG_TCP_MD5SIG
1948 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1949         .md5_lookup     =       tcp_v4_md5_lookup,
1950         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1951         .md5_parse      =       tcp_v6_parse_md5_keys,
1952 };
1953 #endif
1954
1955 /* NOTE: Many fields are set to zero explicitly by the call to
1956  *       sk_alloc(), so they need not be initialized here.
1957  */
1958 static int tcp_v6_init_sock(struct sock *sk)
1959 {
1960         struct inet_connection_sock *icsk = inet_csk(sk);
1961
1962         tcp_init_sock(sk);
1963
1964         icsk->icsk_af_ops = &ipv6_specific;
1965
1966 #ifdef CONFIG_TCP_MD5SIG
1967         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1968 #endif
1969
1970         return 0;
1971 }
1972
1973 static void tcp_v6_destroy_sock(struct sock *sk)
1974 {
1975         tcp_v4_destroy_sock(sk);
1976         inet6_destroy_sock(sk);
1977 }
1978
1979 #ifdef CONFIG_PROC_FS
1980 /* Proc filesystem TCPv6 sock list dumping. */
1981 static void get_openreq6(struct seq_file *seq,
1982                          const struct request_sock *req, int i)
1983 {
1984         long ttd = req->rsk_timer.expires - jiffies;
1985         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1986         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1987
1988         if (ttd < 0)
1989                 ttd = 0;
1990
1991         seq_printf(seq,
1992                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1993                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1994                    i,
1995                    src->s6_addr32[0], src->s6_addr32[1],
1996                    src->s6_addr32[2], src->s6_addr32[3],
1997                    inet_rsk(req)->ir_num,
1998                    dest->s6_addr32[0], dest->s6_addr32[1],
1999                    dest->s6_addr32[2], dest->s6_addr32[3],
2000                    ntohs(inet_rsk(req)->ir_rmt_port),
2001                    TCP_SYN_RECV,
2002                    0, 0, /* could print option size, but that is af dependent. */
2003                    1,   /* timers active (only the expire timer) */
2004                    jiffies_to_clock_t(ttd),
2005                    req->num_timeout,
2006                    from_kuid_munged(seq_user_ns(seq),
2007                                     sock_i_uid(req->rsk_listener)),
2008                    0,  /* non standard timer */
2009                    0, /* open_requests have no inode */
2010                    0, req);
2011 }
2012
2013 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2014 {
2015         const struct in6_addr *dest, *src;
2016         __u16 destp, srcp;
2017         int timer_active;
2018         unsigned long timer_expires;
2019         const struct inet_sock *inet = inet_sk(sp);
2020         const struct tcp_sock *tp = tcp_sk(sp);
2021         const struct inet_connection_sock *icsk = inet_csk(sp);
2022         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2023         int rx_queue;
2024         int state;
2025
2026         dest  = &sp->sk_v6_daddr;
2027         src   = &sp->sk_v6_rcv_saddr;
2028         destp = ntohs(inet->inet_dport);
2029         srcp  = ntohs(inet->inet_sport);
2030
2031         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2032             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2033             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2034                 timer_active    = 1;
2035                 timer_expires   = icsk->icsk_timeout;
2036         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2037                 timer_active    = 4;
2038                 timer_expires   = icsk->icsk_timeout;
2039         } else if (timer_pending(&sp->sk_timer)) {
2040                 timer_active    = 2;
2041                 timer_expires   = sp->sk_timer.expires;
2042         } else {
2043                 timer_active    = 0;
2044                 timer_expires = jiffies;
2045         }
2046
2047         state = inet_sk_state_load(sp);
2048         if (state == TCP_LISTEN)
2049                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2050         else
2051                 /* Because we don't lock the socket,
2052                  * we might find a transient negative value.
2053                  */
2054                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2055                                       READ_ONCE(tp->copied_seq), 0);
2056
2057         seq_printf(seq,
2058                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2059                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2060                    i,
2061                    src->s6_addr32[0], src->s6_addr32[1],
2062                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2063                    dest->s6_addr32[0], dest->s6_addr32[1],
2064                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2065                    state,
2066                    READ_ONCE(tp->write_seq) - tp->snd_una,
2067                    rx_queue,
2068                    timer_active,
2069                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2070                    icsk->icsk_retransmits,
2071                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2072                    icsk->icsk_probes_out,
2073                    sock_i_ino(sp),
2074                    refcount_read(&sp->sk_refcnt), sp,
2075                    jiffies_to_clock_t(icsk->icsk_rto),
2076                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2077                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2078                    tp->snd_cwnd,
2079                    state == TCP_LISTEN ?
2080                         fastopenq->max_qlen :
2081                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2082                    );
2083 }
2084
2085 static void get_timewait6_sock(struct seq_file *seq,
2086                                struct inet_timewait_sock *tw, int i)
2087 {
2088         long delta = tw->tw_timer.expires - jiffies;
2089         const struct in6_addr *dest, *src;
2090         __u16 destp, srcp;
2091
2092         dest = &tw->tw_v6_daddr;
2093         src  = &tw->tw_v6_rcv_saddr;
2094         destp = ntohs(tw->tw_dport);
2095         srcp  = ntohs(tw->tw_sport);
2096
2097         seq_printf(seq,
2098                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2099                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2100                    i,
2101                    src->s6_addr32[0], src->s6_addr32[1],
2102                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2103                    dest->s6_addr32[0], dest->s6_addr32[1],
2104                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2105                    tw->tw_substate, 0, 0,
2106                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2107                    refcount_read(&tw->tw_refcnt), tw);
2108 }
2109
2110 static int tcp6_seq_show(struct seq_file *seq, void *v)
2111 {
2112         struct tcp_iter_state *st;
2113         struct sock *sk = v;
2114
2115         if (v == SEQ_START_TOKEN) {
2116                 seq_puts(seq,
2117                          "  sl  "
2118                          "local_address                         "
2119                          "remote_address                        "
2120                          "st tx_queue rx_queue tr tm->when retrnsmt"
2121                          "   uid  timeout inode\n");
2122                 goto out;
2123         }
2124         st = seq->private;
2125
2126         if (sk->sk_state == TCP_TIME_WAIT)
2127                 get_timewait6_sock(seq, v, st->num);
2128         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2129                 get_openreq6(seq, v, st->num);
2130         else
2131                 get_tcp6_sock(seq, v, st->num);
2132 out:
2133         return 0;
2134 }
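/* Illustrative (not part of this file): the "st" column holds the TCP state
 * in hex, so listening sockets (TCP_LISTEN == 10 == 0x0A) can be counted
 * from userspace with e.g.:
 *
 *      awk 'NR > 1 && $4 == "0A"' /proc/net/tcp6 | wc -l
 */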
2135
2136 static const struct seq_operations tcp6_seq_ops = {
2137         .show           = tcp6_seq_show,
2138         .start          = tcp_seq_start,
2139         .next           = tcp_seq_next,
2140         .stop           = tcp_seq_stop,
2141 };
2142
2143 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2144         .family         = AF_INET6,
2145 };
2146
2147 int __net_init tcp6_proc_init(struct net *net)
2148 {
2149         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2150                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2151                 return -ENOMEM;
2152         return 0;
2153 }
2154
2155 void tcp6_proc_exit(struct net *net)
2156 {
2157         remove_proc_entry("tcp6", net->proc_net);
2158 }
2159 #endif
2160
2161 struct proto tcpv6_prot = {
2162         .name                   = "TCPv6",
2163         .owner                  = THIS_MODULE,
2164         .close                  = tcp_close,
2165         .pre_connect            = tcp_v6_pre_connect,
2166         .connect                = tcp_v6_connect,
2167         .disconnect             = tcp_disconnect,
2168         .accept                 = inet_csk_accept,
2169         .ioctl                  = tcp_ioctl,
2170         .init                   = tcp_v6_init_sock,
2171         .destroy                = tcp_v6_destroy_sock,
2172         .shutdown               = tcp_shutdown,
2173         .setsockopt             = tcp_setsockopt,
2174         .getsockopt             = tcp_getsockopt,
2175         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2176         .keepalive              = tcp_set_keepalive,
2177         .recvmsg                = tcp_recvmsg,
2178         .sendmsg                = tcp_sendmsg,
2179         .sendpage               = tcp_sendpage,
2180         .backlog_rcv            = tcp_v6_do_rcv,
2181         .release_cb             = tcp_release_cb,
2182         .hash                   = inet6_hash,
2183         .unhash                 = inet_unhash,
2184         .get_port               = inet_csk_get_port,
2185         .put_port               = inet_put_port,
2186 #ifdef CONFIG_BPF_SYSCALL
2187         .psock_update_sk_prot   = tcp_bpf_update_proto,
2188 #endif
2189         .enter_memory_pressure  = tcp_enter_memory_pressure,
2190         .leave_memory_pressure  = tcp_leave_memory_pressure,
2191         .stream_memory_free     = tcp_stream_memory_free,
2192         .sockets_allocated      = &tcp_sockets_allocated,
2193         .memory_allocated       = &tcp_memory_allocated,
2194         .memory_pressure        = &tcp_memory_pressure,
2195         .orphan_count           = &tcp_orphan_count,
2196         .sysctl_mem             = sysctl_tcp_mem,
2197         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2198         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2199         .max_header             = MAX_TCP_HEADER,
2200         .obj_size               = sizeof(struct tcp6_sock),
2201         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2202         .twsk_prot              = &tcp6_timewait_sock_ops,
2203         .rsk_prot               = &tcp6_request_sock_ops,
2204         .h.hashinfo             = &tcp_hashinfo,
2205         .no_autobind            = true,
2206         .diag_destroy           = tcp_abort,
2207 };
2208 EXPORT_SYMBOL_GPL(tcpv6_prot);
2209
2210 /* thinking of making this const? Don't.
2211  * early_demux can change based on sysctl.
2212  */
2213 static struct inet6_protocol tcpv6_protocol = {
2214         .early_demux    =       tcp_v6_early_demux,
2215         .early_demux_handler =  tcp_v6_early_demux,
2216         .handler        =       tcp_v6_rcv,
2217         .err_handler    =       tcp_v6_err,
2218         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2219 };
2220
2221 static struct inet_protosw tcpv6_protosw = {
2222         .type           =       SOCK_STREAM,
2223         .protocol       =       IPPROTO_TCP,
2224         .prot           =       &tcpv6_prot,
2225         .ops            =       &inet6_stream_ops,
2226         .flags          =       INET_PROTOSW_PERMANENT |
2227                                 INET_PROTOSW_ICSK,
2228 };
2229
2230 static int __net_init tcpv6_net_init(struct net *net)
2231 {
2232         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2233                                     SOCK_RAW, IPPROTO_TCP, net);
2234 }
2235
2236 static void __net_exit tcpv6_net_exit(struct net *net)
2237 {
2238         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2239 }
2240
2241 static struct pernet_operations tcpv6_net_ops = {
2242         .init       = tcpv6_net_init,
2243         .exit       = tcpv6_net_exit,
2244 };
2245
2246 int __init tcpv6_init(void)
2247 {
2248         int ret;
2249
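        /* Register in dependency order; on failure, the labels below
         * unwind whatever has already been registered, in reverse order.
         */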
2250         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2251         if (ret)
2252                 goto out;
2253
2254         /* register inet6 protocol */
2255         ret = inet6_register_protosw(&tcpv6_protosw);
2256         if (ret)
2257                 goto out_tcpv6_protocol;
2258
2259         ret = register_pernet_subsys(&tcpv6_net_ops);
2260         if (ret)
2261                 goto out_tcpv6_protosw;
2262
2263         ret = mptcpv6_init();
2264         if (ret)
2265                 goto out_tcpv6_pernet_subsys;
2266
2267 out:
2268         return ret;
2269
2270 out_tcpv6_pernet_subsys:
2271         unregister_pernet_subsys(&tcpv6_net_ops);
2272 out_tcpv6_protosw:
2273         inet6_unregister_protosw(&tcpv6_protosw);
2274 out_tcpv6_protocol:
2275         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2276         goto out;
2277 }
2278
2279 void tcpv6_exit(void)
2280 {
2281         unregister_pernet_subsys(&tcpv6_net_ops);
2282         inet6_unregister_protosw(&tcpv6_protosw);
2283         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2284 }