f2fs: Provide a splice-read wrapper
[linux-block.git] / net / ipv6 / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
/* Stub used when CONFIG_TCP_MD5SIG is not set: it never finds a key and
 * always returns NULL, whatever the arguments are.
 */
static struct tcp_md5sig_key *
tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr,
		     int l3index)
{
	return NULL;
}
#endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent BPF program called below from accessing bytes that are out
135          * of the bound specified by user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
/* Start an active open to the IPv6 (or IPv4-mapped) address in @uaddr.
 *
 * Steps, in order:
 *  - reject a too short @addr_len (-EINVAL) or a family other than
 *    AF_INET6 (-EAFNOSUPPORT);
 *  - when the socket has sndflow set, take the flow label from
 *    sin6_flowinfo and check it with fl6_sock_lookup();
 *  - replace the "any" address by loopback, refuse multicast
 *    (-ENETUNREACH) and require an interface for link-local destinations;
 *  - for an IPv4-mapped destination, switch the socket to the ipv6_mapped
 *    operations and hand over to tcp_v4_connect(), restoring the IPv6
 *    operations if that fails;
 *  - otherwise build the flow, look up a route, settle the source address,
 *    move to TCP_SYN_SENT, hash the socket, pick the initial sequence
 *    number and timestamp offset (unless in repair mode) and call
 *    tcp_connect(), unless tcp_fastopen_defer_connect() defers it.
 *
 * Returns 0 on success or a negative errno. On failure inet_dport and
 * sk_route_caps are cleared; failures after the state change additionally
 * put the socket back to TCP_CLOSE and reset the bhash2 source address.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_connection_sock *icsk = inet_csk(sk);
150         struct in6_addr *saddr = NULL, *final_p, final;
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct inet_sock *inet = inet_sk(sk);
154         struct tcp_sock *tp = tcp_sk(sk);
155         struct net *net = sock_net(sk);
156         struct ipv6_txoptions *opt;
157         struct dst_entry *dst;
158         struct flowi6 fl6;
159         int addr_type;
160         int err;
161
162         if (addr_len < SIN6_LEN_RFC2133)
163                 return -EINVAL;
164
165         if (usin->sin6_family != AF_INET6)
166                 return -EAFNOSUPPORT;
167
168         memset(&fl6, 0, sizeof(fl6));
169
            /* The user supplied flow label is only considered when the
             * socket has sndflow set.
             */
170         if (np->sndflow) {
171                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
172                 IP6_ECN_flow_init(fl6.flowlabel);
173                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
174                         struct ip6_flowlabel *flowlabel;
175                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176                         if (IS_ERR(flowlabel))
177                                 return -EINVAL;
178                         fl6_sock_release(flowlabel);
179                 }
180         }
181
182         /*
183          *      connect() to INADDR_ANY means loopback (BSD'ism).
184          */
185
186         if (ipv6_addr_any(&usin->sin6_addr)) {
187                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
188                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189                                                &usin->sin6_addr);
190                 else
191                         usin->sin6_addr = in6addr_loopback;
192         }
193
194         addr_type = ipv6_addr_type(&usin->sin6_addr);
195
196         if (addr_type & IPV6_ADDR_MULTICAST)
197                 return -ENETUNREACH;
198
199         if (addr_type&IPV6_ADDR_LINKLOCAL) {
200                 if (addr_len >= sizeof(struct sockaddr_in6) &&
201                     usin->sin6_scope_id) {
202                         /* If interface is set while binding, indices
203                          * must coincide.
204                          */
205                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206                                 return -EINVAL;
207
208                         sk->sk_bound_dev_if = usin->sin6_scope_id;
209                 }
210
211                 /* Connect to link-local address requires an interface */
212                 if (!sk->sk_bound_dev_if)
213                         return -EINVAL;
214         }
215
            /* Timestamp state is recorded but the destination differs from
             * the one stored in the socket: forget the timestamp state and
             * the write sequence.
             */
216         if (tp->rx_opt.ts_recent_stamp &&
217             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218                 tp->rx_opt.ts_recent = 0;
219                 tp->rx_opt.ts_recent_stamp = 0;
220                 WRITE_ONCE(tp->write_seq, 0);
221         }
222
223         sk->sk_v6_daddr = usin->sin6_addr;
224         np->flow_label = fl6.flowlabel;
225
226         /*
227          *      TCP over IPv4
228          */
229
230         if (addr_type & IPV6_ADDR_MAPPED) {
231                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
232                 struct sockaddr_in sin;
233
234                 if (ipv6_only_sock(sk))
235                         return -ENETUNREACH;
236
237                 sin.sin_family = AF_INET;
238                 sin.sin_port = usin->sin6_port;
239                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240
241                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
242                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
243                 if (sk_is_mptcp(sk))
244                         mptcpv6_handle_mapped(sk, true);
245                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
246 #ifdef CONFIG_TCP_MD5SIG
247                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 #endif
249
250                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251
                    /* The IPv4 connect failed: undo every switch made
                     * above so the socket uses the IPv6 operations again.
                     */
252                 if (err) {
253                         icsk->icsk_ext_hdr_len = exthdrlen;
254                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
255                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
256                         if (sk_is_mptcp(sk))
257                                 mptcpv6_handle_mapped(sk, false);
258                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
259 #ifdef CONFIG_TCP_MD5SIG
260                         tp->af_specific = &tcp_sock_ipv6_specific;
261 #endif
262                         goto failure;
263                 }
264                 np->saddr = sk->sk_v6_rcv_saddr;
265
266                 return err;
267         }
268
269         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
270                 saddr = &sk->sk_v6_rcv_saddr;
271
            /* Describe the flow that is used for the route lookup below. */
272         fl6.flowi6_proto = IPPROTO_TCP;
273         fl6.daddr = sk->sk_v6_daddr;
274         fl6.saddr = saddr ? *saddr : np->saddr;
275         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
276         fl6.flowi6_oif = sk->sk_bound_dev_if;
277         fl6.flowi6_mark = sk->sk_mark;
278         fl6.fl6_dport = usin->sin6_port;
279         fl6.fl6_sport = inet->inet_sport;
280         fl6.flowi6_uid = sk->sk_uid;
281
282         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
283         final_p = fl6_update_dst(&fl6, opt, &final);
284
285         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
286
287         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
288         if (IS_ERR(dst)) {
289                 err = PTR_ERR(dst);
290                 goto failure;
291         }
292
293         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
294
            /* The socket has no receive address of its own: use the flow's
             * source address and update the bhash2 entry accordingly.
             */
295         if (!saddr) {
296                 saddr = &fl6.saddr;
297
298                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
299                 if (err)
300                         goto failure;
301         }
302
303         /* set the source address */
304         np->saddr = *saddr;
305         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
306
307         sk->sk_gso_type = SKB_GSO_TCPV6;
308         ip6_dst_store(sk, dst, NULL, NULL);
309
310         icsk->icsk_ext_hdr_len = 0;
311         if (opt)
312                 icsk->icsk_ext_hdr_len = opt->opt_flen +
313                                          opt->opt_nflen;
314
315         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
316
317         inet->inet_dport = usin->sin6_port;
318
319         tcp_set_state(sk, TCP_SYN_SENT);
320         err = inet6_hash_connect(tcp_death_row, sk);
321         if (err)
322                 goto late_failure;
323
324         sk_set_txhash(sk);
325
            /* Outside repair mode: choose the initial sequence number (only
             * if write_seq is still zero) and the timestamp offset.
             */
326         if (likely(!tp->repair)) {
327                 if (!tp->write_seq)
328                         WRITE_ONCE(tp->write_seq,
329                                    secure_tcpv6_seq(np->saddr.s6_addr32,
330                                                     sk->sk_v6_daddr.s6_addr32,
331                                                     inet->inet_sport,
332                                                     inet->inet_dport));
333                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
334                                                    sk->sk_v6_daddr.s6_addr32);
335         }
336
337         if (tcp_fastopen_defer_connect(sk, &err))
338                 return err;
339         if (err)
340                 goto late_failure;
341
342         err = tcp_connect(sk);
343         if (err)
344                 goto late_failure;
345
346         return 0;
347
    /* Failure after the move to TCP_SYN_SENT: close the socket again and
     * reset the bhash2 source address, then fall through to the common
     * cleanup.
     */
348 late_failure:
349         tcp_set_state(sk, TCP_CLOSE);
350         inet_bhash2_reset_saddr(sk);
351 failure:
352         inet->inet_dport = 0;
353         sk->sk_route_caps = 0;
354         return err;
355 }
356
357 static void tcp_v6_mtu_reduced(struct sock *sk)
358 {
359         struct dst_entry *dst;
360         u32 mtu;
361
362         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
363                 return;
364
365         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
366
367         /* Drop requests trying to increase our current mss.
368          * Check done in __ip6_rt_update_pmtu() is too late.
369          */
370         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
371                 return;
372
373         dst = inet6_csk_update_pmtu(sk, mtu);
374         if (!dst)
375                 return;
376
377         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
378                 tcp_sync_mss(sk, dst_mtu(dst));
379                 tcp_simple_retransmit(sk);
380         }
381 }
382
/* ICMPv6 error handler for TCP.
 *
 * @skb->data points at the IPv6 header quoted in the error and @offset is
 * the offset of the quoted TCP header; both are used to look up the
 * established socket the error refers to.
 *
 * Returns -ENOENT (after counting ICMP6_MIB_INERRORS) when no socket is
 * found, 0 in every other case.
 *
 * TIME_WAIT sockets are only released and NEW_SYN_RECV sockets are handed
 * to tcp_req_err(). For all other sockets the work is done under
 * bh_lock_sock():
 *  - errors for closed sockets, with a too small hop limit (when
 *    ip6_min_hopcount is enabled) or with a sequence number outside
 *    [snd_una, snd_nxt] (except in TCP_LISTEN) are dropped;
 *  - NDISC_REDIRECT is passed to the dst's redirect operation;
 *  - ICMPV6_PKT_TOOBIG records the new MTU and either applies it at once
 *    or defers it through TCP_MTU_REDUCED_DEFERRED when the socket is
 *    owned by the user;
 *  - anything else is reported through sk_err or sk_err_soft, depending on
 *    the socket state, user ownership and np->recverr.
 */
383 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
384                 u8 type, u8 code, int offset, __be32 info)
385 {
386         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
387         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
388         struct net *net = dev_net(skb->dev);
389         struct request_sock *fastopen;
390         struct ipv6_pinfo *np;
391         struct tcp_sock *tp;
392         __u32 seq, snd_una;
393         struct sock *sk;
394         bool fatal;
395         int err;
396
397         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
398                                         &hdr->daddr, th->dest,
399                                         &hdr->saddr, ntohs(th->source),
400                                         skb->dev->ifindex, inet6_sdif(skb));
401
402         if (!sk) {
403                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
404                                   ICMP6_MIB_INERRORS);
405                 return -ENOENT;
406         }
407
408         if (sk->sk_state == TCP_TIME_WAIT) {
409                 inet_twsk_put(inet_twsk(sk));
410                 return 0;
411         }
412         seq = ntohl(th->seq);
413         fatal = icmpv6_err_convert(type, code, &err);
414         if (sk->sk_state == TCP_NEW_SYN_RECV) {
415                 tcp_req_err(sk, seq, fatal);
416                 return 0;
417         }
418
419         bh_lock_sock(sk);
            /* Only counted here; processing continues below. */
420         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
421                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
422
423         if (sk->sk_state == TCP_CLOSE)
424                 goto out;
425
426         if (static_branch_unlikely(&ip6_min_hopcount)) {
427                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
428                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
429                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
430                         goto out;
431                 }
432         }
433
434         tp = tcp_sk(sk);
435         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
436         fastopen = rcu_dereference(tp->fastopen_rsk);
437         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
438         if (sk->sk_state != TCP_LISTEN &&
439             !between(seq, snd_una, tp->snd_nxt)) {
440                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
441                 goto out;
442         }
443
444         np = tcp_inet6_sk(sk);
445
446         if (type == NDISC_REDIRECT) {
447                 if (!sock_owned_by_user(sk)) {
448                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
449
450                         if (dst)
451                                 dst->ops->redirect(dst, sk, skb);
452                 }
453                 goto out;
454         }
455
456         if (type == ICMPV6_PKT_TOOBIG) {
457                 u32 mtu = ntohl(info);
458
459                 /* We are not interested in TCP_LISTEN and open_requests
460                  * (SYN-ACKs send out by Linux are always <576bytes so
461                  * they should go through unfragmented).
462                  */
463                 if (sk->sk_state == TCP_LISTEN)
464                         goto out;
465
466                 if (!ip6_sk_accept_pmtu(sk))
467                         goto out;
468
469                 if (mtu < IPV6_MIN_MTU)
470                         goto out;
471
472                 WRITE_ONCE(tp->mtu_info, mtu);
473
                    /* Apply the new MTU now, or flag it as deferred when
                     * the user owns the socket; a socket reference is taken
                     * only when this call is the one that sets the flag.
                     */
474                 if (!sock_owned_by_user(sk))
475                         tcp_v6_mtu_reduced(sk);
476                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
477                                            &sk->sk_tsq_flags))
478                         sock_hold(sk);
479                 goto out;
480         }
481
482
483         /* Might be for a request_sock */
484         switch (sk->sk_state) {
485         case TCP_SYN_SENT:
486         case TCP_SYN_RECV:
487                 /* Only in fast or simultaneous open. If a fast open socket is
488                  * already accepted it is treated as a connected one below.
489                  */
490                 if (fastopen && !fastopen->sk)
491                         break;
492
493                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
494
495                 if (!sock_owned_by_user(sk)) {
496                         WRITE_ONCE(sk->sk_err, err);
497                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
498
499                         tcp_done(sk);
500                 } else {
501                         WRITE_ONCE(sk->sk_err_soft, err);
502                 }
503                 goto out;
504         case TCP_LISTEN:
505                 break;
506         default:
507                 /* check if this ICMP message allows revert of backoff.
508                  * (see RFC 6069)
509                  */
510                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
511                     code == ICMPV6_NOROUTE)
512                         tcp_ld_RTO_revert(sk, seq);
513         }
514
            /* Hard error only when the user does not own the socket and
             * recverr is set; otherwise it is recorded as a soft error.
             */
515         if (!sock_owned_by_user(sk) && np->recverr) {
516                 WRITE_ONCE(sk->sk_err, err);
517                 sk_error_report(sk);
518         } else {
519                 WRITE_ONCE(sk->sk_err_soft, err);
520         }
521 out:
522         bh_unlock_sock(sk);
523         sock_put(sk);
524         return 0;
525 }
526
527
528 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
529                               struct flowi *fl,
530                               struct request_sock *req,
531                               struct tcp_fastopen_cookie *foc,
532                               enum tcp_synack_type synack_type,
533                               struct sk_buff *syn_skb)
534 {
535         struct inet_request_sock *ireq = inet_rsk(req);
536         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
537         struct ipv6_txoptions *opt;
538         struct flowi6 *fl6 = &fl->u.ip6;
539         struct sk_buff *skb;
540         int err = -ENOMEM;
541         u8 tclass;
542
543         /* First, grab a route. */
544         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
545                                                IPPROTO_TCP)) == NULL)
546                 goto done;
547
548         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
549
550         if (skb) {
551                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
552                                     &ireq->ir_v6_rmt_addr);
553
554                 fl6->daddr = ireq->ir_v6_rmt_addr;
555                 if (np->repflow && ireq->pktopts)
556                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
557
558                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
559                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
560                                 (np->tclass & INET_ECN_MASK) :
561                                 np->tclass;
562
563                 if (!INET_ECN_is_capable(tclass) &&
564                     tcp_bpf_ca_needs_ecn((struct sock *)req))
565                         tclass |= INET_ECN_ECT_0;
566
567                 rcu_read_lock();
568                 opt = ireq->ipv6_opt;
569                 if (!opt)
570                         opt = rcu_dereference(np->opt);
571                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
572                                tclass, sk->sk_priority);
573                 rcu_read_unlock();
574                 err = net_xmit_eval(err);
575         }
576
577 done:
578         return err;
579 }
580
581
582 static void tcp_v6_reqsk_destructor(struct request_sock *req)
583 {
584         kfree(inet_rsk(req)->ipv6_opt);
585         consume_skb(inet_rsk(req)->pktopts);
586 }
587
588 #ifdef CONFIG_TCP_MD5SIG
589 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
590                                                    const struct in6_addr *addr,
591                                                    int l3index)
592 {
593         return tcp_md5_do_lookup(sk, l3index,
594                                  (union tcp_md5_addr *)addr, AF_INET6);
595 }
596
597 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
598                                                 const struct sock *addr_sk)
599 {
600         int l3index;
601
602         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
603                                                  addr_sk->sk_bound_dev_if);
604         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
605                                     l3index);
606 }
607
608 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
609                                  sockptr_t optval, int optlen)
610 {
611         struct tcp_md5sig cmd;
612         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
613         int l3index = 0;
614         u8 prefixlen;
615         u8 flags;
616
617         if (optlen < sizeof(cmd))
618                 return -EINVAL;
619
620         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
621                 return -EFAULT;
622
623         if (sin6->sin6_family != AF_INET6)
624                 return -EINVAL;
625
626         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
627
628         if (optname == TCP_MD5SIG_EXT &&
629             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
630                 prefixlen = cmd.tcpm_prefixlen;
631                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
632                                         prefixlen > 32))
633                         return -EINVAL;
634         } else {
635                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
636         }
637
638         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
639             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
640                 struct net_device *dev;
641
642                 rcu_read_lock();
643                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
644                 if (dev && netif_is_l3_master(dev))
645                         l3index = dev->ifindex;
646                 rcu_read_unlock();
647
648                 /* ok to reference set/not set outside of rcu;
649                  * right now device MUST be an L3 master
650                  */
651                 if (!dev || !l3index)
652                         return -EINVAL;
653         }
654
655         if (!cmd.tcpm_keylen) {
656                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
657                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
658                                               AF_INET, prefixlen,
659                                               l3index, flags);
660                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661                                       AF_INET6, prefixlen, l3index, flags);
662         }
663
664         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
665                 return -EINVAL;
666
667         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
668                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
669                                       AF_INET, prefixlen, l3index, flags,
670                                       cmd.tcpm_key, cmd.tcpm_keylen);
671
672         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
673                               AF_INET6, prefixlen, l3index, flags,
674                               cmd.tcpm_key, cmd.tcpm_keylen);
675 }
676
677 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
678                                    const struct in6_addr *daddr,
679                                    const struct in6_addr *saddr,
680                                    const struct tcphdr *th, int nbytes)
681 {
682         struct tcp6_pseudohdr *bp;
683         struct scatterlist sg;
684         struct tcphdr *_th;
685
686         bp = hp->scratch;
687         /* 1. TCP pseudo-header (RFC2460) */
688         bp->saddr = *saddr;
689         bp->daddr = *daddr;
690         bp->protocol = cpu_to_be32(IPPROTO_TCP);
691         bp->len = cpu_to_be32(nbytes);
692
693         _th = (struct tcphdr *)(bp + 1);
694         memcpy(_th, th, sizeof(*th));
695         _th->check = 0;
696
697         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
698         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
699                                 sizeof(*bp) + sizeof(*th));
700         return crypto_ahash_update(hp->md5_req);
701 }
702
703 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
704                                const struct in6_addr *daddr, struct in6_addr *saddr,
705                                const struct tcphdr *th)
706 {
707         struct tcp_md5sig_pool *hp;
708         struct ahash_request *req;
709
710         hp = tcp_get_md5sig_pool();
711         if (!hp)
712                 goto clear_hash_noput;
713         req = hp->md5_req;
714
715         if (crypto_ahash_init(req))
716                 goto clear_hash;
717         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
718                 goto clear_hash;
719         if (tcp_md5_hash_key(hp, key))
720                 goto clear_hash;
721         ahash_request_set_crypt(req, NULL, md5_hash, 0);
722         if (crypto_ahash_final(req))
723                 goto clear_hash;
724
725         tcp_put_md5sig_pool();
726         return 0;
727
728 clear_hash:
729         tcp_put_md5sig_pool();
730 clear_hash_noput:
731         memset(md5_hash, 0, 16);
732         return 1;
733 }
734
735 static int tcp_v6_md5_hash_skb(char *md5_hash,
736                                const struct tcp_md5sig_key *key,
737                                const struct sock *sk,
738                                const struct sk_buff *skb)
739 {
740         const struct in6_addr *saddr, *daddr;
741         struct tcp_md5sig_pool *hp;
742         struct ahash_request *req;
743         const struct tcphdr *th = tcp_hdr(skb);
744
745         if (sk) { /* valid for establish/request sockets */
746                 saddr = &sk->sk_v6_rcv_saddr;
747                 daddr = &sk->sk_v6_daddr;
748         } else {
749                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
750                 saddr = &ip6h->saddr;
751                 daddr = &ip6h->daddr;
752         }
753
754         hp = tcp_get_md5sig_pool();
755         if (!hp)
756                 goto clear_hash_noput;
757         req = hp->md5_req;
758
759         if (crypto_ahash_init(req))
760                 goto clear_hash;
761
762         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
763                 goto clear_hash;
764         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
765                 goto clear_hash;
766         if (tcp_md5_hash_key(hp, key))
767                 goto clear_hash;
768         ahash_request_set_crypt(req, NULL, md5_hash, 0);
769         if (crypto_ahash_final(req))
770                 goto clear_hash;
771
772         tcp_put_md5sig_pool();
773         return 0;
774
775 clear_hash:
776         tcp_put_md5sig_pool();
777 clear_hash_noput:
778         memset(md5_hash, 0, 16);
779         return 1;
780 }
781
782 #endif
783
784 static void tcp_v6_init_req(struct request_sock *req,
785                             const struct sock *sk_listener,
786                             struct sk_buff *skb)
787 {
788         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
789         struct inet_request_sock *ireq = inet_rsk(req);
790         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
791
792         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
793         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
794
795         /* So that link locals have meaning */
796         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
797             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
798                 ireq->ir_iif = tcp_v6_iif(skb);
799
800         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
801             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
802              np->rxopt.bits.rxinfo ||
803              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
804              np->rxopt.bits.rxohlim || np->repflow)) {
805                 refcount_inc(&skb->users);
806                 ireq->pktopts = skb;
807         }
808 }
809
810 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
811                                           struct sk_buff *skb,
812                                           struct flowi *fl,
813                                           struct request_sock *req)
814 {
815         tcp_v6_init_req(req, sk, skb);
816
817         if (security_inet_conn_request(sk, skb, req))
818                 return NULL;
819
820         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
821 }
822
823 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
824         .family         =       AF_INET6,
825         .obj_size       =       sizeof(struct tcp6_request_sock),
826         .rtx_syn_ack    =       tcp_rtx_synack,
827         .send_ack       =       tcp_v6_reqsk_send_ack,
828         .destructor     =       tcp_v6_reqsk_destructor,
829         .send_reset     =       tcp_v6_send_reset,
830         .syn_ack_timeout =      tcp_syn_ack_timeout,
831 };
832
/*
 * TCP-level request-sock operations for IPv6.
 *
 * mss_clamp is IPV6_MIN_MTU minus the TCP and IPv6 header sizes. The MD5
 * hooks exist only with CONFIG_TCP_MD5SIG and the cookie hook only with
 * CONFIG_SYN_COOKIES.
 */
833 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
834         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
835                                 sizeof(struct ipv6hdr),
836 #ifdef CONFIG_TCP_MD5SIG
837         .req_md5_lookup =       tcp_v6_md5_lookup,
838         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
839 #endif
840 #ifdef CONFIG_SYN_COOKIES
841         .cookie_init_seq =      cookie_v6_init_sequence,
842 #endif
843         .route_req      =       tcp_v6_route_req,
844         .init_seq       =       tcp_v6_init_seq,
845         .init_ts_off    =       tcp_v6_init_ts_off,
846         .send_synack    =       tcp_v6_send_synack,
847 };
848
/*
 * tcp_v6_send_response - build and transmit a data-less TCP reply (ACK or
 * RST) to the segment held in @skb.
 *
 * @sk:       socket the reply is sent for; may be NULL, in which case the
 *            network namespace is taken from the device of @skb's dst.
 * @skb:      segment being answered; its ports and addresses are swapped.
 * @seq:      sequence number for the reply (host order).
 * @ack:      acknowledgment number for the reply (host order).
 * @win:      window value for the reply (host order).
 * @tsval:    timestamp value; only used when @tsecr is non-zero.
 * @tsecr:    timestamp echo; a timestamp option is emitted only if non-zero.
 * @oif:      output interface index; may be replaced below when zero.
 * @key:      MD5 key; with CONFIG_TCP_MD5SIG a signature option is added
 *            when this is non-NULL.
 * @rst:      non-zero to send a RST, zero to send a plain ACK.
 * @tclass:   traffic class; the ECN bits are masked off before transmit.
 * @label:    flow label placed in the flow key.
 * @priority: priority handed to ip6_xmit().
 * @txhash:   if non-zero, stored as the reply skb's L4 hash.
 *
 * The reply is transmitted through the per-namespace control socket
 * (net->ipv6.tcp_sk). The function returns silently if the reply skb cannot
 * be allocated and frees the reply if the route lookup fails.
 */
849 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
850                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
851                                  int oif, struct tcp_md5sig_key *key, int rst,
852                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
853 {
854         const struct tcphdr *th = tcp_hdr(skb);
855         struct tcphdr *t1;
856         struct sk_buff *buff;
857         struct flowi6 fl6;
858         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
859         struct sock *ctl_sk = net->ipv6.tcp_sk;
860         unsigned int tot_len = sizeof(struct tcphdr);
861         __be32 mrst = 0, *topt;
862         struct dst_entry *dst;
863         __u32 mark = 0;
864
        /* Work out the header length: base header plus each option used. */
865         if (tsecr)
866                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
867 #ifdef CONFIG_TCP_MD5SIG
868         if (key)
869                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
870 #endif
871
872 #ifdef CONFIG_MPTCP
        /* The MPTCP reset option is only considered for a RST without MD5 key. */
873         if (rst && !key) {
874                 mrst = mptcp_reset_option(skb);
875
876                 if (mrst)
877                         tot_len += sizeof(__be32);
878         }
879 #endif
880
881         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
882         if (!buff)
883                 return;
884
885         skb_reserve(buff, MAX_TCP_HEADER);
886
887         t1 = skb_push(buff, tot_len);
888         skb_reset_transport_header(buff);
889
890         /* Swap the send and the receive. */
891         memset(t1, 0, sizeof(*t1));
892         t1->dest = th->source;
893         t1->source = th->dest;
894         t1->doff = tot_len / 4;
895         t1->seq = htonl(seq);
896         t1->ack_seq = htonl(ack);
        /* ACK is set on every non-RST reply, and on a RST answering a
         * segment that itself carried no ACK.
         */
897         t1->ack = !rst || !th->ack;
898         t1->rst = rst;
899         t1->window = htons(win);
900
        /* Options are written directly after the fixed header. */
901         topt = (__be32 *)(t1 + 1);
902
903         if (tsecr) {
904                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
905                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
906                 *topt++ = htonl(tsval);
907                 *topt++ = htonl(tsecr);
908         }
909
910         if (mrst)
911                 *topt++ = mrst;
912
913 #ifdef CONFIG_TCP_MD5SIG
914         if (key) {
915                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
916                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
917                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
918                                     &ipv6_hdr(skb)->saddr,
919                                     &ipv6_hdr(skb)->daddr, t1);
920         }
921 #endif
922
        /* The reply flows in the reverse direction of the incoming packet. */
923         memset(&fl6, 0, sizeof(fl6));
924         fl6.daddr = ipv6_hdr(skb)->saddr;
925         fl6.saddr = ipv6_hdr(skb)->daddr;
926         fl6.flowlabel = label;
927
928         buff->ip_summed = CHECKSUM_PARTIAL;
929
930         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
931
932         fl6.flowi6_proto = IPPROTO_TCP;
        /* With no oif given: use the incoming interface when the destination
         * needs a strict route, or the skb's input ifindex when that is an
         * L3 master device.
         */
933         if (rt6_need_strict(&fl6.daddr) && !oif)
934                 fl6.flowi6_oif = tcp_v6_iif(skb);
935         else {
936                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
937                         oif = skb->skb_iif;
938
939                 fl6.flowi6_oif = oif;
940         }
941
942         if (sk) {
                /* Time-wait sockets keep their mark in a different field. */
943                 if (sk->sk_state == TCP_TIME_WAIT)
944                         mark = inet_twsk(sk)->tw_mark;
945                 else
946                         mark = sk->sk_mark;
947                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
948         }
949         if (txhash) {
950                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
951                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
952         }
        /* A non-zero reply mark takes precedence over the socket's mark. */
953         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
954         fl6.fl6_dport = t1->dest;
955         fl6.fl6_sport = t1->source;
956         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
957         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
958
959         /* Pass a socket to ip6_dst_lookup_flow() even if this is for a RST.
960          * The caller's socket is used unless it is absent or in TIME_WAIT,
961          * in which case the control socket is used instead.
962          */
963         if (sk && sk->sk_state != TCP_TIME_WAIT)
964                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
965         else
966                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
967         if (!IS_ERR(dst)) {
968                 skb_dst_set(buff, dst);
969                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
970                          tclass & ~INET_ECN_MASK, priority);
971                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
972                 if (rst)
973                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
974                 return;
975         }
976
        /* Route lookup failed: drop the reply we built. */
977         kfree_skb(buff);
978 }
979
/*
 * tcp_v6_send_reset - answer the segment in @skb with a RST.
 *
 * @sk:  socket the segment matched, or NULL if none was found.
 * @skb: offending segment.
 *
 * Nothing is sent when the incoming segment already has RST set, or when
 * there is no socket and the packet was not sent to a unicast destination.
 *
 * With CONFIG_TCP_MD5SIG: for a full socket the MD5 key is looked up on that
 * socket. Otherwise, if the segment carries an MD5 option, a listening socket
 * is looked up and the segment's signature is verified against its key; the
 * RST is suppressed if no listener or key is found or the hash differs.
 */
980 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
981 {
982         const struct tcphdr *th = tcp_hdr(skb);
983         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
984         u32 seq = 0, ack_seq = 0;
985         struct tcp_md5sig_key *key = NULL;
986 #ifdef CONFIG_TCP_MD5SIG
987         const __u8 *hash_location = NULL;
988         unsigned char newhash[16];
989         int genhash;
990         struct sock *sk1 = NULL;
991 #endif
992         __be32 label = 0;
993         u32 priority = 0;
994         struct net *net;
995         u32 txhash = 0;
996         int oif = 0;
997
        /* Never answer a RST with a RST. */
998         if (th->rst)
999                 return;
1000
1001         /* If sk not NULL, it means we did a successful lookup and incoming
1002          * route had to be correct. prequeue might have dropped our dst.
1003          */
1004         if (!sk && !ipv6_unicast_destination(skb))
1005                 return;
1006
1007         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1008 #ifdef CONFIG_TCP_MD5SIG
        /* Held until the "out" label; covers the key lookups below. */
1009         rcu_read_lock();
1010         hash_location = tcp_parse_md5sig_option(th);
1011         if (sk && sk_fullsock(sk)) {
1012                 int l3index;
1013
1014                 /* sdif set, means packet ingressed via a device
1015                  * in an L3 domain and inet_iif is set to it.
1016                  */
1017                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1018                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1019         } else if (hash_location) {
1020                 int dif = tcp_v6_iif_l3_slave(skb);
1021                 int sdif = tcp_v6_sdif(skb);
1022                 int l3index;
1023
1024                 /*
1025                  * Active side is lost. Try to find a listening socket through
1026                  * the source port, and then find the MD5 key through that socket.
1027                  * We do not lose security here:
1028                  * the incoming packet's MD5 hash is checked with the key found;
1029                  * no RST is generated if the MD5 hash doesn't match.
1030                  */
1031                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1032                                             NULL, 0, &ipv6h->saddr, th->source,
1033                                             &ipv6h->daddr, ntohs(th->source),
1034                                             dif, sdif);
1035                 if (!sk1)
1036                         goto out;
1037
1038                 /* sdif set, means packet ingressed via a device
1039                  * in an L3 domain and dif is set to it.
1040                  */
1041                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1042
1043                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1044                 if (!key)
1045                         goto out;
1046
                /* Recompute the signature over the incoming segment and
                 * compare all 16 bytes with the one it carried.
                 */
1047                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1048                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1049                         goto out;
1050         }
1051 #endif
1052
        /* If the segment carried an ACK, the RST takes its sequence number
         * from that ack_seq. Otherwise the RST acknowledges everything the
         * segment occupied: seq + SYN + FIN + payload length.
         */
1053         if (th->ack)
1054                 seq = ntohl(th->ack_seq);
1055         else
1056                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1057                           (th->doff << 2);
1058
1059         if (sk) {
1060                 oif = sk->sk_bound_dev_if;
1061                 if (sk_fullsock(sk)) {
1062                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1063
1064                         trace_tcp_send_reset(sk, skb);
                        /* Reflect the peer's flow label only if asked to. */
1065                         if (np->repflow)
1066                                 label = ip6_flowlabel(ipv6h);
1067                         priority = sk->sk_priority;
1068                         txhash = sk->sk_txhash;
1069                 }
                /* Time-wait sockets carry their own copies of these values. */
1070                 if (sk->sk_state == TCP_TIME_WAIT) {
1071                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1072                         priority = inet_twsk(sk)->tw_priority;
1073                         txhash = inet_twsk(sk)->tw_txhash;
1074                 }
1075         } else {
                /* No socket: label reflection is governed by the sysctl. */
1076                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1077                         label = ip6_flowlabel(ipv6h);
1078         }
1079
        /* win, tsval and tsecr are all zero for a RST; rst is 1. */
1080         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1081                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1082
1083 #ifdef CONFIG_TCP_MD5SIG
1084 out:
1085         rcu_read_unlock();
1086 #endif
1087 }
1088
/*
 * tcp_v6_send_ack - send a plain ACK in reply to @skb.
 *
 * Thin wrapper that forwards every argument to tcp_v6_send_response() with
 * its rst argument fixed to 0.
 */
1089 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1090                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1091                             struct tcp_md5sig_key *key, u8 tclass,
1092                             __be32 label, u32 priority, u32 txhash)
1093 {
1094         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1095                              tclass, label, priority, txhash);
1096 }
1097
/*
 * tcp_v6_timewait_ack - ACK a segment on behalf of a time-wait socket.
 *
 * Every value for the ACK (sequence numbers, scaled window, timestamps,
 * bound device, MD5 key, traffic class, flow label, priority, tx hash) is
 * read from the time-wait socket. inet_twsk_put() is called on it afterwards.
 */
1098 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1099 {
1100         struct inet_timewait_sock *tw = inet_twsk(sk);
1101         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1102
1103         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1104                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1105                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1106                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1107                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1108                         tw->tw_txhash);
1109
1110         inet_twsk_put(tw);
1111 }
1112
/*
 * tcp_v6_reqsk_send_ack - send an ACK for a pending connection request.
 *
 * @sk:  socket the request belongs to; its state selects the sequence
 *       number used (see the comment in the body).
 * @skb: segment being acknowledged.
 * @req: request sock supplying rcv_nxt, window, timestamps and tx hash.
 *
 * The flow label passed on is always 0.
 */
1113 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1114                                   struct request_sock *req)
1115 {
1116         int l3index;
1117
        /* Only use the incoming L3 device index when sdif is set. */
1118         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1119
1120         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1121          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1122          */
1123         /* RFC 7323 2.3
1124          * The window field (SEG.WND) of every outgoing segment, with the
1125          * exception of <SYN> segments, MUST be right-shifted by
1126          * Rcv.Wind.Shift bits:
1127          */
1128         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1129                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1130                         tcp_rsk(req)->rcv_nxt,
1131                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1132                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1133                         req->ts_recent, sk->sk_bound_dev_if,
1134                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1135                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
1136                         tcp_rsk(req)->txhash);
1137 }
1138
1139
/*
 * tcp_v6_cookie_check - run the SYN-cookie check on a non-SYN segment.
 *
 * With CONFIG_SYN_COOKIES, a segment without the SYN flag is handed to
 * cookie_v6_check() and its result is returned in place of @sk. In every
 * other case @sk is returned unchanged.
 */
1140 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1141 {
1142 #ifdef CONFIG_SYN_COOKIES
1143         const struct tcphdr *th = tcp_hdr(skb);
1144
1145         if (!th->syn)
1146                 sk = cookie_v6_check(sk, skb);
1147 #endif
1148         return sk;
1149 }
1150
/*
 * tcp_v6_get_syncookie - compute a SYN cookie for the given headers.
 *
 * @sk:     socket passed to tcp_get_syncookie_mss() and tcp_synq_overflow().
 * @iph:    IPv6 header of the SYN.
 * @th:     TCP header of the SYN.
 * @cookie: output; written only when a non-zero MSS is obtained.
 *
 * Returns the MSS, or 0 when CONFIG_SYN_COOKIES is off or
 * tcp_get_syncookie_mss() returned 0 (in which case *cookie is untouched).
 */
1151 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1152                          struct tcphdr *th, u32 *cookie)
1153 {
1154         u16 mss = 0;
1155 #ifdef CONFIG_SYN_COOKIES
1156         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1157                                     &tcp_request_sock_ipv6_ops, sk, th);
1158         if (mss) {
                /* mss is passed by address to the cookie generator. */
1159                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1160                 tcp_synq_overflow(sk);
1161         }
1162 #endif
1163         return mss;
1164 }
1165
/*
 * tcp_v6_conn_request - handle an incoming connection request on @sk.
 *
 * IPv4 packets are delegated to tcp_v4_conn_request(). A packet whose
 * destination is not unicast is counted as a listen drop. A packet whose
 * source is a v4-mapped address bumps IPSTATS_MIB_INHDRERRORS. Both of
 * those cases return 0 without creating a request. Everything else goes to
 * tcp_conn_request() with the IPv6 operation tables.
 */
1166 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1167 {
1168         if (skb->protocol == htons(ETH_P_IP))
1169                 return tcp_v4_conn_request(sk, skb);
1170
1171         if (!ipv6_unicast_destination(skb))
1172                 goto drop;
1173
1174         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1175                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1176                 return 0;
1177         }
1178
1179         return tcp_conn_request(&tcp6_request_sock_ops,
1180                                 &tcp_request_sock_ipv6_ops, sk, skb);
1181
1182 drop:
1183         tcp_listendrop(sk);
1184         return 0; /* don't send reset */
1185 }
1186
/*
 * tcp_v6_restore_cb - copy the saved IPv6 control block back into place.
 *
 * Moves sizeof(struct inet6_skb_parm) bytes from TCP_SKB_CB(skb)->header.h6
 * to IP6CB(skb), i.e. the opposite direction of the move performed by
 * tcp_v6_fill_cb(). memmove() is used, which tolerates overlapping regions.
 */
1187 static void tcp_v6_restore_cb(struct sk_buff *skb)
1188 {
1189         /* We need to move header back to the beginning if xfrm6_policy_check()
1190          * and tcp_v6_fill_cb() are going to be called again.
1191          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1192          */
1193         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1194                 sizeof(struct inet6_skb_parm));
1195 }
1196
/*
 * tcp_v6_syn_recv_sock - create the child socket for a connection request.
 *
 * @sk:         listening socket.
 * @skb:        segment that triggers creation of the child.
 * @req:        request sock describing the connection.
 * @dst:        route for the child, or NULL to have it looked up here.
 * @req_unhash: request passed to inet_ehash_nolisten(); NULL is treated
 *              below as the syncookie case.
 * @own_req:    output; set from the result of inet_ehash_nolisten() on the
 *              native IPv6 path.
 *
 * Two paths:
 *  - v4-mapped (skb->protocol is ETH_P_IP): the child is built by
 *    tcp_v4_syn_recv_sock() and then given a copy of the listener's
 *    ipv6_pinfo, with the mapped af_ops and the IPv4 backlog handler.
 *  - native IPv6: the child is built by tcp_create_openreq_child() and then
 *    filled with addresses, options, MSS and (optionally) the MD5 key.
 *
 * Returns the new socket or NULL. On the native path failures go through
 * tcp_listendrop(sk).
 */
1197 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1198                                          struct request_sock *req,
1199                                          struct dst_entry *dst,
1200                                          struct request_sock *req_unhash,
1201                                          bool *own_req)
1202 {
1203         struct inet_request_sock *ireq;
1204         struct ipv6_pinfo *newnp;
1205         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1206         struct ipv6_txoptions *opt;
1207         struct inet_sock *newinet;
1208         bool found_dup_sk = false;
1209         struct tcp_sock *newtp;
1210         struct sock *newsk;
1211 #ifdef CONFIG_TCP_MD5SIG
1212         struct tcp_md5sig_key *key;
1213         int l3index;
1214 #endif
1215         struct flowi6 fl6;
1216
1217         if (skb->protocol == htons(ETH_P_IP)) {
1218                 /*
1219                  *      v6 mapped
1220                  */
1221
1222                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1223                                              req_unhash, own_req);
1224
1225                 if (!newsk)
1226                         return NULL;
1227
1228                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1229
1230                 newnp = tcp_inet6_sk(newsk);
1231                 newtp = tcp_sk(newsk);
1232
                /* Start from a byte copy of the listener's IPv6 state; the
                 * pointer members are reset individually further down.
                 */
1233                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1234
1235                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1236
1237                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1238                 if (sk_is_mptcp(newsk))
1239                         mptcpv6_handle_mapped(newsk, true);
1240                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1241 #ifdef CONFIG_TCP_MD5SIG
1242                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1243 #endif
1244
1245                 newnp->ipv6_mc_list = NULL;
1246                 newnp->ipv6_ac_list = NULL;
1247                 newnp->ipv6_fl_list = NULL;
1248                 newnp->pktoptions  = NULL;
1249                 newnp->opt         = NULL;
1250                 newnp->mcast_oif   = inet_iif(skb);
1251                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1252                 newnp->rcv_flowinfo = 0;
1253                 if (np->repflow)
1254                         newnp->flow_label = 0;
1255
1256                 /*
1257                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1258                  * here, tcp_create_openreq_child now does this for us, see the comment in
1259                  * that function for the gory details. -acme
1260                  */
1261
1262                 /* This is a tricky place. Until this moment IPv4 tcp
1263                    worked with the IPv6 icsk.icsk_af_ops.
1264                    Sync it now.
1265                  */
1266                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1267
1268                 return newsk;
1269         }
1270
1271         ireq = inet_rsk(req);
1272
1273         if (sk_acceptq_is_full(sk))
1274                 goto out_overflow;
1275
1276         if (!dst) {
1277                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1278                 if (!dst)
1279                         goto out;
1280         }
1281
1282         newsk = tcp_create_openreq_child(sk, req, skb);
1283         if (!newsk)
1284                 goto out_nonewsk;
1285
1286         /*
1287          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1288          * count here, tcp_create_openreq_child now does this for us, see the
1289          * comment in that function for the gory details. -acme
1290          */
1291
1292         newsk->sk_gso_type = SKB_GSO_TCPV6;
        /* NOTE(review): presumably newsk takes over the dst reference here,
         * which is why later failure paths jump to "out" rather than
         * "out_nonewsk" - confirm against ip6_dst_store().
         */
1293         ip6_dst_store(newsk, dst, NULL, NULL);
1294         inet6_sk_rx_dst_set(newsk, skb);
1295
1296         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1297
1298         newtp = tcp_sk(newsk);
1299         newinet = inet_sk(newsk);
1300         newnp = tcp_inet6_sk(newsk);
1301
1302         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1303
        /* Addresses and bound device come from the request sock. */
1304         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1305         newnp->saddr = ireq->ir_v6_loc_addr;
1306         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1307         newsk->sk_bound_dev_if = ireq->ir_iif;
1308
1309         /* Now IPv6 options...
1310
1311            First: no IPv4 options.
1312          */
1313         newinet->inet_opt = NULL;
1314         newnp->ipv6_mc_list = NULL;
1315         newnp->ipv6_ac_list = NULL;
1316         newnp->ipv6_fl_list = NULL;
1317
1318         /* Clone RX bits */
1319         newnp->rxopt.all = np->rxopt.all;
1320
1321         newnp->pktoptions = NULL;
1322         newnp->opt        = NULL;
1323         newnp->mcast_oif  = tcp_v6_iif(skb);
1324         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1325         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1326         if (np->repflow)
1327                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1328
1329         /* Set ToS of the new socket based upon the value of incoming SYN.
1330          * ECT bits are set later in tcp_init_transfer().
1331          */
1332         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1333                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1334
1335         /* Clone native IPv6 options from listening socket (if any)
1336
1337            Yes, keeping a reference count would be much more clever,
1338            but we do one more thing here: reattach optmem
1339            to newsk.
1340          */
        /* Options on the request take precedence over the listener's. */
1341         opt = ireq->ipv6_opt;
1342         if (!opt)
1343                 opt = rcu_dereference(np->opt);
1344         if (opt) {
1345                 opt = ipv6_dup_options(newsk, opt);
1346                 RCU_INIT_POINTER(newnp->opt, opt);
1347         }
        /* opt is re-tested because the duplicate above replaced it. */
1348         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1349         if (opt)
1350                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1351                                                     opt->opt_flen;
1352
1353         tcp_ca_openreq_child(newsk, dst);
1354
1355         tcp_sync_mss(newsk, dst_mtu(dst));
1356         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1357
1358         tcp_initialize_rcv_mss(newsk);
1359
        /* The IPv4 address fields of a native IPv6 child get a fixed
         * placeholder value.
         */
1360         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1361         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1362
1363 #ifdef CONFIG_TCP_MD5SIG
1364         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1365
1366         /* Copy over the MD5 key from the original socket */
1367         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1368         if (key) {
1369                 const union tcp_md5_addr *addr;
1370
1371                 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
                /* Key copy failed: tear the half-built child down. */
1372                 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1373                         inet_csk_prepare_forced_close(newsk);
1374                         tcp_done(newsk);
1375                         goto out;
1376                 }
1377         }
1378 #endif
1379
1380         if (__inet_inherit_port(sk, newsk) < 0) {
1381                 inet_csk_prepare_forced_close(newsk);
1382                 tcp_done(newsk);
1383                 goto out;
1384         }
1385         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1386                                        &found_dup_sk);
1387         if (*own_req) {
1388                 tcp_move_syn(newtp, req);
1389
1390                 /* Clone pktoptions received with SYN, if we own the req */
1391                 if (ireq->pktopts) {
1392                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        /* The request's skb is released whether or not
                         * the clone succeeded.
                         */
1393                         consume_skb(ireq->pktopts);
1394                         ireq->pktopts = NULL;
1395                         if (newnp->pktoptions)
1396                                 tcp_v6_restore_cb(newnp->pktoptions);
1397                 }
1398         } else {
1399                 if (!req_unhash && found_dup_sk) {
1400                         /* This code path should only be executed in the
1401                          * syncookie case
1402                          */
1403                         bh_unlock_sock(newsk);
1404                         sock_put(newsk);
1405                         newsk = NULL;
1406                 }
1407         }
1408
1409         return newsk;
1410
        /* The error labels fall through: an overflow also releases dst and
         * counts a listen drop; out_nonewsk also counts a listen drop.
         */
1411 out_overflow:
1412         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1413 out_nonewsk:
1414         dst_release(dst);
1415 out:
1416         tcp_listendrop(sk);
1417         return NULL;
1418 }
1419
1420 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1421                                                            u32));
1422 /* The socket must have its spinlock held when we get
1423  * here, unless it is a TCP_LISTEN socket.
1424  *
1425  * We have a potential double-lock case here, so even when
1426  * doing backlog processing we use the BH locking scheme.
1427  * This is because we cannot sleep with the original spinlock
1428  * held.
1429  */
/*
 * tcp_v6_do_rcv - process one received segment for @sk.
 *
 * IPv4 packets are handed straight to tcp_v4_do_rcv(). For IPv6, when any
 * rxopt bit is set on the socket a clone of @skb is kept in opt_skb so the
 * packet's options can be latched at the ipv6_pktoptions label after normal
 * processing. Every IPv6 path in this function returns 0.
 */
1430 INDIRECT_CALLABLE_SCOPE
1431 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1432 {
1433         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1434         struct sk_buff *opt_skb = NULL;
1435         enum skb_drop_reason reason;
1436         struct tcp_sock *tp;
1437
1438         /* Imagine: socket is IPv6. IPv4 packet arrives,
1439            goes to IPv4 receive handler and backlogged.
1440            From backlog it always goes here. Kerboom...
1441            Fortunately, tcp_rcv_established and rcv_established
1442            handle them correctly, but it is not case with
1443            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1444          */
1445
1446         if (skb->protocol == htons(ETH_P_IP))
1447                 return tcp_v4_do_rcv(sk, skb);
1448
1449         /*
1450          *      socket locking is here for SMP purposes as backlog rcv
1451          *      is currently called with bh processing disabled.
1452          */
1453
1454         /* Do Stevens' IPV6_PKTOPTIONS.
1455
1456            Yes, guys, it is the only place in our code, where we
1457            may make it not affecting IPv4.
1458            The rest of code is protocol independent,
1459            and I do not like idea to uglify IPv4.
1460
1461            Actually, all the idea behind IPV6_PKTOPTIONS
1462            looks not very well thought. For now we latch
1463            options, received in the last packet, enqueued
1464            by tcp. Feel free to propose better solution.
1465                                                --ANK (980728)
1466          */
1467         if (np->rxopt.all)
1468                 opt_skb = skb_clone_and_charge_r(skb, sk);
1469
1470         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1471         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1472                 struct dst_entry *dst;
1473
1474                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1475                                                 lockdep_sock_is_held(sk));
1476
1477                 sock_rps_save_rxhash(sk, skb);
1478                 sk_mark_napi_id(sk, skb);
1479                 if (dst) {
                        /* Drop the cached rx dst if the packet arrived on
                         * another interface or the dst check returns NULL.
                         */
1480                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1481                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1482                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1483                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1484                                 dst_release(dst);
1485                         }
1486                 }
1487
1488                 tcp_rcv_established(sk, skb);
1489                 if (opt_skb)
1490                         goto ipv6_pktoptions;
1491                 return 0;
1492         }
1493
1494         if (tcp_checksum_complete(skb))
1495                 goto csum_err;
1496
1497         if (sk->sk_state == TCP_LISTEN) {
1498                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1499
1500                 if (!nsk)
1501                         goto discard;
1502
                /* A different socket came back: hand the segment to it as
                 * a child. The option clone is not latched on this path.
                 */
1503                 if (nsk != sk) {
1504                         if (tcp_child_process(sk, nsk, skb))
1505                                 goto reset;
1506                         if (opt_skb)
1507                                 __kfree_skb(opt_skb);
1508                         return 0;
1509                 }
1510         } else
1511                 sock_rps_save_rxhash(sk, skb);
1512
1513         if (tcp_rcv_state_process(sk, skb))
1514                 goto reset;
1515         if (opt_skb)
1516                 goto ipv6_pktoptions;
1517         return 0;
1518
        /* "reset" falls through into "discard": send a RST, then free. */
1519 reset:
1520         tcp_v6_send_reset(sk, skb);
1521 discard:
1522         if (opt_skb)
1523                 __kfree_skb(opt_skb);
1524         kfree_skb_reason(skb, reason);
1525         return 0;
1526 csum_err:
1527         reason = SKB_DROP_REASON_TCP_CSUM;
1528         trace_tcp_bad_csum(skb);
1529         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1531         goto discard;
1532
1533
1534 ipv6_pktoptions:
1535         /* Do you ask, what is it?
1536
1537            1. skb was enqueued by tcp.
1538            2. skb is added to tail of read queue, rather than out of order.
1539            3. socket is not in passive state.
1540            4. Finally, it really contains options, which user wants to receive.
1541          */
1542         tp = tcp_sk(sk);
1543         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1544             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1545                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1546                         np->mcast_oif = tcp_v6_iif(opt_skb);
1547                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1548                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1549                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1550                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1551                 if (np->repflow)
1552                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
                /* Swap the clone in as the socket's latched pktoptions, or
                 * clear the latch if this packet has nothing accepted.
                 * After either branch opt_skb holds the previous latch,
                 * which is released below.
                 */
1553                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1554                         tcp_v6_restore_cb(opt_skb);
1555                         opt_skb = xchg(&np->pktoptions, opt_skb);
1556                 } else {
1557                         __kfree_skb(opt_skb);
1558                         opt_skb = xchg(&np->pktoptions, NULL);
1559                 }
1560         }
1561
1562         consume_skb(opt_skb);
1563         return 0;
1564 }
1565
/*
 * tcp_v6_fill_cb - populate the TCP control block of @skb.
 *
 * @skb: received segment.
 * @hdr: its IPv6 header (source of the DS field).
 * @th:  its TCP header (source of sequence numbers and flags).
 *
 * The IPv6 control block is first moved into TCP_SKB_CB(skb)->header.h6,
 * then the TCP fields are filled in.
 */
1566 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1567                            const struct tcphdr *th)
1568 {
1569         /* This is tricky: we move IP6CB at its correct location into
1570          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1571          * _decode_session6() uses IP6CB().
1572          * barrier() makes sure compiler won't play aliasing games.
1573          */
1574         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1575                 sizeof(struct inet6_skb_parm));
1576         barrier();
1577
1578         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        /* end_seq = seq + SYN + FIN + payload length (skb->len minus the
         * TCP header length given by doff).
         */
1579         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1580                                     skb->len - th->doff*4);
1581         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1582         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1583         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1584         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1585         TCP_SKB_CB(skb)->sacked = 0;
        /* Set when either a software or a hardware timestamp is present. */
1586         TCP_SKB_CB(skb)->has_rxtstamp =
1587                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1588 }
1589
/*
 * tcp_v6_rcv() - receive handler for a TCP segment carried over IPv6.
 * @skb: the incoming segment; skb->data is read as the TCP header.
 *
 * Checks the TCP header length and initialises checksum state, looks up
 * the owning socket, and then either processes the segment through
 * tcp_v6_do_rcv(), queues it with tcp_add_backlog() when the socket is
 * owned by user context, or takes the request-sock / time-wait paths.
 * Every path that ends at discard_it frees the skb with
 * kfree_skb_reason() using the drop reason recorded so far.
 *
 * Return: 0 when the skb was discarded, queued, or passed to a child
 * socket; on the direct processing path, -1 if tcp_v6_do_rcv() returned
 * non-zero and 0 otherwise.
 */
1590 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1591 {
1592         enum skb_drop_reason drop_reason;
1593         int sdif = inet6_sdif(skb);
1594         int dif = inet6_iif(skb);
1595         const struct tcphdr *th;
1596         const struct ipv6hdr *hdr;
1597         bool refcounted;
1598         struct sock *sk;
1599         int ret;
1600         struct net *net = dev_net(skb->dev);
1601
1602         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
             /* Anything not marked PACKET_HOST is dropped before the INSEGS count. */
1603         if (skb->pkt_type != PACKET_HOST)
1604                 goto discard_it;
1605
1606         /*
1607          *      Count it even if it's bad.
1608          */
1609         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1610
1611         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1612                 goto discard_it;
1613
1614         th = (const struct tcphdr *)skb->data;
1615
             /* A data offset shorter than the fixed TCP header is counted as an error. */
1616         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1617                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1618                 goto bad_packet;
1619         }
1620         if (!pskb_may_pull(skb, th->doff*4))
1621                 goto discard_it;
1622
1623         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1624                 goto csum_error;
1625
             /*
              * Take the header pointers again after the pulls above.
              * NOTE(review): presumably pskb_may_pull() can change skb->data - confirm.
              */
1626         th = (const struct tcphdr *)skb->data;
1627         hdr = ipv6_hdr(skb);
1628
1629 lookup:
             /*
              * refcounted is filled in by the lookup and decides whether
              * sock_put() is called on the exit paths below.
              */
1630         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1631                                 th->source, th->dest, inet6_iif(skb), sdif,
1632                                 &refcounted);
1633         if (!sk)
1634                 goto no_tcp_socket;
1635
1636 process:
1637         if (sk->sk_state == TCP_TIME_WAIT)
1638                 goto do_time_wait;
1639
1640         if (sk->sk_state == TCP_NEW_SYN_RECV) {
                     /* sk is a request sock here; continue on behalf of its listener. */
1641                 struct request_sock *req = inet_reqsk(sk);
1642                 bool req_stolen = false;
1643                 struct sock *nsk;
1644
1645                 sk = req->rsk_listener;
1646                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1647                                                    &hdr->saddr, &hdr->daddr,
1648                                                    AF_INET6, dif, sdif);
1649                 if (drop_reason) {
1650                         sk_drops_add(sk, skb);
1651                         reqsk_put(req);
1652                         goto discard_it;
1653                 }
1654                 if (tcp_checksum_complete(skb)) {
1655                         reqsk_put(req);
1656                         goto csum_error;
1657                 }
                     /*
                      * The listener is no longer in TCP_LISTEN: try to migrate the
                      * request to another socket, otherwise drop the request and
                      * redo the lookup.
                      */
1658                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1659                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1660                         if (!nsk) {
1661                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1662                                 goto lookup;
1663                         }
1664                         sk = nsk;
1665                         /* reuseport_migrate_sock() has already held one sk_refcnt
1666                          * before returning.
1667                          */
1668                 } else {
1669                         sock_hold(sk);
1670                 }
1671                 refcounted = true;
1672                 nsk = NULL;
1673                 if (!tcp_filter(sk, skb)) {
1674                         th = (const struct tcphdr *)skb->data;
1675                         hdr = ipv6_hdr(skb);
1676                         tcp_v6_fill_cb(skb, hdr, th);
1677                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1678                 } else {
1679                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1680                 }
1681                 if (!nsk) {
1682                         reqsk_put(req);
1683                         if (req_stolen) {
1684                                 /* Another cpu got exclusive access to req
1685                                  * and created a full blown socket.
1686                                  * Try to feed this packet to this socket
1687                                  * instead of discarding it.
1688                                  */
1689                                 tcp_v6_restore_cb(skb);
1690                                 sock_put(sk);
1691                                 goto lookup;
1692                         }
1693                         goto discard_and_relse;
1694                 }
                     /*
                      * nsk == sk: tcp_check_req() returned the listener itself, so
                      * fall out of this block and process the segment on it.
                      * Otherwise the segment is handed to the child socket.
                      */
1695                 if (nsk == sk) {
1696                         reqsk_put(req);
1697                         tcp_v6_restore_cb(skb);
1698                 } else if (tcp_child_process(sk, nsk, skb)) {
1699                         tcp_v6_send_reset(nsk, skb);
1700                         goto discard_and_relse;
1701                 } else {
1702                         sock_put(sk);
1703                         return 0;
1704                 }
1705         }
1706
1707         if (static_branch_unlikely(&ip6_min_hopcount)) {
1708                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1709                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1710                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1711                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1712                         goto discard_and_relse;
1713                 }
1714         }
1715
1716         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1717                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1718                 goto discard_and_relse;
1719         }
1720
1721         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1722                                            AF_INET6, dif, sdif);
1723         if (drop_reason)
1724                 goto discard_and_relse;
1725
1726         nf_reset_ct(skb);
1727
1728         if (tcp_filter(sk, skb)) {
1729                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1730                 goto discard_and_relse;
1731         }
             /* Header pointers are taken again after tcp_filter() before filling the cb. */
1732         th = (const struct tcphdr *)skb->data;
1733         hdr = ipv6_hdr(skb);
1734         tcp_v6_fill_cb(skb, hdr, th);
1735
1736         skb->dev = NULL;
1737
             /* TCP_LISTEN sockets are processed without bh_lock_sock_nested(). */
1738         if (sk->sk_state == TCP_LISTEN) {
1739                 ret = tcp_v6_do_rcv(sk, skb);
1740                 goto put_and_return;
1741         }
1742
1743         sk_incoming_cpu_update(sk);
1744
1745         bh_lock_sock_nested(sk);
1746         tcp_segs_in(tcp_sk(sk), skb);
1747         ret = 0;
1748         if (!sock_owned_by_user(sk)) {
1749                 ret = tcp_v6_do_rcv(sk, skb);
1750         } else {
                     /*
                      * NOTE(review): this failure path jumps away without calling
                      * bh_unlock_sock(); presumably tcp_add_backlog() releases the
                      * lock itself when it fails - confirm.
                      */
1751                 if (tcp_add_backlog(sk, skb, &drop_reason))
1752                         goto discard_and_relse;
1753         }
1754         bh_unlock_sock(sk);
1755 put_and_return:
1756         if (refcounted)
1757                 sock_put(sk);
1758         return ret ? -1 : 0;
1759
1760 no_tcp_socket:
1761         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1762         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1763                 goto discard_it;
1764
1765         tcp_v6_fill_cb(skb, hdr, th);
1766
             /*
              * csum_error and bad_packet are jump targets inside this if-body so
              * that bad checksums and undersized headers share the INERRS count.
              * With a good checksum and no socket, a reset is sent instead.
              */
1767         if (tcp_checksum_complete(skb)) {
1768 csum_error:
1769                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1770                 trace_tcp_bad_csum(skb);
1771                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1772 bad_packet:
1773                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1774         } else {
1775                 tcp_v6_send_reset(NULL, skb);
1776         }
1777
1778 discard_it:
1779         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1780         kfree_skb_reason(skb, drop_reason);
1781         return 0;
1782
1783 discard_and_relse:
1784         sk_drops_add(sk, skb);
1785         if (refcounted)
1786                 sock_put(sk);
1787         goto discard_it;
1788
1789 do_time_wait:
1790         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1791                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1792                 inet_twsk_put(inet_twsk(sk));
1793                 goto discard_it;
1794         }
1795
1796         tcp_v6_fill_cb(skb, hdr, th);
1797
1798         if (tcp_checksum_complete(skb)) {
1799                 inet_twsk_put(inet_twsk(sk));
1800                 goto csum_error;
1801         }
1802
1803         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1804         case TCP_TW_SYN:
1805         {
1806                 struct sock *sk2;
1807
1808                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1809                                             skb, __tcp_hdrlen(th),
1810                                             &ipv6_hdr(skb)->saddr, th->source,
1811                                             &ipv6_hdr(skb)->daddr,
1812                                             ntohs(th->dest),
1813                                             tcp_v6_iif_l3_slave(skb),
1814                                             sdif);
                     /*
                      * A listener was found: retire the time-wait socket and
                      * restart at "process" on the listener, with refcounted
                      * cleared so the exit paths skip sock_put().
                      */
1815                 if (sk2) {
1816                         struct inet_timewait_sock *tw = inet_twsk(sk);
1817                         inet_twsk_deschedule_put(tw);
1818                         sk = sk2;
1819                         tcp_v6_restore_cb(skb);
1820                         refcounted = false;
1821                         goto process;
1822                 }
1823         }
1824                 /* to ACK */
1825                 fallthrough;
1826         case TCP_TW_ACK:
1827                 tcp_v6_timewait_ack(sk, skb);
1828                 break;
1829         case TCP_TW_RST:
1830                 tcp_v6_send_reset(sk, skb);
1831                 inet_twsk_deschedule_put(inet_twsk(sk));
1832                 goto discard_it;
1833         case TCP_TW_SUCCESS:
1834                 ;
1835         }
1836         goto discard_it;
1837 }
1838
1838
1839 void tcp_v6_early_demux(struct sk_buff *skb)
1840 {
1841         struct net *net = dev_net(skb->dev);
1842         const struct ipv6hdr *hdr;
1843         const struct tcphdr *th;
1844         struct sock *sk;
1845
1846         if (skb->pkt_type != PACKET_HOST)
1847                 return;
1848
1849         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1850                 return;
1851
1852         hdr = ipv6_hdr(skb);
1853         th = tcp_hdr(skb);
1854
1855         if (th->doff < sizeof(struct tcphdr) / 4)
1856                 return;
1857
1858         /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1859         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1860                                         &hdr->saddr, th->source,
1861                                         &hdr->daddr, ntohs(th->dest),
1862                                         inet6_iif(skb), inet6_sdif(skb));
1863         if (sk) {
1864                 skb->sk = sk;
1865                 skb->destructor = sock_edemux;
1866                 if (sk_fullsock(sk)) {
1867                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1868
1869                         if (dst)
1870                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1871                         if (dst &&
1872                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1873                                 skb_dst_set_noref(skb, dst);
1874                 }
1875         }
1876 }
1877
1878 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1879         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1880         .twsk_unique    = tcp_twsk_unique,
1881         .twsk_destructor = tcp_twsk_destructor,
1882 };
1883
1884 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1885 {
1886         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1887 }
1888
1889 const struct inet_connection_sock_af_ops ipv6_specific = {
1890         .queue_xmit        = inet6_csk_xmit,
1891         .send_check        = tcp_v6_send_check,
1892         .rebuild_header    = inet6_sk_rebuild_header,
1893         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1894         .conn_request      = tcp_v6_conn_request,
1895         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1896         .net_header_len    = sizeof(struct ipv6hdr),
1897         .net_frag_header_len = sizeof(struct frag_hdr),
1898         .setsockopt        = ipv6_setsockopt,
1899         .getsockopt        = ipv6_getsockopt,
1900         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1901         .sockaddr_len      = sizeof(struct sockaddr_in6),
1902         .mtu_reduced       = tcp_v6_mtu_reduced,
1903 };
1904
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for native IPv6 sockets; set in tcp_v6_init_sock(). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
        .md5_lookup = tcp_v6_md5_lookup,
        .md5_parse = tcp_v6_parse_md5_keys,
        .calc_md5_hash = tcp_v6_md5_hash_skb,
};
#endif

1912
1913 /*
1914  *      TCP over IPv4 via INET6 API
1915  */
1916 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1917         .queue_xmit        = ip_queue_xmit,
1918         .send_check        = tcp_v4_send_check,
1919         .rebuild_header    = inet_sk_rebuild_header,
1920         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1921         .conn_request      = tcp_v6_conn_request,
1922         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1923         .net_header_len    = sizeof(struct iphdr),
1924         .setsockopt        = ipv6_setsockopt,
1925         .getsockopt        = ipv6_getsockopt,
1926         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1927         .sockaddr_len      = sizeof(struct sockaddr_in6),
1928         .mtu_reduced       = tcp_v4_mtu_reduced,
1929 };
1930
#ifdef CONFIG_TCP_MD5SIG
/*
 * MD5 signature operations paired with ipv6_mapped: IPv4 lookup and hash
 * callbacks, with the IPv6 key parser.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
        .md5_lookup = tcp_v4_md5_lookup,
        .md5_parse = tcp_v6_parse_md5_keys,
        .calc_md5_hash = tcp_v4_md5_hash_skb,
};
#endif

1938
1939 /* NOTE: A lot of things set to zero explicitly by call to
1940  *       sk_alloc() so need not be done here.
1941  */
1942 static int tcp_v6_init_sock(struct sock *sk)
1943 {
1944         struct inet_connection_sock *icsk = inet_csk(sk);
1945
1946         tcp_init_sock(sk);
1947
1948         icsk->icsk_af_ops = &ipv6_specific;
1949
1950 #ifdef CONFIG_TCP_MD5SIG
1951         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1952 #endif
1953
1954         return 0;
1955 }
1956
1957 #ifdef CONFIG_PROC_FS
1958 /* Proc filesystem TCPv6 sock list dumping. */
1959 static void get_openreq6(struct seq_file *seq,
1960                          const struct request_sock *req, int i)
1961 {
1962         long ttd = req->rsk_timer.expires - jiffies;
1963         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1964         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1965
1966         if (ttd < 0)
1967                 ttd = 0;
1968
1969         seq_printf(seq,
1970                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1971                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1972                    i,
1973                    src->s6_addr32[0], src->s6_addr32[1],
1974                    src->s6_addr32[2], src->s6_addr32[3],
1975                    inet_rsk(req)->ir_num,
1976                    dest->s6_addr32[0], dest->s6_addr32[1],
1977                    dest->s6_addr32[2], dest->s6_addr32[3],
1978                    ntohs(inet_rsk(req)->ir_rmt_port),
1979                    TCP_SYN_RECV,
1980                    0, 0, /* could print option size, but that is af dependent. */
1981                    1,   /* timers active (only the expire timer) */
1982                    jiffies_to_clock_t(ttd),
1983                    req->num_timeout,
1984                    from_kuid_munged(seq_user_ns(seq),
1985                                     sock_i_uid(req->rsk_listener)),
1986                    0,  /* non standard timer */
1987                    0, /* open_requests have no inode */
1988                    0, req);
1989 }
1990
1991 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1992 {
1993         const struct in6_addr *dest, *src;
1994         __u16 destp, srcp;
1995         int timer_active;
1996         unsigned long timer_expires;
1997         const struct inet_sock *inet = inet_sk(sp);
1998         const struct tcp_sock *tp = tcp_sk(sp);
1999         const struct inet_connection_sock *icsk = inet_csk(sp);
2000         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2001         int rx_queue;
2002         int state;
2003
2004         dest  = &sp->sk_v6_daddr;
2005         src   = &sp->sk_v6_rcv_saddr;
2006         destp = ntohs(inet->inet_dport);
2007         srcp  = ntohs(inet->inet_sport);
2008
2009         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2010             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2011             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2012                 timer_active    = 1;
2013                 timer_expires   = icsk->icsk_timeout;
2014         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2015                 timer_active    = 4;
2016                 timer_expires   = icsk->icsk_timeout;
2017         } else if (timer_pending(&sp->sk_timer)) {
2018                 timer_active    = 2;
2019                 timer_expires   = sp->sk_timer.expires;
2020         } else {
2021                 timer_active    = 0;
2022                 timer_expires = jiffies;
2023         }
2024
2025         state = inet_sk_state_load(sp);
2026         if (state == TCP_LISTEN)
2027                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2028         else
2029                 /* Because we don't lock the socket,
2030                  * we might find a transient negative value.
2031                  */
2032                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2033                                       READ_ONCE(tp->copied_seq), 0);
2034
2035         seq_printf(seq,
2036                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2037                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2038                    i,
2039                    src->s6_addr32[0], src->s6_addr32[1],
2040                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2041                    dest->s6_addr32[0], dest->s6_addr32[1],
2042                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2043                    state,
2044                    READ_ONCE(tp->write_seq) - tp->snd_una,
2045                    rx_queue,
2046                    timer_active,
2047                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2048                    icsk->icsk_retransmits,
2049                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2050                    icsk->icsk_probes_out,
2051                    sock_i_ino(sp),
2052                    refcount_read(&sp->sk_refcnt), sp,
2053                    jiffies_to_clock_t(icsk->icsk_rto),
2054                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2055                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2056                    tcp_snd_cwnd(tp),
2057                    state == TCP_LISTEN ?
2058                         fastopenq->max_qlen :
2059                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2060                    );
2061 }
2062
2063 static void get_timewait6_sock(struct seq_file *seq,
2064                                struct inet_timewait_sock *tw, int i)
2065 {
2066         long delta = tw->tw_timer.expires - jiffies;
2067         const struct in6_addr *dest, *src;
2068         __u16 destp, srcp;
2069
2070         dest = &tw->tw_v6_daddr;
2071         src  = &tw->tw_v6_rcv_saddr;
2072         destp = ntohs(tw->tw_dport);
2073         srcp  = ntohs(tw->tw_sport);
2074
2075         seq_printf(seq,
2076                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2077                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2078                    i,
2079                    src->s6_addr32[0], src->s6_addr32[1],
2080                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2081                    dest->s6_addr32[0], dest->s6_addr32[1],
2082                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2083                    tw->tw_substate, 0, 0,
2084                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2085                    refcount_read(&tw->tw_refcnt), tw);
2086 }
2087
2088 static int tcp6_seq_show(struct seq_file *seq, void *v)
2089 {
2090         struct tcp_iter_state *st;
2091         struct sock *sk = v;
2092
2093         if (v == SEQ_START_TOKEN) {
2094                 seq_puts(seq,
2095                          "  sl  "
2096                          "local_address                         "
2097                          "remote_address                        "
2098                          "st tx_queue rx_queue tr tm->when retrnsmt"
2099                          "   uid  timeout inode\n");
2100                 goto out;
2101         }
2102         st = seq->private;
2103
2104         if (sk->sk_state == TCP_TIME_WAIT)
2105                 get_timewait6_sock(seq, v, st->num);
2106         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2107                 get_openreq6(seq, v, st->num);
2108         else
2109                 get_tcp6_sock(seq, v, st->num);
2110 out:
2111         return 0;
2112 }
2113
2114 static const struct seq_operations tcp6_seq_ops = {
2115         .show           = tcp6_seq_show,
2116         .start          = tcp_seq_start,
2117         .next           = tcp_seq_next,
2118         .stop           = tcp_seq_stop,
2119 };
2120
2121 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2122         .family         = AF_INET6,
2123 };
2124
2125 int __net_init tcp6_proc_init(struct net *net)
2126 {
2127         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2128                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2129                 return -ENOMEM;
2130         return 0;
2131 }
2132
2133 void tcp6_proc_exit(struct net *net)
2134 {
2135         remove_proc_entry("tcp6", net->proc_net);
2136 }
2137 #endif
2138
2139 struct proto tcpv6_prot = {
2140         .name                   = "TCPv6",
2141         .owner                  = THIS_MODULE,
2142         .close                  = tcp_close,
2143         .pre_connect            = tcp_v6_pre_connect,
2144         .connect                = tcp_v6_connect,
2145         .disconnect             = tcp_disconnect,
2146         .accept                 = inet_csk_accept,
2147         .ioctl                  = tcp_ioctl,
2148         .init                   = tcp_v6_init_sock,
2149         .destroy                = tcp_v4_destroy_sock,
2150         .shutdown               = tcp_shutdown,
2151         .setsockopt             = tcp_setsockopt,
2152         .getsockopt             = tcp_getsockopt,
2153         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2154         .keepalive              = tcp_set_keepalive,
2155         .recvmsg                = tcp_recvmsg,
2156         .sendmsg                = tcp_sendmsg,
2157         .sendpage               = tcp_sendpage,
2158         .backlog_rcv            = tcp_v6_do_rcv,
2159         .release_cb             = tcp_release_cb,
2160         .hash                   = inet6_hash,
2161         .unhash                 = inet_unhash,
2162         .get_port               = inet_csk_get_port,
2163         .put_port               = inet_put_port,
2164 #ifdef CONFIG_BPF_SYSCALL
2165         .psock_update_sk_prot   = tcp_bpf_update_proto,
2166 #endif
2167         .enter_memory_pressure  = tcp_enter_memory_pressure,
2168         .leave_memory_pressure  = tcp_leave_memory_pressure,
2169         .stream_memory_free     = tcp_stream_memory_free,
2170         .sockets_allocated      = &tcp_sockets_allocated,
2171
2172         .memory_allocated       = &tcp_memory_allocated,
2173         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2174
2175         .memory_pressure        = &tcp_memory_pressure,
2176         .orphan_count           = &tcp_orphan_count,
2177         .sysctl_mem             = sysctl_tcp_mem,
2178         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2179         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2180         .max_header             = MAX_TCP_HEADER,
2181         .obj_size               = sizeof(struct tcp6_sock),
2182         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2183         .twsk_prot              = &tcp6_timewait_sock_ops,
2184         .rsk_prot               = &tcp6_request_sock_ops,
2185         .h.hashinfo             = NULL,
2186         .no_autobind            = true,
2187         .diag_destroy           = tcp_abort,
2188 };
2189 EXPORT_SYMBOL_GPL(tcpv6_prot);
2190
2191 static const struct inet6_protocol tcpv6_protocol = {
2192         .handler        =       tcp_v6_rcv,
2193         .err_handler    =       tcp_v6_err,
2194         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2195 };
2196
2197 static struct inet_protosw tcpv6_protosw = {
2198         .type           =       SOCK_STREAM,
2199         .protocol       =       IPPROTO_TCP,
2200         .prot           =       &tcpv6_prot,
2201         .ops            =       &inet6_stream_ops,
2202         .flags          =       INET_PROTOSW_PERMANENT |
2203                                 INET_PROTOSW_ICSK,
2204 };
2205
2206 static int __net_init tcpv6_net_init(struct net *net)
2207 {
2208         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2209                                     SOCK_RAW, IPPROTO_TCP, net);
2210 }
2211
2212 static void __net_exit tcpv6_net_exit(struct net *net)
2213 {
2214         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2215 }
2216
2217 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2218 {
2219         tcp_twsk_purge(net_exit_list, AF_INET6);
2220 }
2221
2222 static struct pernet_operations tcpv6_net_ops = {
2223         .init       = tcpv6_net_init,
2224         .exit       = tcpv6_net_exit,
2225         .exit_batch = tcpv6_net_exit_batch,
2226 };
2227
2228 int __init tcpv6_init(void)
2229 {
2230         int ret;
2231
2232         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2233         if (ret)
2234                 goto out;
2235
2236         /* register inet6 protocol */
2237         ret = inet6_register_protosw(&tcpv6_protosw);
2238         if (ret)
2239                 goto out_tcpv6_protocol;
2240
2241         ret = register_pernet_subsys(&tcpv6_net_ops);
2242         if (ret)
2243                 goto out_tcpv6_protosw;
2244
2245         ret = mptcpv6_init();
2246         if (ret)
2247                 goto out_tcpv6_pernet_subsys;
2248
2249 out:
2250         return ret;
2251
2252 out_tcpv6_pernet_subsys:
2253         unregister_pernet_subsys(&tcpv6_net_ops);
2254 out_tcpv6_protosw:
2255         inet6_unregister_protosw(&tcpv6_protosw);
2256 out_tcpv6_protocol:
2257         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2258         goto out;
2259 }
2260
2261 void tcpv6_exit(void)
2262 {
2263         unregister_pernet_subsys(&tcpv6_net_ops);
2264         inet6_unregister_protosw(&tcpv6_protosw);
2265         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2266 }