// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
                                              struct tcp6_sock, tcp)->inet6)

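/* Cache the incoming dst on the socket so the established fast path can
 * validate and reuse it (see the sk_rx_dst checks in tcp_v6_do_rcv()),
 * remembering the ingress ifindex and the rt6 cookie for that validation.
 */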
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

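/* Derive the initial sequence number and timestamp offset for a new
 * connection from the address/port 4-tuple, using the keyed hash helpers
 * in net/core/secure_seq.c so the values are hard to predict off-path.
 */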
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * beyond the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

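/* Active open: validate the destination, handle flow labels and v4-mapped
 * addresses (which are handed off to tcp_v4_connect()), route the flow,
 * bind a source address/port, then send the SYN via tcp_connect().
 */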
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (inet6_test_bit(SNDFLOW, sk)) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                saddr = &fl6.saddr;

                err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
                if (err)
                        goto failure;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        inet_bhash2_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

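/* Apply a deferred PMTU reduction: called when an ICMPV6_PKT_TOOBIG was
 * received, either directly from tcp_v6_err() or via the
 * TCP_MTU_REDUCED_DEFERRED flag once the socket lock is released,
 * shrinking the MSS and retransmitting.
 */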
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

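/* ICMPv6 error handler: look up the socket the quoted TCP segment belongs
 * to and react to the error type (redirect, packet-too-big, unreachable),
 * taking care not to touch socket state that is owned by user context.
 */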
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        WRITE_ONCE(sk->sk_err, err);
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else {
                        WRITE_ONCE(sk->sk_err_soft, err);
                }
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* check if this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
                WRITE_ONCE(sk->sk_err, err);
                sk_error_report(sk);
        } else {
                WRITE_ONCE(sk->sk_err_soft, err);
        }
out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

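/* Build and transmit a SYN-ACK for the given request sock, routing it if
 * no dst was supplied and reflecting the incoming flow label and (when
 * sysctl_tcp_reflect_tos is set) the non-ECN bits of the SYN's traffic
 * class.
 */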
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
                               opt, tclass, READ_ONCE(sk->sk_priority));
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

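/* Construct a bare TCP segment (RST or ACK, possibly carrying timestamp
 * and MD5 options) on a freshly allocated skb and transmit it through the
 * per-netns control socket, swapping the addresses and ports of the
 * segment being answered.
 */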
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = READ_ONCE(sk->sk_mark);
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup whether or not this is for a RST.
         * The underlying function will use it to retrieve the network
         * namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We are not loosening security here:
                 * the incoming packet is checked with the md5 hash using the
                 * found key; no RST is generated if the md5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        trace_tcp_send_reset(sk, skb);
                        if (inet6_test_bit(REPFLOW, sk))
                                label = ip6_flowlabel(ipv6h);
                        priority = READ_ONCE(sk->sk_priority);
                        txhash = sk->sk_txhash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0,
                        READ_ONCE(sk->sk_priority),
                        READ_ONCE(tcp_rsk(req)->txhash));
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if
         * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be called
         * again. ip6_datagram_recv_specific_ctl() also expects IP6CB to be
         * there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

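/* Create the child socket for a completed handshake. A SYN that arrived
 * over IPv4 on a v6 listener is delegated to tcp_v4_syn_recv_sock() and
 * the child is set up as a v4-mapped socket; otherwise a full IPv6 child
 * is created, with its options, dst and (optionally) MD5 key cloned from
 * the listener and the request sock.
 */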
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (inet6_test_bit(REPFLOW, sk))
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* It is a tricky place. Until this moment the IPv4 tcp code
                 * worked with the IPv6 icsk.icsk_af_ops.
                 * Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (inet6_test_bit(REPFLOW, sk))
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but we
           do one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                const union tcp_md5_addr *addr;

                addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
                if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
                        inet_csk_prepare_forced_close(newsk);
                        tcp_done(newsk);
                        goto out;
                }
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions)
                                tcp_v6_restore_cb(newnp->pktoptions);
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
                                                           u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct sk_buff *opt_skb = NULL;
        enum skb_drop_reason reason;
        struct tcp_sock *tp;

        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to the IPv4 receive handler and is backlogged.
           From the backlog it always comes here. Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but that is not the case with
           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

1449         /* Do Stevens' IPV6_PKTOPTIONS.
1450
1451            Yes, guys, this is the only place in our code where we
1452            can implement it without affecting IPv4.
1453            The rest of the code is protocol independent,
1454            and I do not like the idea of uglifying IPv4.
1455
1456            Actually, the whole idea behind IPV6_PKTOPTIONS
1457            does not look very well thought out. For now we latch
1458            the options received in the last packet enqueued
1459            by tcp. Feel free to propose a better solution.
1460                                                --ANK (980728)
1461          */
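        /* Illustrative only (userspace, not part of this file): the options
         * latched below are what an application sees after enabling, e.g.,
         *
         *     int on = 1;
         *     setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *
         * and then either reading ancillary data via recvmsg() or fetching
         * the latched skb with getsockopt(IPV6_PKTOPTIONS).
         */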
1462         if (np->rxopt.all)
1463                 opt_skb = skb_clone_and_charge_r(skb, sk);
1464
1465         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1466         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1467                 struct dst_entry *dst;
1468
1469                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1470                                                 lockdep_sock_is_held(sk));
1471
1472                 sock_rps_save_rxhash(sk, skb);
1473                 sk_mark_napi_id(sk, skb);
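                /* Validate the cached input route: if this packet arrived on
                 * a different interface, or the dst has been obsoleted (e.g.
                 * by a routing change), drop the cache so it can be relearned.
                 */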
1474                 if (dst) {
1475                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1476                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1477                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1478                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1479                                 dst_release(dst);
1480                         }
1481                 }
1482
1483                 tcp_rcv_established(sk, skb);
1484                 if (opt_skb)
1485                         goto ipv6_pktoptions;
1486                 return 0;
1487         }
1488
1489         if (tcp_checksum_complete(skb))
1490                 goto csum_err;
1491
1492         if (sk->sk_state == TCP_LISTEN) {
1493                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1494
1495                 if (!nsk)
1496                         goto discard;
1497
1498                 if (nsk != sk) {
1499                         if (tcp_child_process(sk, nsk, skb))
1500                                 goto reset;
1501                         if (opt_skb)
1502                                 __kfree_skb(opt_skb);
1503                         return 0;
1504                 }
1505         } else
1506                 sock_rps_save_rxhash(sk, skb);
1507
1508         if (tcp_rcv_state_process(sk, skb))
1509                 goto reset;
1510         if (opt_skb)
1511                 goto ipv6_pktoptions;
1512         return 0;
1513
1514 reset:
1515         tcp_v6_send_reset(sk, skb);
1516 discard:
1517         if (opt_skb)
1518                 __kfree_skb(opt_skb);
1519         kfree_skb_reason(skb, reason);
1520         return 0;
1521 csum_err:
1522         reason = SKB_DROP_REASON_TCP_CSUM;
1523         trace_tcp_bad_csum(skb);
1524         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1525         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1526         goto discard;
1527
1529 ipv6_pktoptions:
1530         /* What is happening here? The skb is latched as pktoptions when:
1531
1532            1. the skb was enqueued by tcp;
1533            2. the skb was added to the tail of the read queue, not out of order;
1534            3. the socket is not in a passive state;
1535            4. finally, it really contains options the user wants to receive.
1536          */
1537         tp = tcp_sk(sk);
1538         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1539             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1540                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1541                         np->mcast_oif = tcp_v6_iif(opt_skb);
1542                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1543                         WRITE_ONCE(np->mcast_hops,
1544                                    ipv6_hdr(opt_skb)->hop_limit);
1545                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1546                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1547                 if (inet6_test_bit(REPFLOW, sk))
1548                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1549                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1550                         tcp_v6_restore_cb(opt_skb);
1551                         opt_skb = xchg(&np->pktoptions, opt_skb);
1552                 } else {
1553                         __kfree_skb(opt_skb);
1554                         opt_skb = xchg(&np->pktoptions, NULL);
1555                 }
1556         }
1557
1558         consume_skb(opt_skb);
1559         return 0;
1560 }
1561
1562 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1563                            const struct tcphdr *th)
1564 {
1565         /* This is tricky: we move IP6CB at its correct location into
1566          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1567          * _decode_session6() uses IP6CB().
1568          * barrier() makes sure compiler won't play aliasing games.
1569          */
1570         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1571                 sizeof(struct inet6_skb_parm));
1572         barrier();
1573
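        /* end_seq accounts for the payload plus one sequence number each for
         * SYN and FIN, both of which consume sequence space; th->doff is the
         * header length in 32-bit words.
         */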
1574         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1575         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1576                                     skb->len - th->doff*4);
1577         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1578         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1579         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1580         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1581         TCP_SKB_CB(skb)->sacked = 0;
1582         TCP_SKB_CB(skb)->has_rxtstamp =
1583                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1584 }
1585
1586 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1587 {
1588         enum skb_drop_reason drop_reason;
1589         int sdif = inet6_sdif(skb);
1590         int dif = inet6_iif(skb);
1591         const struct tcphdr *th;
1592         const struct ipv6hdr *hdr;
1593         bool refcounted;
1594         struct sock *sk;
1595         int ret;
1596         struct net *net = dev_net(skb->dev);
1597
1598         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1599         if (skb->pkt_type != PACKET_HOST)
1600                 goto discard_it;
1601
1602         /*
1603          *      Count it even if it's bad.
1604          */
1605         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1606
1607         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1608                 goto discard_it;
1609
1610         th = (const struct tcphdr *)skb->data;
1611
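        /* th->doff is in 32-bit words, so anything below 5 (20 bytes)
         * cannot be a complete TCP header.
         */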
1612         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1613                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1614                 goto bad_packet;
1615         }
1616         if (!pskb_may_pull(skb, th->doff*4))
1617                 goto discard_it;
1618
1619         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1620                 goto csum_error;
1621
1622         th = (const struct tcphdr *)skb->data;
1623         hdr = ipv6_hdr(skb);
1624
1625 lookup:
1626         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1627                                 th->source, th->dest, inet6_iif(skb), sdif,
1628                                 &refcounted);
1629         if (!sk)
1630                 goto no_tcp_socket;
1631
1632 process:
1633         if (sk->sk_state == TCP_TIME_WAIT)
1634                 goto do_time_wait;
1635
1636         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1637                 struct request_sock *req = inet_reqsk(sk);
1638                 bool req_stolen = false;
1639                 struct sock *nsk;
1640
1641                 sk = req->rsk_listener;
1642                 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1643                         drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1644                 else
1645                         drop_reason = tcp_inbound_md5_hash(sk, skb,
1646                                                            &hdr->saddr, &hdr->daddr,
1647                                                            AF_INET6, dif, sdif);
1648                 if (drop_reason) {
1649                         sk_drops_add(sk, skb);
1650                         reqsk_put(req);
1651                         goto discard_it;
1652                 }
1653                 if (tcp_checksum_complete(skb)) {
1654                         reqsk_put(req);
1655                         goto csum_error;
1656                 }
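                /* The request is no longer attached to a listening socket:
                 * try to migrate it to another listener in the same
                 * reuseport group instead of dropping the handshake.
                 */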
1657                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1658                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1659                         if (!nsk) {
1660                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1661                                 goto lookup;
1662                         }
1663                         sk = nsk;
1664                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1665                          * before returning.
1666                          */
1667                 } else {
1668                         sock_hold(sk);
1669                 }
1670                 refcounted = true;
1671                 nsk = NULL;
1672                 if (!tcp_filter(sk, skb)) {
1673                         th = (const struct tcphdr *)skb->data;
1674                         hdr = ipv6_hdr(skb);
1675                         tcp_v6_fill_cb(skb, hdr, th);
1676                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1677                 } else {
1678                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1679                 }
1680                 if (!nsk) {
1681                         reqsk_put(req);
1682                         if (req_stolen) {
1683                                 /* Another cpu got exclusive access to req
1684                                  * and created a full-blown socket.
1685                                  * Try to feed this packet to that socket
1686                                  * instead of discarding it.
1687                                  */
1688                                 tcp_v6_restore_cb(skb);
1689                                 sock_put(sk);
1690                                 goto lookup;
1691                         }
1692                         goto discard_and_relse;
1693                 }
1694                 nf_reset_ct(skb);
1695                 if (nsk == sk) {
1696                         reqsk_put(req);
1697                         tcp_v6_restore_cb(skb);
1698                 } else if (tcp_child_process(sk, nsk, skb)) {
1699                         tcp_v6_send_reset(nsk, skb);
1700                         goto discard_and_relse;
1701                 } else {
1702                         sock_put(sk);
1703                         return 0;
1704                 }
1705         }
1706
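        /* Illustrative only (userspace, not part of this file): this check
         * is typically armed via the IPV6_MINHOPCOUNT socket option, e.g.
         *
         *     int minhop = 255;
         *     setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *                &minhop, sizeof(minhop));
         *
         * the usual way to implement RFC 5082 (GTSM) style protection.
         */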
1707         if (static_branch_unlikely(&ip6_min_hopcount)) {
1708                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1709                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1710                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1711                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1712                         goto discard_and_relse;
1713                 }
1714         }
1715
1716         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1717                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1718                 goto discard_and_relse;
1719         }
1720
1721         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1722                                            AF_INET6, dif, sdif);
1723         if (drop_reason)
1724                 goto discard_and_relse;
1725
1726         nf_reset_ct(skb);
1727
1728         if (tcp_filter(sk, skb)) {
1729                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1730                 goto discard_and_relse;
1731         }
1732         th = (const struct tcphdr *)skb->data;
1733         hdr = ipv6_hdr(skb);
1734         tcp_v6_fill_cb(skb, hdr, th);
1735
1736         skb->dev = NULL;
1737
1738         if (sk->sk_state == TCP_LISTEN) {
1739                 ret = tcp_v6_do_rcv(sk, skb);
1740                 goto put_and_return;
1741         }
1742
1743         sk_incoming_cpu_update(sk);
1744
1745         bh_lock_sock_nested(sk);
1746         tcp_segs_in(tcp_sk(sk), skb);
1747         ret = 0;
1748         if (!sock_owned_by_user(sk)) {
1749                 ret = tcp_v6_do_rcv(sk, skb);
1750         } else {
1751                 if (tcp_add_backlog(sk, skb, &drop_reason))
1752                         goto discard_and_relse;
1753         }
1754         bh_unlock_sock(sk);
1755 put_and_return:
1756         if (refcounted)
1757                 sock_put(sk);
1758         return ret ? -1 : 0;
1759
1760 no_tcp_socket:
1761         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1762         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1763                 goto discard_it;
1764
1765         tcp_v6_fill_cb(skb, hdr, th);
1766
1767         if (tcp_checksum_complete(skb)) {
1768 csum_error:
1769                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1770                 trace_tcp_bad_csum(skb);
1771                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1772 bad_packet:
1773                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1774         } else {
1775                 tcp_v6_send_reset(NULL, skb);
1776         }
1777
1778 discard_it:
1779         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1780         kfree_skb_reason(skb, drop_reason);
1781         return 0;
1782
1783 discard_and_relse:
1784         sk_drops_add(sk, skb);
1785         if (refcounted)
1786                 sock_put(sk);
1787         goto discard_it;
1788
1789 do_time_wait:
1790         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1791                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1792                 inet_twsk_put(inet_twsk(sk));
1793                 goto discard_it;
1794         }
1795
1796         tcp_v6_fill_cb(skb, hdr, th);
1797
1798         if (tcp_checksum_complete(skb)) {
1799                 inet_twsk_put(inet_twsk(sk));
1800                 goto csum_error;
1801         }
1802
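        /* tcp_timewait_state_process() decides how a segment that hit a
         * TIME_WAIT socket is disposed of: TCP_TW_SYN lets a valid new SYN
         * be handed to a listener, TCP_TW_ACK re-sends the last ACK,
         * TCP_TW_RST answers with a reset, TCP_TW_SUCCESS drops silently.
         */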
1803         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1804         case TCP_TW_SYN:
1805         {
1806                 struct sock *sk2;
1807
1808                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1809                                             skb, __tcp_hdrlen(th),
1810                                             &ipv6_hdr(skb)->saddr, th->source,
1811                                             &ipv6_hdr(skb)->daddr,
1812                                             ntohs(th->dest),
1813                                             tcp_v6_iif_l3_slave(skb),
1814                                             sdif);
1815                 if (sk2) {
1816                         struct inet_timewait_sock *tw = inet_twsk(sk);
1817                         inet_twsk_deschedule_put(tw);
1818                         sk = sk2;
1819                         tcp_v6_restore_cb(skb);
1820                         refcounted = false;
1821                         goto process;
1822                 }
1823         }
1824                 /* to ACK */
1825                 fallthrough;
1826         case TCP_TW_ACK:
1827                 tcp_v6_timewait_ack(sk, skb);
1828                 break;
1829         case TCP_TW_RST:
1830                 tcp_v6_send_reset(sk, skb);
1831                 inet_twsk_deschedule_put(inet_twsk(sk));
1832                 goto discard_it;
1833         case TCP_TW_SUCCESS:
1834                 ;
1835         }
1836         goto discard_it;
1837 }
1838
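/* Early demux: called from the IPv6 input path before routing. Look up an
 * established socket and attach it (and its cached dst, when still valid)
 * to the skb so tcp_v6_rcv() can skip a second full lookup.
 */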
1839 void tcp_v6_early_demux(struct sk_buff *skb)
1840 {
1841         struct net *net = dev_net(skb->dev);
1842         const struct ipv6hdr *hdr;
1843         const struct tcphdr *th;
1844         struct sock *sk;
1845
1846         if (skb->pkt_type != PACKET_HOST)
1847                 return;
1848
1849         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1850                 return;
1851
1852         hdr = ipv6_hdr(skb);
1853         th = tcp_hdr(skb);
1854
1855         if (th->doff < sizeof(struct tcphdr) / 4)
1856                 return;
1857
1858         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1859         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1860                                         &hdr->saddr, th->source,
1861                                         &hdr->daddr, ntohs(th->dest),
1862                                         inet6_iif(skb), inet6_sdif(skb));
1863         if (sk) {
1864                 skb->sk = sk;
1865                 skb->destructor = sock_edemux;
1866                 if (sk_fullsock(sk)) {
1867                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1868
1869                         if (dst)
1870                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1871                         if (dst &&
1872                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1873                                 skb_dst_set_noref(skb, dst);
1874                 }
1875         }
1876 }
1877
1878 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1879         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1880         .twsk_unique    = tcp_twsk_unique,
1881         .twsk_destructor = tcp_twsk_destructor,
1882 };
1883
1884 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1885 {
1886         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1887 }
1888
1889 const struct inet_connection_sock_af_ops ipv6_specific = {
1890         .queue_xmit        = inet6_csk_xmit,
1891         .send_check        = tcp_v6_send_check,
1892         .rebuild_header    = inet6_sk_rebuild_header,
1893         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1894         .conn_request      = tcp_v6_conn_request,
1895         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1896         .net_header_len    = sizeof(struct ipv6hdr),
1897         .net_frag_header_len = sizeof(struct frag_hdr),
1898         .setsockopt        = ipv6_setsockopt,
1899         .getsockopt        = ipv6_getsockopt,
1900         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1901         .sockaddr_len      = sizeof(struct sockaddr_in6),
1902         .mtu_reduced       = tcp_v6_mtu_reduced,
1903 };
1904
1905 #ifdef CONFIG_TCP_MD5SIG
1906 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1907         .md5_lookup     =       tcp_v6_md5_lookup,
1908         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1909         .md5_parse      =       tcp_v6_parse_md5_keys,
1910 };
1911 #endif
1912
1913 /*
1914  *      TCP over IPv4 via the INET6 API (v4-mapped addresses, ::ffff:a.b.c.d)
1915  */
1916 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1917         .queue_xmit        = ip_queue_xmit,
1918         .send_check        = tcp_v4_send_check,
1919         .rebuild_header    = inet_sk_rebuild_header,
1920         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1921         .conn_request      = tcp_v6_conn_request,
1922         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1923         .net_header_len    = sizeof(struct iphdr),
1924         .setsockopt        = ipv6_setsockopt,
1925         .getsockopt        = ipv6_getsockopt,
1926         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1927         .sockaddr_len      = sizeof(struct sockaddr_in6),
1928         .mtu_reduced       = tcp_v4_mtu_reduced,
1929 };
1930
1931 #ifdef CONFIG_TCP_MD5SIG
1932 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1933         .md5_lookup     =       tcp_v4_md5_lookup,
1934         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1935         .md5_parse      =       tcp_v6_parse_md5_keys,
1936 };
1937 #endif
1938
1939 /* NOTE: A lot of things are set to zero explicitly by the call to
1940  *       sk_alloc(), so they need not be done here.
1941  */
1942 static int tcp_v6_init_sock(struct sock *sk)
1943 {
1944         struct inet_connection_sock *icsk = inet_csk(sk);
1945
1946         tcp_init_sock(sk);
1947
1948         icsk->icsk_af_ops = &ipv6_specific;
1949
1950 #ifdef CONFIG_TCP_MD5SIG
1951         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1952 #endif
1953
1954         return 0;
1955 }
1956
1957 #ifdef CONFIG_PROC_FS
1958 /* Proc filesystem TCPv6 sock list dumping. */
1959 static void get_openreq6(struct seq_file *seq,
1960                          const struct request_sock *req, int i)
1961 {
1962         long ttd = req->rsk_timer.expires - jiffies;
1963         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1964         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1965
1966         if (ttd < 0)
1967                 ttd = 0;
1968
1969         seq_printf(seq,
1970                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1971                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1972                    i,
1973                    src->s6_addr32[0], src->s6_addr32[1],
1974                    src->s6_addr32[2], src->s6_addr32[3],
1975                    inet_rsk(req)->ir_num,
1976                    dest->s6_addr32[0], dest->s6_addr32[1],
1977                    dest->s6_addr32[2], dest->s6_addr32[3],
1978                    ntohs(inet_rsk(req)->ir_rmt_port),
1979                    TCP_SYN_RECV,
1980                    0, 0, /* could print option size, but that is af dependent. */
1981                    1,   /* timers active (only the expire timer) */
1982                    jiffies_to_clock_t(ttd),
1983                    req->num_timeout,
1984                    from_kuid_munged(seq_user_ns(seq),
1985                                     sock_i_uid(req->rsk_listener)),
1986                    0,  /* non standard timer */
1987                    0, /* open_requests have no inode */
1988                    0, req);
1989 }
1990
1991 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1992 {
1993         const struct in6_addr *dest, *src;
1994         __u16 destp, srcp;
1995         int timer_active;
1996         unsigned long timer_expires;
1997         const struct inet_sock *inet = inet_sk(sp);
1998         const struct tcp_sock *tp = tcp_sk(sp);
1999         const struct inet_connection_sock *icsk = inet_csk(sp);
2000         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2001         int rx_queue;
2002         int state;
2003
2004         dest  = &sp->sk_v6_daddr;
2005         src   = &sp->sk_v6_rcv_saddr;
2006         destp = ntohs(inet->inet_dport);
2007         srcp  = ntohs(inet->inet_sport);
2008
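        /* timer_active encodes which timer is pending, for /proc output:
         * 1 retransmit/loss probe, 2 sk_timer (keepalive), 4 zero-window
         * probe, 0 none.
         */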
2009         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2010             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2011             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2012                 timer_active    = 1;
2013                 timer_expires   = icsk->icsk_timeout;
2014         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2015                 timer_active    = 4;
2016                 timer_expires   = icsk->icsk_timeout;
2017         } else if (timer_pending(&sp->sk_timer)) {
2018                 timer_active    = 2;
2019                 timer_expires   = sp->sk_timer.expires;
2020         } else {
2021                 timer_active    = 0;
2022                 timer_expires = jiffies;
2023         }
2024
2025         state = inet_sk_state_load(sp);
2026         if (state == TCP_LISTEN)
2027                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2028         else
2029                 /* Because we don't lock the socket,
2030                  * we might find a transient negative value.
2031                  */
2032                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2033                                       READ_ONCE(tp->copied_seq), 0);
2034
2035         seq_printf(seq,
2036                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2037                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2038                    i,
2039                    src->s6_addr32[0], src->s6_addr32[1],
2040                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2041                    dest->s6_addr32[0], dest->s6_addr32[1],
2042                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2043                    state,
2044                    READ_ONCE(tp->write_seq) - tp->snd_una,
2045                    rx_queue,
2046                    timer_active,
2047                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2048                    icsk->icsk_retransmits,
2049                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2050                    icsk->icsk_probes_out,
2051                    sock_i_ino(sp),
2052                    refcount_read(&sp->sk_refcnt), sp,
2053                    jiffies_to_clock_t(icsk->icsk_rto),
2054                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2055                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2056                    tcp_snd_cwnd(tp),
2057                    state == TCP_LISTEN ?
2058                         fastopenq->max_qlen :
2059                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2060                    );
2061 }
2062
2063 static void get_timewait6_sock(struct seq_file *seq,
2064                                struct inet_timewait_sock *tw, int i)
2065 {
2066         long delta = tw->tw_timer.expires - jiffies;
2067         const struct in6_addr *dest, *src;
2068         __u16 destp, srcp;
2069
2070         dest = &tw->tw_v6_daddr;
2071         src  = &tw->tw_v6_rcv_saddr;
2072         destp = ntohs(tw->tw_dport);
2073         srcp  = ntohs(tw->tw_sport);
2074
2075         seq_printf(seq,
2076                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2077                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2078                    i,
2079                    src->s6_addr32[0], src->s6_addr32[1],
2080                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2081                    dest->s6_addr32[0], dest->s6_addr32[1],
2082                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2083                    tw->tw_substate, 0, 0,
2084                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2085                    refcount_read(&tw->tw_refcnt), tw);
2086 }
2087
2088 static int tcp6_seq_show(struct seq_file *seq, void *v)
2089 {
2090         struct tcp_iter_state *st;
2091         struct sock *sk = v;
2092
2093         if (v == SEQ_START_TOKEN) {
2094                 seq_puts(seq,
2095                          "  sl  "
2096                          "local_address                         "
2097                          "remote_address                        "
2098                          "st tx_queue rx_queue tr tm->when retrnsmt"
2099                          "   uid  timeout inode\n");
2100                 goto out;
2101         }
2102         st = seq->private;
2103
2104         if (sk->sk_state == TCP_TIME_WAIT)
2105                 get_timewait6_sock(seq, v, st->num);
2106         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2107                 get_openreq6(seq, v, st->num);
2108         else
2109                 get_tcp6_sock(seq, v, st->num);
2110 out:
2111         return 0;
2112 }
2113
2114 static const struct seq_operations tcp6_seq_ops = {
2115         .show           = tcp6_seq_show,
2116         .start          = tcp_seq_start,
2117         .next           = tcp_seq_next,
2118         .stop           = tcp_seq_stop,
2119 };
2120
2121 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2122         .family         = AF_INET6,
2123 };
2124
2125 int __net_init tcp6_proc_init(struct net *net)
2126 {
2127         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2128                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2129                 return -ENOMEM;
2130         return 0;
2131 }
2132
2133 void tcp6_proc_exit(struct net *net)
2134 {
2135         remove_proc_entry("tcp6", net->proc_net);
2136 }
2137 #endif
2138
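/* AF_INET6 TCP protocol descriptor, wired up through tcpv6_protosw below. */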
2139 struct proto tcpv6_prot = {
2140         .name                   = "TCPv6",
2141         .owner                  = THIS_MODULE,
2142         .close                  = tcp_close,
2143         .pre_connect            = tcp_v6_pre_connect,
2144         .connect                = tcp_v6_connect,
2145         .disconnect             = tcp_disconnect,
2146         .accept                 = inet_csk_accept,
2147         .ioctl                  = tcp_ioctl,
2148         .init                   = tcp_v6_init_sock,
2149         .destroy                = tcp_v4_destroy_sock,
2150         .shutdown               = tcp_shutdown,
2151         .setsockopt             = tcp_setsockopt,
2152         .getsockopt             = tcp_getsockopt,
2153         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2154         .keepalive              = tcp_set_keepalive,
2155         .recvmsg                = tcp_recvmsg,
2156         .sendmsg                = tcp_sendmsg,
2157         .splice_eof             = tcp_splice_eof,
2158         .backlog_rcv            = tcp_v6_do_rcv,
2159         .release_cb             = tcp_release_cb,
2160         .hash                   = inet6_hash,
2161         .unhash                 = inet_unhash,
2162         .get_port               = inet_csk_get_port,
2163         .put_port               = inet_put_port,
2164 #ifdef CONFIG_BPF_SYSCALL
2165         .psock_update_sk_prot   = tcp_bpf_update_proto,
2166 #endif
2167         .enter_memory_pressure  = tcp_enter_memory_pressure,
2168         .leave_memory_pressure  = tcp_leave_memory_pressure,
2169         .stream_memory_free     = tcp_stream_memory_free,
2170         .sockets_allocated      = &tcp_sockets_allocated,
2171
2172         .memory_allocated       = &tcp_memory_allocated,
2173         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2174
2175         .memory_pressure        = &tcp_memory_pressure,
2176         .orphan_count           = &tcp_orphan_count,
2177         .sysctl_mem             = sysctl_tcp_mem,
2178         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2179         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2180         .max_header             = MAX_TCP_HEADER,
2181         .obj_size               = sizeof(struct tcp6_sock),
2182         .ipv6_pinfo_offset      = offsetof(struct tcp6_sock, inet6),
2183         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2184         .twsk_prot              = &tcp6_timewait_sock_ops,
2185         .rsk_prot               = &tcp6_request_sock_ops,
2186         .h.hashinfo             = NULL,
2187         .no_autobind            = true,
2188         .diag_destroy           = tcp_abort,
2189 };
2190 EXPORT_SYMBOL_GPL(tcpv6_prot);
2191
2192 static const struct inet6_protocol tcpv6_protocol = {
2193         .handler        =       tcp_v6_rcv,
2194         .err_handler    =       tcp_v6_err,
2195         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2196 };
2197
2198 static struct inet_protosw tcpv6_protosw = {
2199         .type           =       SOCK_STREAM,
2200         .protocol       =       IPPROTO_TCP,
2201         .prot           =       &tcpv6_prot,
2202         .ops            =       &inet6_stream_ops,
2203         .flags          =       INET_PROTOSW_PERMANENT |
2204                                 INET_PROTOSW_ICSK,
2205 };
2206
2207 static int __net_init tcpv6_net_init(struct net *net)
2208 {
2209         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2210                                     SOCK_RAW, IPPROTO_TCP, net);
2211 }
2212
2213 static void __net_exit tcpv6_net_exit(struct net *net)
2214 {
2215         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2216 }
2217
2218 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2219 {
2220         tcp_twsk_purge(net_exit_list, AF_INET6);
2221 }
2222
2223 static struct pernet_operations tcpv6_net_ops = {
2224         .init       = tcpv6_net_init,
2225         .exit       = tcpv6_net_exit,
2226         .exit_batch = tcpv6_net_exit_batch,
2227 };
2228
2229 int __init tcpv6_init(void)
2230 {
2231         int ret;
2232
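        /* Each registration below is undone in reverse order on failure
         * via the chained error labels at the bottom of this function.
         */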
2233         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2234         if (ret)
2235                 goto out;
2236
2237         /* register inet6 protocol */
2238         ret = inet6_register_protosw(&tcpv6_protosw);
2239         if (ret)
2240                 goto out_tcpv6_protocol;
2241
2242         ret = register_pernet_subsys(&tcpv6_net_ops);
2243         if (ret)
2244                 goto out_tcpv6_protosw;
2245
2246         ret = mptcpv6_init();
2247         if (ret)
2248                 goto out_tcpv6_pernet_subsys;
2249
2250 out:
2251         return ret;
2252
2253 out_tcpv6_pernet_subsys:
2254         unregister_pernet_subsys(&tcpv6_net_ops);
2255 out_tcpv6_protosw:
2256         inet6_unregister_protosw(&tcpv6_protosw);
2257 out_tcpv6_protocol:
2258         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2259         goto out;
2260 }
2261
2262 void tcpv6_exit(void)
2263 {
2264         unregister_pernet_subsys(&tcpv6_net_ops);
2265         inet6_unregister_protosw(&tcpv6_protosw);
2266         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2267 }