net/tcp: Prepare tcp_md5sig_pool for TCP-AO
net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97                                               struct tcp6_sock, tcp)->inet6)
98
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101         struct dst_entry *dst = skb_dst(skb);
102
103         if (dst && dst_hold_safe(dst)) {
104                 const struct rt6_info *rt = (const struct rt6_info *)dst;
105
106                 rcu_assign_pointer(sk->sk_rx_dst, dst);
107                 sk->sk_rx_dst_ifindex = skb->skb_iif;
108                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
109         }
110 }
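/* The rx dst cached here is revalidated on every incoming segment in
 * tcp_v6_do_rcv(): if the ingress ifindex or the route cookie no longer
 * match, the cached entry is dropped and released.
 */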
111
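/* The two helpers below derive the initial sequence number and the
 * per-connection timestamp offset from the flow identifiers via keyed
 * hashes (in the spirit of RFC 6528), so they are hard to predict
 * off-path.
 */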
112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
113 {
114         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115                                 ipv6_hdr(skb)->saddr.s6_addr32,
116                                 tcp_hdr(skb)->dest,
117                                 tcp_hdr(skb)->source);
118 }
119
120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
121 {
122         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123                                    ipv6_hdr(skb)->saddr.s6_addr32);
124 }
125
126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
127                               int addr_len)
128 {
129         /* This check is replicated from tcp_v6_connect() and intended to
130          * prevent the BPF program called below from accessing bytes that are
131          * out of the bound specified by the user in addr_len.
132          */
133         if (addr_len < SIN6_LEN_RFC2133)
134                 return -EINVAL;
135
136         sock_owned_by_me(sk);
137
138         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
139 }
140
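/* Active open: validate the destination address, divert v4-mapped
 * destinations to tcp_v4_connect(), route the flow, bind the source
 * address and port, pick the ISN and timestamp offset, and finally emit
 * the SYN via tcp_connect().
 */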
141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142                           int addr_len)
143 {
144         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145         struct inet_connection_sock *icsk = inet_csk(sk);
146         struct in6_addr *saddr = NULL, *final_p, final;
147         struct inet_timewait_death_row *tcp_death_row;
148         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149         struct inet_sock *inet = inet_sk(sk);
150         struct tcp_sock *tp = tcp_sk(sk);
151         struct net *net = sock_net(sk);
152         struct ipv6_txoptions *opt;
153         struct dst_entry *dst;
154         struct flowi6 fl6;
155         int addr_type;
156         int err;
157
158         if (addr_len < SIN6_LEN_RFC2133)
159                 return -EINVAL;
160
161         if (usin->sin6_family != AF_INET6)
162                 return -EAFNOSUPPORT;
163
164         memset(&fl6, 0, sizeof(fl6));
165
166         if (inet6_test_bit(SNDFLOW, sk)) {
167                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
168                 IP6_ECN_flow_init(fl6.flowlabel);
169                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
170                         struct ip6_flowlabel *flowlabel;
171                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172                         if (IS_ERR(flowlabel))
173                                 return -EINVAL;
174                         fl6_sock_release(flowlabel);
175                 }
176         }
177
178         /*
179          *      connect() to INADDR_ANY means loopback (BSD'ism).
180          */
181
182         if (ipv6_addr_any(&usin->sin6_addr)) {
183                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
185                                                &usin->sin6_addr);
186                 else
187                         usin->sin6_addr = in6addr_loopback;
188         }
189
190         addr_type = ipv6_addr_type(&usin->sin6_addr);
191
192         if (addr_type & IPV6_ADDR_MULTICAST)
193                 return -ENETUNREACH;
194
195         if (addr_type&IPV6_ADDR_LINKLOCAL) {
196                 if (addr_len >= sizeof(struct sockaddr_in6) &&
197                     usin->sin6_scope_id) {
198                         /* If interface is set while binding, indices
199                          * must coincide.
200                          */
201                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
202                                 return -EINVAL;
203
204                         sk->sk_bound_dev_if = usin->sin6_scope_id;
205                 }
206
207                 /* Connecting to a link-local address requires an interface */
208                 if (!sk->sk_bound_dev_if)
209                         return -EINVAL;
210         }
211
212         if (tp->rx_opt.ts_recent_stamp &&
213             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214                 tp->rx_opt.ts_recent = 0;
215                 tp->rx_opt.ts_recent_stamp = 0;
216                 WRITE_ONCE(tp->write_seq, 0);
217         }
218
219         sk->sk_v6_daddr = usin->sin6_addr;
220         np->flow_label = fl6.flowlabel;
221
222         /*
223          *      TCP over IPv4
224          */
225
226         if (addr_type & IPV6_ADDR_MAPPED) {
227                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
228                 struct sockaddr_in sin;
229
230                 if (ipv6_only_sock(sk))
231                         return -ENETUNREACH;
232
233                 sin.sin_family = AF_INET;
234                 sin.sin_port = usin->sin6_port;
235                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
236
237                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
239                 if (sk_is_mptcp(sk))
240                         mptcpv6_handle_mapped(sk, true);
241                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
244 #endif
245
246                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247
248                 if (err) {
249                         icsk->icsk_ext_hdr_len = exthdrlen;
250                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
252                         if (sk_is_mptcp(sk))
253                                 mptcpv6_handle_mapped(sk, false);
254                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256                         tp->af_specific = &tcp_sock_ipv6_specific;
257 #endif
258                         goto failure;
259                 }
260                 np->saddr = sk->sk_v6_rcv_saddr;
261
262                 return err;
263         }
264
265         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266                 saddr = &sk->sk_v6_rcv_saddr;
267
268         fl6.flowi6_proto = IPPROTO_TCP;
269         fl6.daddr = sk->sk_v6_daddr;
270         fl6.saddr = saddr ? *saddr : np->saddr;
271         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
290         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
291
292         if (!saddr) {
293                 saddr = &fl6.saddr;
294
295                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
296                 if (err)
297                         goto failure;
298         }
299
300         /* set the source address */
301         np->saddr = *saddr;
302         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
303
304         sk->sk_gso_type = SKB_GSO_TCPV6;
305         ip6_dst_store(sk, dst, NULL, NULL);
306
307         icsk->icsk_ext_hdr_len = 0;
308         if (opt)
309                 icsk->icsk_ext_hdr_len = opt->opt_flen +
310                                          opt->opt_nflen;
311
312         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
313
314         inet->inet_dport = usin->sin6_port;
315
316         tcp_set_state(sk, TCP_SYN_SENT);
317         err = inet6_hash_connect(tcp_death_row, sk);
318         if (err)
319                 goto late_failure;
320
321         sk_set_txhash(sk);
322
323         if (likely(!tp->repair)) {
324                 if (!tp->write_seq)
325                         WRITE_ONCE(tp->write_seq,
326                                    secure_tcpv6_seq(np->saddr.s6_addr32,
327                                                     sk->sk_v6_daddr.s6_addr32,
328                                                     inet->inet_sport,
329                                                     inet->inet_dport));
330                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
331                                                    sk->sk_v6_daddr.s6_addr32);
332         }
333
334         if (tcp_fastopen_defer_connect(sk, &err))
335                 return err;
336         if (err)
337                 goto late_failure;
338
339         err = tcp_connect(sk);
340         if (err)
341                 goto late_failure;
342
343         return 0;
344
345 late_failure:
346         tcp_set_state(sk, TCP_CLOSE);
347         inet_bhash2_reset_saddr(sk);
348 failure:
349         inet->inet_dport = 0;
350         sk->sk_route_caps = 0;
351         return err;
352 }
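/* An illustrative userspace sketch (not part of this file) of the
 * v4-mapped branch above; the documentation address 192.0.2.1 and the
 * unchecked return values are placeholders:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 */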
353
354 static void tcp_v6_mtu_reduced(struct sock *sk)
355 {
356         struct dst_entry *dst;
357         u32 mtu;
358
359         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
360                 return;
361
362         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
363
364         /* Drop requests trying to increase our current mss.
365          * The check done in __ip6_rt_update_pmtu() is too late.
366          */
367         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
368                 return;
369
370         dst = inet6_csk_update_pmtu(sk, mtu);
371         if (!dst)
372                 return;
373
374         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
375                 tcp_sync_mss(sk, dst_mtu(dst));
376                 tcp_simple_retransmit(sk);
377         }
378 }
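/* tcp_v6_err() funnels ICMPV6_PKT_TOOBIG here: if the socket is owned by
 * the user, the new MTU is parked in tp->mtu_info and this handler is
 * replayed later from tcp_release_cb() via TCP_MTU_REDUCED_DEFERRED.
 */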
379
380 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
381                 u8 type, u8 code, int offset, __be32 info)
382 {
383         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
384         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
385         struct net *net = dev_net(skb->dev);
386         struct request_sock *fastopen;
387         struct ipv6_pinfo *np;
388         struct tcp_sock *tp;
389         __u32 seq, snd_una;
390         struct sock *sk;
391         bool fatal;
392         int err;
393
394         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
395                                         &hdr->daddr, th->dest,
396                                         &hdr->saddr, ntohs(th->source),
397                                         skb->dev->ifindex, inet6_sdif(skb));
398
399         if (!sk) {
400                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
401                                   ICMP6_MIB_INERRORS);
402                 return -ENOENT;
403         }
404
405         if (sk->sk_state == TCP_TIME_WAIT) {
406                 inet_twsk_put(inet_twsk(sk));
407                 return 0;
408         }
409         seq = ntohl(th->seq);
410         fatal = icmpv6_err_convert(type, code, &err);
411         if (sk->sk_state == TCP_NEW_SYN_RECV) {
412                 tcp_req_err(sk, seq, fatal);
413                 return 0;
414         }
415
416         bh_lock_sock(sk);
417         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
418                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
419
420         if (sk->sk_state == TCP_CLOSE)
421                 goto out;
422
423         if (static_branch_unlikely(&ip6_min_hopcount)) {
424                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
425                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
426                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
427                         goto out;
428                 }
429         }
430
431         tp = tcp_sk(sk);
432         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
433         fastopen = rcu_dereference(tp->fastopen_rsk);
434         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
435         if (sk->sk_state != TCP_LISTEN &&
436             !between(seq, snd_una, tp->snd_nxt)) {
437                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
438                 goto out;
439         }
440
441         np = tcp_inet6_sk(sk);
442
443         if (type == NDISC_REDIRECT) {
444                 if (!sock_owned_by_user(sk)) {
445                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
446
447                         if (dst)
448                                 dst->ops->redirect(dst, sk, skb);
449                 }
450                 goto out;
451         }
452
453         if (type == ICMPV6_PKT_TOOBIG) {
454                 u32 mtu = ntohl(info);
455
456                 /* We are not interested in TCP_LISTEN and open_requests
457                  * (SYN-ACKs sent out by Linux are always <576 bytes, so
458                  * they should go through unfragmented).
459                  */
460                 if (sk->sk_state == TCP_LISTEN)
461                         goto out;
462
463                 if (!ip6_sk_accept_pmtu(sk))
464                         goto out;
465
466                 if (mtu < IPV6_MIN_MTU)
467                         goto out;
468
469                 WRITE_ONCE(tp->mtu_info, mtu);
470
471                 if (!sock_owned_by_user(sk))
472                         tcp_v6_mtu_reduced(sk);
473                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
474                                            &sk->sk_tsq_flags))
475                         sock_hold(sk);
476                 goto out;
477         }
478
479
480         /* Might be for a request_sock */
481         switch (sk->sk_state) {
482         case TCP_SYN_SENT:
483         case TCP_SYN_RECV:
484                 /* Only in fast or simultaneous open. If a fast open socket is
485                  * already accepted it is treated as a connected one below.
486                  */
487                 if (fastopen && !fastopen->sk)
488                         break;
489
490                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
491
492                 if (!sock_owned_by_user(sk)) {
493                         WRITE_ONCE(sk->sk_err, err);
494                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
495
496                         tcp_done(sk);
497                 } else {
498                         WRITE_ONCE(sk->sk_err_soft, err);
499                 }
500                 goto out;
501         case TCP_LISTEN:
502                 break;
503         default:
504                 /* Check if this ICMP message allows reverting the RTO backoff
505                  * (see RFC 6069).
506                  */
507                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508                     code == ICMPV6_NOROUTE)
509                         tcp_ld_RTO_revert(sk, seq);
510         }
511
512         if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513                 WRITE_ONCE(sk->sk_err, err);
514                 sk_error_report(sk);
515         } else {
516                 WRITE_ONCE(sk->sk_err_soft, err);
517         }
518 out:
519         bh_unlock_sock(sk);
520         sock_put(sk);
521         return 0;
522 }
523
524
525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
526                               struct flowi *fl,
527                               struct request_sock *req,
528                               struct tcp_fastopen_cookie *foc,
529                               enum tcp_synack_type synack_type,
530                               struct sk_buff *syn_skb)
531 {
532         struct inet_request_sock *ireq = inet_rsk(req);
533         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534         struct ipv6_txoptions *opt;
535         struct flowi6 *fl6 = &fl->u.ip6;
536         struct sk_buff *skb;
537         int err = -ENOMEM;
538         u8 tclass;
539
540         /* First, grab a route. */
541         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
542                                                IPPROTO_TCP)) == NULL)
543                 goto done;
544
545         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
546
547         if (skb) {
548                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
549                                     &ireq->ir_v6_rmt_addr);
550
551                 fl6->daddr = ireq->ir_v6_rmt_addr;
552                 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
553                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
554
555                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
556                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
557                                 (np->tclass & INET_ECN_MASK) :
558                                 np->tclass;
559
560                 if (!INET_ECN_is_capable(tclass) &&
561                     tcp_bpf_ca_needs_ecn((struct sock *)req))
562                         tclass |= INET_ECN_ECT_0;
563
564                 rcu_read_lock();
565                 opt = ireq->ipv6_opt;
566                 if (!opt)
567                         opt = rcu_dereference(np->opt);
568                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
569                                opt, tclass, READ_ONCE(sk->sk_priority));
570                 rcu_read_unlock();
571                 err = net_xmit_eval(err);
572         }
573
574 done:
575         return err;
576 }
577
578
579 static void tcp_v6_reqsk_destructor(struct request_sock *req)
580 {
581         kfree(inet_rsk(req)->ipv6_opt);
582         consume_skb(inet_rsk(req)->pktopts);
583 }
584
585 #ifdef CONFIG_TCP_MD5SIG
586 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
587                                                    const struct in6_addr *addr,
588                                                    int l3index)
589 {
590         return tcp_md5_do_lookup(sk, l3index,
591                                  (union tcp_md5_addr *)addr, AF_INET6);
592 }
593
594 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
595                                                 const struct sock *addr_sk)
596 {
597         int l3index;
598
599         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
600                                                  addr_sk->sk_bound_dev_if);
601         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
602                                     l3index);
603 }
604
605 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
606                                  sockptr_t optval, int optlen)
607 {
608         struct tcp_md5sig cmd;
609         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
610         int l3index = 0;
611         u8 prefixlen;
612         u8 flags;
613
614         if (optlen < sizeof(cmd))
615                 return -EINVAL;
616
617         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
618                 return -EFAULT;
619
620         if (sin6->sin6_family != AF_INET6)
621                 return -EINVAL;
622
623         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
624
625         if (optname == TCP_MD5SIG_EXT &&
626             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
627                 prefixlen = cmd.tcpm_prefixlen;
628                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
629                                         prefixlen > 32))
630                         return -EINVAL;
631         } else {
632                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
633         }
634
635         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
636             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
637                 struct net_device *dev;
638
639                 rcu_read_lock();
640                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
641                 if (dev && netif_is_l3_master(dev))
642                         l3index = dev->ifindex;
643                 rcu_read_unlock();
644
645                 /* It is OK to test dev set/not set outside of RCU;
646                  * right now the device MUST be an L3 master.
647                  */
648                 if (!dev || !l3index)
649                         return -EINVAL;
650         }
651
652         if (!cmd.tcpm_keylen) {
653                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
654                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
655                                               AF_INET, prefixlen,
656                                               l3index, flags);
657                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658                                       AF_INET6, prefixlen, l3index, flags);
659         }
660
661         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
662                 return -EINVAL;
663
664         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
665                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
666                                       AF_INET, prefixlen, l3index, flags,
667                                       cmd.tcpm_key, cmd.tcpm_keylen);
668
669         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
670                               AF_INET6, prefixlen, l3index, flags,
671                               cmd.tcpm_key, cmd.tcpm_keylen);
672 }
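/* An illustrative userspace sketch (not part of this file) of setting the
 * option parsed above; the key, the documentation address and the
 * unchecked return values are placeholders:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */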
673
674 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
675                                    const struct in6_addr *daddr,
676                                    const struct in6_addr *saddr,
677                                    const struct tcphdr *th, int nbytes)
678 {
679         struct tcp6_pseudohdr *bp;
680         struct scatterlist sg;
681         struct tcphdr *_th;
682
683         bp = hp->scratch;
684         /* 1. TCP pseudo-header (RFC2460) */
685         bp->saddr = *saddr;
686         bp->daddr = *daddr;
687         bp->protocol = cpu_to_be32(IPPROTO_TCP);
688         bp->len = cpu_to_be32(nbytes);
689
690         _th = (struct tcphdr *)(bp + 1);
691         memcpy(_th, th, sizeof(*th));
692         _th->check = 0;
693
694         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
695         ahash_request_set_crypt(hp->req, &sg, NULL,
696                                 sizeof(*bp) + sizeof(*th));
697         return crypto_ahash_update(hp->req);
698 }
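/* The scratch buffer hashed above holds, back to back, the pseudo-header
 * (saddr, daddr, upper-layer length, next header) and a copy of the TCP
 * header with its checksum zeroed: the digest input prescribed by
 * RFC 2385. The payload and the key are mixed in by the callers.
 */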
699
700 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
701                                const struct in6_addr *daddr, struct in6_addr *saddr,
702                                const struct tcphdr *th)
703 {
704         struct tcp_sigpool hp;
705
706         if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
707                 goto clear_hash_nostart;
708
709         if (crypto_ahash_init(hp.req))
710                 goto clear_hash;
711         if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
712                 goto clear_hash;
713         if (tcp_md5_hash_key(&hp, key))
714                 goto clear_hash;
715         ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
716         if (crypto_ahash_final(hp.req))
717                 goto clear_hash;
718
719         tcp_sigpool_end(&hp);
720         return 0;
721
722 clear_hash:
723         tcp_sigpool_end(&hp);
724 clear_hash_nostart:
725         memset(md5_hash, 0, 16);
726         return 1;
727 }
728
729 static int tcp_v6_md5_hash_skb(char *md5_hash,
730                                const struct tcp_md5sig_key *key,
731                                const struct sock *sk,
732                                const struct sk_buff *skb)
733 {
734         const struct tcphdr *th = tcp_hdr(skb);
735         const struct in6_addr *saddr, *daddr;
736         struct tcp_sigpool hp;
737
738         if (sk) { /* valid for establish/request sockets */
739                 saddr = &sk->sk_v6_rcv_saddr;
740                 daddr = &sk->sk_v6_daddr;
741         } else {
742                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
743                 saddr = &ip6h->saddr;
744                 daddr = &ip6h->daddr;
745         }
746
747         if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
748                 goto clear_hash_nostart;
749
750         if (crypto_ahash_init(hp.req))
751                 goto clear_hash;
752
753         if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
754                 goto clear_hash;
755         if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
756                 goto clear_hash;
757         if (tcp_md5_hash_key(&hp, key))
758                 goto clear_hash;
759         ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
760         if (crypto_ahash_final(hp.req))
761                 goto clear_hash;
762
763         tcp_sigpool_end(&hp);
764         return 0;
765
766 clear_hash:
767         tcp_sigpool_end(&hp);
768 clear_hash_nostart:
769         memset(md5_hash, 0, 16);
770         return 1;
771 }
772
773 #endif
774
775 static void tcp_v6_init_req(struct request_sock *req,
776                             const struct sock *sk_listener,
777                             struct sk_buff *skb)
778 {
779         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
780         struct inet_request_sock *ireq = inet_rsk(req);
781         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
782
783         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
784         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
785
786         /* So that link-local addresses have meaning */
787         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
788             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
789                 ireq->ir_iif = tcp_v6_iif(skb);
790
791         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
792             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
793              np->rxopt.bits.rxinfo ||
794              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
795              np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
796                 refcount_inc(&skb->users);
797                 ireq->pktopts = skb;
798         }
799 }
800
801 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
802                                           struct sk_buff *skb,
803                                           struct flowi *fl,
804                                           struct request_sock *req)
805 {
806         tcp_v6_init_req(req, sk, skb);
807
808         if (security_inet_conn_request(sk, skb, req))
809                 return NULL;
810
811         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
812 }
813
814 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
815         .family         =       AF_INET6,
816         .obj_size       =       sizeof(struct tcp6_request_sock),
817         .rtx_syn_ack    =       tcp_rtx_synack,
818         .send_ack       =       tcp_v6_reqsk_send_ack,
819         .destructor     =       tcp_v6_reqsk_destructor,
820         .send_reset     =       tcp_v6_send_reset,
821         .syn_ack_timeout =      tcp_syn_ack_timeout,
822 };
823
824 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
825         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
826                                 sizeof(struct ipv6hdr),
827 #ifdef CONFIG_TCP_MD5SIG
828         .req_md5_lookup =       tcp_v6_md5_lookup,
829         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
830 #endif
831 #ifdef CONFIG_SYN_COOKIES
832         .cookie_init_seq =      cookie_v6_init_sequence,
833 #endif
834         .route_req      =       tcp_v6_route_req,
835         .init_seq       =       tcp_v6_init_seq,
836         .init_ts_off    =       tcp_v6_init_ts_off,
837         .send_synack    =       tcp_v6_send_synack,
838 };
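/* Both tables above feed the protocol-independent tcp_conn_request():
 * tcp6_request_sock_ops supplies the generic request-socket hooks, while
 * tcp_request_sock_ipv6_ops supplies the AF-specific ones (routing, ISN,
 * MD5 and syncookie handling, SYN-ACK emission).
 */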
839
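/* Build and send a bare ACK or RST in reply to @skb on behalf of @sk,
 * which may be NULL, a timewait socket or a full socket: the reply swaps
 * the addresses and ports of the incoming segment and is transmitted via
 * the per-netns control socket, bypassing the regular write queue.
 */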
840 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
841                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
842                                  int oif, struct tcp_md5sig_key *key, int rst,
843                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
844 {
845         const struct tcphdr *th = tcp_hdr(skb);
846         struct tcphdr *t1;
847         struct sk_buff *buff;
848         struct flowi6 fl6;
849         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
850         struct sock *ctl_sk = net->ipv6.tcp_sk;
851         unsigned int tot_len = sizeof(struct tcphdr);
852         __be32 mrst = 0, *topt;
853         struct dst_entry *dst;
854         __u32 mark = 0;
855
856         if (tsecr)
857                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
858 #ifdef CONFIG_TCP_MD5SIG
859         if (key)
860                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
861 #endif
862
863 #ifdef CONFIG_MPTCP
864         if (rst && !key) {
865                 mrst = mptcp_reset_option(skb);
866
867                 if (mrst)
868                         tot_len += sizeof(__be32);
869         }
870 #endif
871
872         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
873         if (!buff)
874                 return;
875
876         skb_reserve(buff, MAX_TCP_HEADER);
877
878         t1 = skb_push(buff, tot_len);
879         skb_reset_transport_header(buff);
880
881         /* Swap the send and the receive: build the reply with ports reversed. */
882         memset(t1, 0, sizeof(*t1));
883         t1->dest = th->source;
884         t1->source = th->dest;
885         t1->doff = tot_len / 4;
886         t1->seq = htonl(seq);
887         t1->ack_seq = htonl(ack);
888         t1->ack = !rst || !th->ack;
889         t1->rst = rst;
890         t1->window = htons(win);
891
892         topt = (__be32 *)(t1 + 1);
893
894         if (tsecr) {
895                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
896                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
897                 *topt++ = htonl(tsval);
898                 *topt++ = htonl(tsecr);
899         }
900
901         if (mrst)
902                 *topt++ = mrst;
903
904 #ifdef CONFIG_TCP_MD5SIG
905         if (key) {
906                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
907                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
908                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
909                                     &ipv6_hdr(skb)->saddr,
910                                     &ipv6_hdr(skb)->daddr, t1);
911         }
912 #endif
913
914         memset(&fl6, 0, sizeof(fl6));
915         fl6.daddr = ipv6_hdr(skb)->saddr;
916         fl6.saddr = ipv6_hdr(skb)->daddr;
917         fl6.flowlabel = label;
918
919         buff->ip_summed = CHECKSUM_PARTIAL;
920
921         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
922
923         fl6.flowi6_proto = IPPROTO_TCP;
924         if (rt6_need_strict(&fl6.daddr) && !oif)
925                 fl6.flowi6_oif = tcp_v6_iif(skb);
926         else {
927                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
928                         oif = skb->skb_iif;
929
930                 fl6.flowi6_oif = oif;
931         }
932
933         if (sk) {
934                 if (sk->sk_state == TCP_TIME_WAIT)
935                         mark = inet_twsk(sk)->tw_mark;
936                 else
937                         mark = READ_ONCE(sk->sk_mark);
938                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
939         }
940         if (txhash) {
941                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
942                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
943         }
944         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
945         fl6.fl6_dport = t1->dest;
946         fl6.fl6_sport = t1->source;
947         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
948         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
949
950         /* Pass a socket to ip6_dst_lookup_flow() whether or not it is for a
951          * RST; the underlying function will use it to retrieve the network
952          * namespace.
953          */
954         if (sk && sk->sk_state != TCP_TIME_WAIT)
955                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
956         else
957                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
958         if (!IS_ERR(dst)) {
959                 skb_dst_set(buff, dst);
960                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
961                          tclass & ~INET_ECN_MASK, priority);
962                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
963                 if (rst)
964                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
965                 return;
966         }
967
968         kfree_skb(buff);
969 }
970
971 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
972 {
973         const struct tcphdr *th = tcp_hdr(skb);
974         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
975         u32 seq = 0, ack_seq = 0;
976         struct tcp_md5sig_key *key = NULL;
977 #ifdef CONFIG_TCP_MD5SIG
978         const __u8 *hash_location = NULL;
979         unsigned char newhash[16];
980         int genhash;
981         struct sock *sk1 = NULL;
982 #endif
983         __be32 label = 0;
984         u32 priority = 0;
985         struct net *net;
986         u32 txhash = 0;
987         int oif = 0;
988
989         if (th->rst)
990                 return;
991
992         /* If sk is not NULL, it means we did a successful lookup and the
993          * incoming route had to be correct. prequeue might have dropped our dst.
994          */
995         if (!sk && !ipv6_unicast_destination(skb))
996                 return;
997
998         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
999 #ifdef CONFIG_TCP_MD5SIG
1000         rcu_read_lock();
1001         hash_location = tcp_parse_md5sig_option(th);
1002         if (sk && sk_fullsock(sk)) {
1003                 int l3index;
1004
1005                 /* sdif set means the packet ingressed via a device
1006                  * in an L3 domain, and inet_iif is set to it.
1007                  */
1008                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1009                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1010         } else if (hash_location) {
1011                 int dif = tcp_v6_iif_l3_slave(skb);
1012                 int sdif = tcp_v6_sdif(skb);
1013                 int l3index;
1014
1015                 /*
1016                  * The active side is lost. Try to find the listening socket
1017                  * through the source port, and then find the md5 key through
1018                  * that listening socket. We do not lose security here:
1019                  * the incoming packet is checked against the md5 hash of the
1020                  * found key; no RST is generated if the hash doesn't match.
1021                  */
1022                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1023                                             NULL, 0, &ipv6h->saddr, th->source,
1024                                             &ipv6h->daddr, ntohs(th->source),
1025                                             dif, sdif);
1026                 if (!sk1)
1027                         goto out;
1028
1029                 /* sdif set means the packet ingressed via a device
1030                  * in an L3 domain, and dif is set to it.
1031                  */
1032                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1033
1034                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1035                 if (!key)
1036                         goto out;
1037
1038                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1039                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1040                         goto out;
1041         }
1042 #endif
1043
1044         if (th->ack)
1045                 seq = ntohl(th->ack_seq);
1046         else
1047                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1048                           (th->doff << 2);
1049
1050         if (sk) {
1051                 oif = sk->sk_bound_dev_if;
1052                 if (sk_fullsock(sk)) {
1053                         trace_tcp_send_reset(sk, skb);
1054                         if (inet6_test_bit(REPFLOW, sk))
1055                                 label = ip6_flowlabel(ipv6h);
1056                         priority = READ_ONCE(sk->sk_priority);
1057                         txhash = sk->sk_txhash;
1058                 }
1059                 if (sk->sk_state == TCP_TIME_WAIT) {
1060                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1061                         priority = inet_twsk(sk)->tw_priority;
1062                         txhash = inet_twsk(sk)->tw_txhash;
1063                 }
1064         } else {
1065                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1066                         label = ip6_flowlabel(ipv6h);
1067         }
1068
1069         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1070                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1071
1072 #ifdef CONFIG_TCP_MD5SIG
1073 out:
1074         rcu_read_unlock();
1075 #endif
1076 }
1077
1078 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1079                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1080                             struct tcp_md5sig_key *key, u8 tclass,
1081                             __be32 label, u32 priority, u32 txhash)
1082 {
1083         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1084                              tclass, label, priority, txhash);
1085 }
1086
1087 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1088 {
1089         struct inet_timewait_sock *tw = inet_twsk(sk);
1090         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1091
1092         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1093                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1094                         tcp_tw_tsval(tcptw),
1095                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1096                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1097                         tw->tw_txhash);
1098
1099         inet_twsk_put(tw);
1100 }
1101
1102 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1103                                   struct request_sock *req)
1104 {
1105         int l3index;
1106
1107         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1108
1109         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1110          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1111          */
1112         /* RFC 7323 2.3
1113          * The window field (SEG.WND) of every outgoing segment, with the
1114          * exception of <SYN> segments, MUST be right-shifted by
1115          * Rcv.Wind.Shift bits:
1116          */
1117         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1118                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1119                         tcp_rsk(req)->rcv_nxt,
1120                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1121                         tcp_rsk_tsval(tcp_rsk(req)),
1122                         READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1123                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1124                         ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1125                         READ_ONCE(sk->sk_priority),
1126                         READ_ONCE(tcp_rsk(req)->txhash));
1127 }
1128
1129
1130 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1131 {
1132 #ifdef CONFIG_SYN_COOKIES
1133         const struct tcphdr *th = tcp_hdr(skb);
1134
1135         if (!th->syn)
1136                 sk = cookie_v6_check(sk, skb);
1137 #endif
1138         return sk;
1139 }
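/* Under SYN-cookie operation the listener keeps no request state, so a
 * bare handshake-completing ACK is validated here: cookie_v6_check()
 * rebuilds the mini-socket from the cookie echoed in the ACK, or returns
 * the listener unchanged when no cookie processing is in effect.
 */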
1140
1141 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1142                          struct tcphdr *th, u32 *cookie)
1143 {
1144         u16 mss = 0;
1145 #ifdef CONFIG_SYN_COOKIES
1146         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1147                                     &tcp_request_sock_ipv6_ops, sk, th);
1148         if (mss) {
1149                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1150                 tcp_synq_overflow(sk);
1151         }
1152 #endif
1153         return mss;
1154 }
1155
1156 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1157 {
1158         if (skb->protocol == htons(ETH_P_IP))
1159                 return tcp_v4_conn_request(sk, skb);
1160
1161         if (!ipv6_unicast_destination(skb))
1162                 goto drop;
1163
1164         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1165                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1166                 return 0;
1167         }
1168
1169         return tcp_conn_request(&tcp6_request_sock_ops,
1170                                 &tcp_request_sock_ipv6_ops, sk, skb);
1171
1172 drop:
1173         tcp_listendrop(sk);
1174         return 0; /* don't send reset */
1175 }
1176
1177 static void tcp_v6_restore_cb(struct sk_buff *skb)
1178 {
1179         /* We need to move header back to the beginning if xfrm6_policy_check()
1180          * and tcp_v6_fill_cb() are going to be called again.
1181          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1182          */
1183         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1184                 sizeof(struct inet6_skb_parm));
1185 }
1186
1187 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1188                                          struct request_sock *req,
1189                                          struct dst_entry *dst,
1190                                          struct request_sock *req_unhash,
1191                                          bool *own_req)
1192 {
1193         struct inet_request_sock *ireq;
1194         struct ipv6_pinfo *newnp;
1195         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1196         struct ipv6_txoptions *opt;
1197         struct inet_sock *newinet;
1198         bool found_dup_sk = false;
1199         struct tcp_sock *newtp;
1200         struct sock *newsk;
1201 #ifdef CONFIG_TCP_MD5SIG
1202         struct tcp_md5sig_key *key;
1203         int l3index;
1204 #endif
1205         struct flowi6 fl6;
1206
1207         if (skb->protocol == htons(ETH_P_IP)) {
1208                 /*
1209                  *      v6 mapped
1210                  */
1211
1212                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1213                                              req_unhash, own_req);
1214
1215                 if (!newsk)
1216                         return NULL;
1217
1218                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1219
1220                 newnp = tcp_inet6_sk(newsk);
1221                 newtp = tcp_sk(newsk);
1222
1223                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1224
1225                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1226
1227                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1228                 if (sk_is_mptcp(newsk))
1229                         mptcpv6_handle_mapped(newsk, true);
1230                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1231 #ifdef CONFIG_TCP_MD5SIG
1232                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1233 #endif
1234
1235                 newnp->ipv6_mc_list = NULL;
1236                 newnp->ipv6_ac_list = NULL;
1237                 newnp->ipv6_fl_list = NULL;
1238                 newnp->pktoptions  = NULL;
1239                 newnp->opt         = NULL;
1240                 newnp->mcast_oif   = inet_iif(skb);
1241                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1242                 newnp->rcv_flowinfo = 0;
1243                 if (inet6_test_bit(REPFLOW, sk))
1244                         newnp->flow_label = 0;
1245
1246                 /*
1247                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1248                  * here, tcp_create_openreq_child now does this for us, see the comment in
1249                  * that function for the gory details. -acme
1250                  */
1251
1252                 /* This is a tricky place. Until this moment the IPv4 tcp code
1253                    worked with the IPv6 icsk.icsk_af_ops.
1254                    Sync it now.
1255                  */
1256                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1257
1258                 return newsk;
1259         }
1260
1261         ireq = inet_rsk(req);
1262
1263         if (sk_acceptq_is_full(sk))
1264                 goto out_overflow;
1265
1266         if (!dst) {
1267                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1268                 if (!dst)
1269                         goto out;
1270         }
1271
1272         newsk = tcp_create_openreq_child(sk, req, skb);
1273         if (!newsk)
1274                 goto out_nonewsk;
1275
1276         /*
1277          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1278          * count here, tcp_create_openreq_child now does this for us, see the
1279          * comment in that function for the gory details. -acme
1280          */
1281
1282         newsk->sk_gso_type = SKB_GSO_TCPV6;
1283         ip6_dst_store(newsk, dst, NULL, NULL);
1284         inet6_sk_rx_dst_set(newsk, skb);
1285
1286         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1287
1288         newtp = tcp_sk(newsk);
1289         newinet = inet_sk(newsk);
1290         newnp = tcp_inet6_sk(newsk);
1291
1292         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1293
1294         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1295         newnp->saddr = ireq->ir_v6_loc_addr;
1296         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1297         newsk->sk_bound_dev_if = ireq->ir_iif;
1298
1299         /* Now IPv6 options...
1300
1301            First: no IPv4 options.
1302          */
1303         newinet->inet_opt = NULL;
1304         newnp->ipv6_mc_list = NULL;
1305         newnp->ipv6_ac_list = NULL;
1306         newnp->ipv6_fl_list = NULL;
1307
1308         /* Clone RX bits */
1309         newnp->rxopt.all = np->rxopt.all;
1310
1311         newnp->pktoptions = NULL;
1312         newnp->opt        = NULL;
1313         newnp->mcast_oif  = tcp_v6_iif(skb);
1314         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1315         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1316         if (inet6_test_bit(REPFLOW, sk))
1317                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1318
1319         /* Set ToS of the new socket based upon the value of incoming SYN.
1320          * ECT bits are set later in tcp_init_transfer().
1321          */
1322         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1323                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1324
1325         /* Clone native IPv6 options from listening socket (if any)
1326
1327            Yes, keeping a reference count would be much more clever,
1328            but we do one more thing here: reattach the optmem
1329            to newsk.
1330          */
1331         opt = ireq->ipv6_opt;
1332         if (!opt)
1333                 opt = rcu_dereference(np->opt);
1334         if (opt) {
1335                 opt = ipv6_dup_options(newsk, opt);
1336                 RCU_INIT_POINTER(newnp->opt, opt);
1337         }
1338         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1339         if (opt)
1340                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1341                                                     opt->opt_flen;
1342
1343         tcp_ca_openreq_child(newsk, dst);
1344
1345         tcp_sync_mss(newsk, dst_mtu(dst));
1346         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1347
1348         tcp_initialize_rcv_mss(newsk);
1349
1350         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1351         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1352
1353 #ifdef CONFIG_TCP_MD5SIG
1354         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1355
1356         /* Copy over the MD5 key from the original socket */
1357         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1358         if (key) {
1359                 const union tcp_md5_addr *addr;
1360
1361                 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1362                 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1363                         inet_csk_prepare_forced_close(newsk);
1364                         tcp_done(newsk);
1365                         goto out;
1366                 }
1367         }
1368 #endif
1369
1370         if (__inet_inherit_port(sk, newsk) < 0) {
1371                 inet_csk_prepare_forced_close(newsk);
1372                 tcp_done(newsk);
1373                 goto out;
1374         }
1375         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1376                                        &found_dup_sk);
1377         if (*own_req) {
1378                 tcp_move_syn(newtp, req);
1379
1380                 /* Clone pktoptions received with SYN, if we own the req */
1381                 if (ireq->pktopts) {
1382                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1383                         consume_skb(ireq->pktopts);
1384                         ireq->pktopts = NULL;
1385                         if (newnp->pktoptions)
1386                                 tcp_v6_restore_cb(newnp->pktoptions);
1387                 }
1388         } else {
1389                 if (!req_unhash && found_dup_sk) {
1390                         /* This code path should be executed only in the
1391                          * syncookie case
1392                          */
1393                         bh_unlock_sock(newsk);
1394                         sock_put(newsk);
1395                         newsk = NULL;
1396                 }
1397         }
1398
1399         return newsk;
1400
1401 out_overflow:
1402         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1403 out_nonewsk:
1404         dst_release(dst);
1405 out:
1406         tcp_listendrop(sk);
1407         return NULL;
1408 }
1409
1410 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1411                                                            u32));
1412 /* The socket must have its spinlock held when we get
1413  * here, unless it is a TCP_LISTEN socket.
1414  *
1415  * We have a potential double-lock case here, so even when
1416  * doing backlog processing we use the BH locking scheme.
1417  * This is because we cannot sleep with the original spinlock
1418  * held.
1419  */
1420 INDIRECT_CALLABLE_SCOPE
1421 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1422 {
1423         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1424         struct sk_buff *opt_skb = NULL;
1425         enum skb_drop_reason reason;
1426         struct tcp_sock *tp;
1427
1428         /* Imagine: socket is IPv6. IPv4 packet arrives,
1429            goes to the IPv4 receive handler and is backlogged.
1430            From the backlog it always goes here. Kerboom...
1431            Fortunately, tcp_rcv_established and rcv_established
1432            handle them correctly, but it is not the case with
1433            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1434          */
1435
1436         if (skb->protocol == htons(ETH_P_IP))
1437                 return tcp_v4_do_rcv(sk, skb);
1438
1439         /*
1440          *      socket locking is here for SMP purposes as backlog rcv
1441          *      is currently called with bh processing disabled.
1442          */
1443
1444         /* Do Stevens' IPV6_PKTOPTIONS.
1445
1446            Yes, guys, this is the only place in our code where we
1447            can handle it without affecting IPv4.
1448            The rest of the code is protocol independent,
1449            and I do not like the idea of uglifying IPv4.
1450
1451            Actually, the whole idea behind IPV6_PKTOPTIONS
1452            does not look very well thought out. For now we latch
1453            the options received in the last packet enqueued
1454            by tcp. Feel free to propose a better solution.
1455                                                --ANK (980728)
1456          */
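        /* A minimal userspace sketch of the Stevens API described above
         * (illustrative only, not part of this file):
         *
         *	int on = 1;
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *	// the latched options can then be read back via
         *	// getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, buf, &len);
         */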
1457         if (np->rxopt.all)
1458                 opt_skb = skb_clone_and_charge_r(skb, sk);
1459
1460         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1461         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1462                 struct dst_entry *dst;
1463
1464                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1465                                                 lockdep_sock_is_held(sk));
1466
1467                 sock_rps_save_rxhash(sk, skb);
1468                 sk_mark_napi_id(sk, skb);
1469                 if (dst) {
1470                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1471                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1472                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1473                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1474                                 dst_release(dst);
1475                         }
1476                 }
1477
1478                 tcp_rcv_established(sk, skb);
1479                 if (opt_skb)
1480                         goto ipv6_pktoptions;
1481                 return 0;
1482         }
1483
1484         if (tcp_checksum_complete(skb))
1485                 goto csum_err;
1486
1487         if (sk->sk_state == TCP_LISTEN) {
1488                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1489
1490                 if (!nsk)
1491                         goto discard;
1492
1493                 if (nsk != sk) {
1494                         if (tcp_child_process(sk, nsk, skb))
1495                                 goto reset;
1496                         if (opt_skb)
1497                                 __kfree_skb(opt_skb);
1498                         return 0;
1499                 }
1500         } else
1501                 sock_rps_save_rxhash(sk, skb);
1502
1503         if (tcp_rcv_state_process(sk, skb))
1504                 goto reset;
1505         if (opt_skb)
1506                 goto ipv6_pktoptions;
1507         return 0;
1508
1509 reset:
1510         tcp_v6_send_reset(sk, skb);
1511 discard:
1512         if (opt_skb)
1513                 __kfree_skb(opt_skb);
1514         kfree_skb_reason(skb, reason);
1515         return 0;
1516 csum_err:
1517         reason = SKB_DROP_REASON_TCP_CSUM;
1518         trace_tcp_bad_csum(skb);
1519         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1520         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1521         goto discard;
1522
1523
1524 ipv6_pktoptions:
1525         /* You may ask, what does this check verify?
1526
1527            1. The skb was enqueued by tcp.
1528            2. The skb was added to the tail of the read queue, not out of order.
1529            3. The socket is not in a passive state.
1530            4. Finally, it really contains options the user wants to receive.
1531          */
1532         tp = tcp_sk(sk);
1533         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1534             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1535                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1536                         np->mcast_oif = tcp_v6_iif(opt_skb);
1537                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1538                         WRITE_ONCE(np->mcast_hops,
1539                                    ipv6_hdr(opt_skb)->hop_limit);
1540                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1541                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1542                 if (inet6_test_bit(REPFLOW, sk))
1543                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1544                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1545                         tcp_v6_restore_cb(opt_skb);
1546                         opt_skb = xchg(&np->pktoptions, opt_skb);
1547                 } else {
1548                         __kfree_skb(opt_skb);
1549                         opt_skb = xchg(&np->pktoptions, NULL);
1550                 }
1551         }
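        /* The xchg() calls above atomically swap the old np->pktoptions
         * out (possibly NULL); whatever came back is released exactly once
         * by the consume_skb() below.
         */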
1552
1553         consume_skb(opt_skb);
1554         return 0;
1555 }
1556
1557 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1558                            const struct tcphdr *th)
1559 {
1560         /* This is tricky: we move IP6CB at its correct location into
1561          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1562          * _decode_session6() uses IP6CB().
1563          * barrier() makes sure compiler won't play aliasing games.
1564          */
1565         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1566                 sizeof(struct inet6_skb_parm));
1567         barrier();
1568
1569         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1570         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1571                                     skb->len - th->doff*4);
1572         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1573         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1574         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1575         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1576         TCP_SKB_CB(skb)->sacked = 0;
1577         TCP_SKB_CB(skb)->has_rxtstamp =
1578                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1579 }
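/* IP6CB() and TCP_SKB_CB() both alias skb->cb[]: tcp_v6_fill_cb() rescues
 * the inet6_skb_parm into TCP_SKB_CB()->header.h6 before TCP overwrites
 * the rest of the control block, and tcp_v6_restore_cb() moves it back
 * whenever an skb is handed to code that expects IP6CB() again.
 */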
1580
1581 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1582 {
1583         enum skb_drop_reason drop_reason;
1584         int sdif = inet6_sdif(skb);
1585         int dif = inet6_iif(skb);
1586         const struct tcphdr *th;
1587         const struct ipv6hdr *hdr;
1588         bool refcounted;
1589         struct sock *sk;
1590         int ret;
1591         struct net *net = dev_net(skb->dev);
1592
1593         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1594         if (skb->pkt_type != PACKET_HOST)
1595                 goto discard_it;
1596
1597         /*
1598          *      Count it even if it's bad.
1599          */
1600         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1601
1602         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1603                 goto discard_it;
1604
1605         th = (const struct tcphdr *)skb->data;
1606
1607         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1608                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1609                 goto bad_packet;
1610         }
1611         if (!pskb_may_pull(skb, th->doff*4))
1612                 goto discard_it;
1613
1614         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1615                 goto csum_error;
1616
1617         th = (const struct tcphdr *)skb->data;
1618         hdr = ipv6_hdr(skb);
1619
1620 lookup:
1621         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1622                                 th->source, th->dest, inet6_iif(skb), sdif,
1623                                 &refcounted);
1624         if (!sk)
1625                 goto no_tcp_socket;
1626
1627 process:
1628         if (sk->sk_state == TCP_TIME_WAIT)
1629                 goto do_time_wait;
1630
1631         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1632                 struct request_sock *req = inet_reqsk(sk);
1633                 bool req_stolen = false;
1634                 struct sock *nsk;
1635
1636                 sk = req->rsk_listener;
1637                 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1638                         drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1639                 else
1640                         drop_reason = tcp_inbound_md5_hash(sk, skb,
1641                                                            &hdr->saddr, &hdr->daddr,
1642                                                            AF_INET6, dif, sdif);
1643                 if (drop_reason) {
1644                         sk_drops_add(sk, skb);
1645                         reqsk_put(req);
1646                         goto discard_it;
1647                 }
1648                 if (tcp_checksum_complete(skb)) {
1649                         reqsk_put(req);
1650                         goto csum_error;
1651                 }
1652                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1653                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1654                         if (!nsk) {
1655                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1656                                 goto lookup;
1657                         }
1658                         sk = nsk;
1659                         /* reuseport_migrate_sock() has already taken one
1660                          * sk_refcnt reference before returning.
1661                          */
1662                 } else {
1663                         sock_hold(sk);
1664                 }
1665                 refcounted = true;
1666                 nsk = NULL;
1667                 if (!tcp_filter(sk, skb)) {
1668                         th = (const struct tcphdr *)skb->data;
1669                         hdr = ipv6_hdr(skb);
1670                         tcp_v6_fill_cb(skb, hdr, th);
1671                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1672                 } else {
1673                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1674                 }
1675                 if (!nsk) {
1676                         reqsk_put(req);
1677                         if (req_stolen) {
1678                                 /* Another CPU got exclusive access to req
1679                                  * and created a full-blown socket.
1680                                  * Try to feed this packet to that socket
1681                                  * instead of discarding it.
1682                                  */
1683                                 tcp_v6_restore_cb(skb);
1684                                 sock_put(sk);
1685                                 goto lookup;
1686                         }
1687                         goto discard_and_relse;
1688                 }
1689                 nf_reset_ct(skb);
1690                 if (nsk == sk) {
1691                         reqsk_put(req);
1692                         tcp_v6_restore_cb(skb);
1693                 } else if (tcp_child_process(sk, nsk, skb)) {
1694                         tcp_v6_send_reset(nsk, skb);
1695                         goto discard_and_relse;
1696                 } else {
1697                         sock_put(sk);
1698                         return 0;
1699                 }
1700         }
1701
1702         if (static_branch_unlikely(&ip6_min_hopcount)) {
1703                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1704                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1705                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1706                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1707                         goto discard_and_relse;
1708                 }
1709         }
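        /* This implements the IPV6_MINHOPCOUNT socket option, the IPv6
         * flavour of the Generalized TTL Security Mechanism (RFC 5082):
         * a peer known to be at most N hops away sends with hop_limit 255,
         * so anything below the configured floor must have been spoofed
         * from farther away and is dropped before touching socket state.
         */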
1710
1711         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1712                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1713                 goto discard_and_relse;
1714         }
1715
1716         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1717                                            AF_INET6, dif, sdif);
1718         if (drop_reason)
1719                 goto discard_and_relse;
1720
1721         nf_reset_ct(skb);
1722
1723         if (tcp_filter(sk, skb)) {
1724                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1725                 goto discard_and_relse;
1726         }
1727         th = (const struct tcphdr *)skb->data;
1728         hdr = ipv6_hdr(skb);
1729         tcp_v6_fill_cb(skb, hdr, th);
1730
1731         skb->dev = NULL;
1732
1733         if (sk->sk_state == TCP_LISTEN) {
1734                 ret = tcp_v6_do_rcv(sk, skb);
1735                 goto put_and_return;
1736         }
1737
1738         sk_incoming_cpu_update(sk);
1739
1740         bh_lock_sock_nested(sk);
1741         tcp_segs_in(tcp_sk(sk), skb);
1742         ret = 0;
1743         if (!sock_owned_by_user(sk)) {
1744                 ret = tcp_v6_do_rcv(sk, skb);
1745         } else {
1746                 if (tcp_add_backlog(sk, skb, &drop_reason))
1747                         goto discard_and_relse;
1748         }
1749         bh_unlock_sock(sk);
1750 put_and_return:
1751         if (refcounted)
1752                 sock_put(sk);
1753         return ret ? -1 : 0;
1754
1755 no_tcp_socket:
1756         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1757         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1758                 goto discard_it;
1759
1760         tcp_v6_fill_cb(skb, hdr, th);
1761
1762         if (tcp_checksum_complete(skb)) {
1763 csum_error:
1764                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1765                 trace_tcp_bad_csum(skb);
1766                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1767 bad_packet:
1768                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1769         } else {
1770                 tcp_v6_send_reset(NULL, skb);
1771         }
1772
1773 discard_it:
1774         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1775         kfree_skb_reason(skb, drop_reason);
1776         return 0;
1777
1778 discard_and_relse:
1779         sk_drops_add(sk, skb);
1780         if (refcounted)
1781                 sock_put(sk);
1782         goto discard_it;
1783
1784 do_time_wait:
1785         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1786                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1787                 inet_twsk_put(inet_twsk(sk));
1788                 goto discard_it;
1789         }
1790
1791         tcp_v6_fill_cb(skb, hdr, th);
1792
1793         if (tcp_checksum_complete(skb)) {
1794                 inet_twsk_put(inet_twsk(sk));
1795                 goto csum_error;
1796         }
1797
1798         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1799         case TCP_TW_SYN:
1800         {
1801                 struct sock *sk2;
1802
1803                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1804                                             skb, __tcp_hdrlen(th),
1805                                             &ipv6_hdr(skb)->saddr, th->source,
1806                                             &ipv6_hdr(skb)->daddr,
1807                                             ntohs(th->dest),
1808                                             tcp_v6_iif_l3_slave(skb),
1809                                             sdif);
1810                 if (sk2) {
1811                         struct inet_timewait_sock *tw = inet_twsk(sk);
1812                         inet_twsk_deschedule_put(tw);
1813                         sk = sk2;
1814                         tcp_v6_restore_cb(skb);
1815                         refcounted = false;
1816                         goto process;
1817                 }
1818         }
1819                 /* to ACK */
1820                 fallthrough;
1821         case TCP_TW_ACK:
1822                 tcp_v6_timewait_ack(sk, skb);
1823                 break;
1824         case TCP_TW_RST:
1825                 tcp_v6_send_reset(sk, skb);
1826                 inet_twsk_deschedule_put(inet_twsk(sk));
1827                 goto discard_it;
1828         case TCP_TW_SUCCESS:
1829                 ;
1830         }
1831         goto discard_it;
1832 }
1833
1834 void tcp_v6_early_demux(struct sk_buff *skb)
1835 {
1836         struct net *net = dev_net(skb->dev);
1837         const struct ipv6hdr *hdr;
1838         const struct tcphdr *th;
1839         struct sock *sk;
1840
1841         if (skb->pkt_type != PACKET_HOST)
1842                 return;
1843
1844         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1845                 return;
1846
1847         hdr = ipv6_hdr(skb);
1848         th = tcp_hdr(skb);
1849
1850         if (th->doff < sizeof(struct tcphdr) / 4)
1851                 return;
1852
1853         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1854         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1855                                         &hdr->saddr, th->source,
1856                                         &hdr->daddr, ntohs(th->dest),
1857                                         inet6_iif(skb), inet6_sdif(skb));
1858         if (sk) {
1859                 skb->sk = sk;
1860                 skb->destructor = sock_edemux;
1861                 if (sk_fullsock(sk)) {
1862                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1863
1864                         if (dst)
1865                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1866                         if (dst &&
1867                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1868                                 skb_dst_set_noref(skb, dst);
1869                 }
1870         }
1871 }
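/* Early demux runs from the IPv6 input path before routing. Finding an
 * established socket this early lets the stack attach the socket's cached
 * rx dst (noref) to the skb and skip a full routing lookup for the common
 * case; listeners are intentionally not matched here.
 */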
1872
1873 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1874         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1875         .twsk_unique    = tcp_twsk_unique,
1876         .twsk_destructor = tcp_twsk_destructor,
1877 };
1878
1879 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1880 {
1881         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1882 }
1883
1884 const struct inet_connection_sock_af_ops ipv6_specific = {
1885         .queue_xmit        = inet6_csk_xmit,
1886         .send_check        = tcp_v6_send_check,
1887         .rebuild_header    = inet6_sk_rebuild_header,
1888         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1889         .conn_request      = tcp_v6_conn_request,
1890         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1891         .net_header_len    = sizeof(struct ipv6hdr),
1892         .setsockopt        = ipv6_setsockopt,
1893         .getsockopt        = ipv6_getsockopt,
1894         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1895         .sockaddr_len      = sizeof(struct sockaddr_in6),
1896         .mtu_reduced       = tcp_v6_mtu_reduced,
1897 };
1898
1899 #ifdef CONFIG_TCP_MD5SIG
1900 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1901         .md5_lookup     =       tcp_v6_md5_lookup,
1902         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1903         .md5_parse      =       tcp_v6_parse_md5_keys,
1904 };
1905 #endif
1906
1907 /*
1908  *      TCP over IPv4 via INET6 API
1909  */
1910 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1911         .queue_xmit        = ip_queue_xmit,
1912         .send_check        = tcp_v4_send_check,
1913         .rebuild_header    = inet_sk_rebuild_header,
1914         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1915         .conn_request      = tcp_v6_conn_request,
1916         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1917         .net_header_len    = sizeof(struct iphdr),
1918         .setsockopt        = ipv6_setsockopt,
1919         .getsockopt        = ipv6_getsockopt,
1920         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1921         .sockaddr_len      = sizeof(struct sockaddr_in6),
1922         .mtu_reduced       = tcp_v4_mtu_reduced,
1923 };
1924
1925 #ifdef CONFIG_TCP_MD5SIG
1926 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1927         .md5_lookup     =       tcp_v4_md5_lookup,
1928         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1929         .md5_parse      =       tcp_v6_parse_md5_keys,
1930 };
1931 #endif
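/* When an AF_INET6 socket connects to a v4-mapped address, tcp_v6_connect()
 * swaps icsk->icsk_af_ops to &ipv6_mapped (and tp->af_specific to the
 * mapped MD5 ops above), so the connection is carried over plain IPv4
 * using the v4 transmit and checksum helpers.
 */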
1932
1933 /* NOTE: A lot of things are set to zero explicitly by the call to
1934  *       sk_alloc(), so they need not be done here.
1935  */
1936 static int tcp_v6_init_sock(struct sock *sk)
1937 {
1938         struct inet_connection_sock *icsk = inet_csk(sk);
1939
1940         tcp_init_sock(sk);
1941
1942         icsk->icsk_af_ops = &ipv6_specific;
1943
1944 #ifdef CONFIG_TCP_MD5SIG
1945         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1946 #endif
1947
1948         return 0;
1949 }
1950
1951 #ifdef CONFIG_PROC_FS
1952 /* Proc filesystem TCPv6 sock list dumping. */
1953 static void get_openreq6(struct seq_file *seq,
1954                          const struct request_sock *req, int i)
1955 {
1956         long ttd = req->rsk_timer.expires - jiffies;
1957         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1958         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1959
1960         if (ttd < 0)
1961                 ttd = 0;
1962
1963         seq_printf(seq,
1964                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1966                    i,
1967                    src->s6_addr32[0], src->s6_addr32[1],
1968                    src->s6_addr32[2], src->s6_addr32[3],
1969                    inet_rsk(req)->ir_num,
1970                    dest->s6_addr32[0], dest->s6_addr32[1],
1971                    dest->s6_addr32[2], dest->s6_addr32[3],
1972                    ntohs(inet_rsk(req)->ir_rmt_port),
1973                    TCP_SYN_RECV,
1974                    0, 0, /* could print option size, but that is af dependent. */
1975                    1,   /* timers active (only the expire timer) */
1976                    jiffies_to_clock_t(ttd),
1977                    req->num_timeout,
1978                    from_kuid_munged(seq_user_ns(seq),
1979                                     sock_i_uid(req->rsk_listener)),
1980                    0,  /* non standard timer */
1981                    0, /* open_requests have no inode */
1982                    0, req);
1983 }
1984
1985 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1986 {
1987         const struct in6_addr *dest, *src;
1988         __u16 destp, srcp;
1989         int timer_active;
1990         unsigned long timer_expires;
1991         const struct inet_sock *inet = inet_sk(sp);
1992         const struct tcp_sock *tp = tcp_sk(sp);
1993         const struct inet_connection_sock *icsk = inet_csk(sp);
1994         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1995         int rx_queue;
1996         int state;
1997
1998         dest  = &sp->sk_v6_daddr;
1999         src   = &sp->sk_v6_rcv_saddr;
2000         destp = ntohs(inet->inet_dport);
2001         srcp  = ntohs(inet->inet_sport);
2002
2003         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2004             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2005             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2006                 timer_active    = 1;
2007                 timer_expires   = icsk->icsk_timeout;
2008         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2009                 timer_active    = 4;
2010                 timer_expires   = icsk->icsk_timeout;
2011         } else if (timer_pending(&sp->sk_timer)) {
2012                 timer_active    = 2;
2013                 timer_expires   = sp->sk_timer.expires;
2014         } else {
2015                 timer_active    = 0;
2016                 timer_expires = jiffies;
2017         }
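        /* timer_active encodes the "tr" column of /proc/net/tcp6:
         * 1 == retransmit/loss-probe/reo timer, 4 == zero-window probe,
         * 2 == keepalive (sk_timer), 0 == nothing pending.
         */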
2018
2019         state = inet_sk_state_load(sp);
2020         if (state == TCP_LISTEN)
2021                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2022         else
2023                 /* Because we don't lock the socket,
2024                  * we might find a transient negative value.
2025                  */
2026                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2027                                       READ_ONCE(tp->copied_seq), 0);
2028
2029         seq_printf(seq,
2030                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2031                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2032                    i,
2033                    src->s6_addr32[0], src->s6_addr32[1],
2034                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2035                    dest->s6_addr32[0], dest->s6_addr32[1],
2036                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2037                    state,
2038                    READ_ONCE(tp->write_seq) - tp->snd_una,
2039                    rx_queue,
2040                    timer_active,
2041                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2042                    icsk->icsk_retransmits,
2043                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2044                    icsk->icsk_probes_out,
2045                    sock_i_ino(sp),
2046                    refcount_read(&sp->sk_refcnt), sp,
2047                    jiffies_to_clock_t(icsk->icsk_rto),
2048                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2049                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2050                    tcp_snd_cwnd(tp),
2051                    state == TCP_LISTEN ?
2052                         fastopenq->max_qlen :
2053                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2054                    );
2055 }
2056
2057 static void get_timewait6_sock(struct seq_file *seq,
2058                                struct inet_timewait_sock *tw, int i)
2059 {
2060         long delta = tw->tw_timer.expires - jiffies;
2061         const struct in6_addr *dest, *src;
2062         __u16 destp, srcp;
2063
2064         dest = &tw->tw_v6_daddr;
2065         src  = &tw->tw_v6_rcv_saddr;
2066         destp = ntohs(tw->tw_dport);
2067         srcp  = ntohs(tw->tw_sport);
2068
2069         seq_printf(seq,
2070                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2071                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2072                    i,
2073                    src->s6_addr32[0], src->s6_addr32[1],
2074                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2075                    dest->s6_addr32[0], dest->s6_addr32[1],
2076                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2077                    tw->tw_substate, 0, 0,
2078                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2079                    refcount_read(&tw->tw_refcnt), tw);
2080 }
2081
2082 static int tcp6_seq_show(struct seq_file *seq, void *v)
2083 {
2084         struct tcp_iter_state *st;
2085         struct sock *sk = v;
2086
2087         if (v == SEQ_START_TOKEN) {
2088                 seq_puts(seq,
2089                          "  sl  "
2090                          "local_address                         "
2091                          "remote_address                        "
2092                          "st tx_queue rx_queue tr tm->when retrnsmt"
2093                          "   uid  timeout inode\n");
2094                 goto out;
2095         }
2096         st = seq->private;
2097
2098         if (sk->sk_state == TCP_TIME_WAIT)
2099                 get_timewait6_sock(seq, v, st->num);
2100         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2101                 get_openreq6(seq, v, st->num);
2102         else
2103                 get_tcp6_sock(seq, v, st->num);
2104 out:
2105         return 0;
2106 }
2107
2108 static const struct seq_operations tcp6_seq_ops = {
2109         .show           = tcp6_seq_show,
2110         .start          = tcp_seq_start,
2111         .next           = tcp_seq_next,
2112         .stop           = tcp_seq_stop,
2113 };
2114
2115 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2116         .family         = AF_INET6,
2117 };
2118
2119 int __net_init tcp6_proc_init(struct net *net)
2120 {
2121         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2122                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2123                 return -ENOMEM;
2124         return 0;
2125 }
2126
2127 void tcp6_proc_exit(struct net *net)
2128 {
2129         remove_proc_entry("tcp6", net->proc_net);
2130 }
2131 #endif
2132
2133 struct proto tcpv6_prot = {
2134         .name                   = "TCPv6",
2135         .owner                  = THIS_MODULE,
2136         .close                  = tcp_close,
2137         .pre_connect            = tcp_v6_pre_connect,
2138         .connect                = tcp_v6_connect,
2139         .disconnect             = tcp_disconnect,
2140         .accept                 = inet_csk_accept,
2141         .ioctl                  = tcp_ioctl,
2142         .init                   = tcp_v6_init_sock,
2143         .destroy                = tcp_v4_destroy_sock,
2144         .shutdown               = tcp_shutdown,
2145         .setsockopt             = tcp_setsockopt,
2146         .getsockopt             = tcp_getsockopt,
2147         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2148         .keepalive              = tcp_set_keepalive,
2149         .recvmsg                = tcp_recvmsg,
2150         .sendmsg                = tcp_sendmsg,
2151         .splice_eof             = tcp_splice_eof,
2152         .backlog_rcv            = tcp_v6_do_rcv,
2153         .release_cb             = tcp_release_cb,
2154         .hash                   = inet6_hash,
2155         .unhash                 = inet_unhash,
2156         .get_port               = inet_csk_get_port,
2157         .put_port               = inet_put_port,
2158 #ifdef CONFIG_BPF_SYSCALL
2159         .psock_update_sk_prot   = tcp_bpf_update_proto,
2160 #endif
2161         .enter_memory_pressure  = tcp_enter_memory_pressure,
2162         .leave_memory_pressure  = tcp_leave_memory_pressure,
2163         .stream_memory_free     = tcp_stream_memory_free,
2164         .sockets_allocated      = &tcp_sockets_allocated,
2165
2166         .memory_allocated       = &tcp_memory_allocated,
2167         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2168
2169         .memory_pressure        = &tcp_memory_pressure,
2170         .orphan_count           = &tcp_orphan_count,
2171         .sysctl_mem             = sysctl_tcp_mem,
2172         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2173         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2174         .max_header             = MAX_TCP_HEADER,
2175         .obj_size               = sizeof(struct tcp6_sock),
2176         .ipv6_pinfo_offset      = offsetof(struct tcp6_sock, inet6),
2177         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2178         .twsk_prot              = &tcp6_timewait_sock_ops,
2179         .rsk_prot               = &tcp6_request_sock_ops,
2180         .h.hashinfo             = NULL,
2181         .no_autobind            = true,
2182         .diag_destroy           = tcp_abort,
2183 };
2184 EXPORT_SYMBOL_GPL(tcpv6_prot);
2185
2186 static const struct inet6_protocol tcpv6_protocol = {
2187         .handler        =       tcp_v6_rcv,
2188         .err_handler    =       tcp_v6_err,
2189         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2190 };
2191
2192 static struct inet_protosw tcpv6_protosw = {
2193         .type           =       SOCK_STREAM,
2194         .protocol       =       IPPROTO_TCP,
2195         .prot           =       &tcpv6_prot,
2196         .ops            =       &inet6_stream_ops,
2197         .flags          =       INET_PROTOSW_PERMANENT |
2198                                 INET_PROTOSW_ICSK,
2199 };
2200
2201 static int __net_init tcpv6_net_init(struct net *net)
2202 {
2203         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2204                                     SOCK_RAW, IPPROTO_TCP, net);
2205 }
2206
2207 static void __net_exit tcpv6_net_exit(struct net *net)
2208 {
2209         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2210 }
2211
2212 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2213 {
2214         tcp_twsk_purge(net_exit_list, AF_INET6);
2215 }
2216
2217 static struct pernet_operations tcpv6_net_ops = {
2218         .init       = tcpv6_net_init,
2219         .exit       = tcpv6_net_exit,
2220         .exit_batch = tcpv6_net_exit_batch,
2221 };
2222
2223 int __init tcpv6_init(void)
2224 {
2225         int ret;
2226
2227         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2228         if (ret)
2229                 goto out;
2230
2231         /* register inet6 protocol */
2232         ret = inet6_register_protosw(&tcpv6_protosw);
2233         if (ret)
2234                 goto out_tcpv6_protocol;
2235
2236         ret = register_pernet_subsys(&tcpv6_net_ops);
2237         if (ret)
2238                 goto out_tcpv6_protosw;
2239
2240         ret = mptcpv6_init();
2241         if (ret)
2242                 goto out_tcpv6_pernet_subsys;
2243
2244 out:
2245         return ret;
2246
2247 out_tcpv6_pernet_subsys:
2248         unregister_pernet_subsys(&tcpv6_net_ops);
2249 out_tcpv6_protosw:
2250         inet6_unregister_protosw(&tcpv6_protosw);
2251 out_tcpv6_protocol:
2252         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2253         goto out;
2254 }
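/* The error labels above unwind in reverse order of registration and fall
 * through one another: failing at step N jumps to the label that undoes
 * step N-1 and then cascades through the remaining teardown calls.
 */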
2255
2256 void tcpv6_exit(void)
2257 {
2258         unregister_pernet_subsys(&tcpv6_net_ops);
2259         inet6_unregister_protosw(&tcpv6_protosw);
2260         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2261 }