net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      to a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
97                                               struct tcp6_sock, tcp)->inet6)
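
/* Usage sketch (illustrative, not part of this file): for a socket known
 * to be TCP,
 *
 *	struct ipv6_pinfo *np1 = inet6_sk(sk);		// pointer load
 *	struct ipv6_pinfo *np2 = tcp_inet6_sk(sk);	// constant offset
 *
 * the second form resolves to a fixed offset inside struct tcp6_sock,
 * avoiding the inet_sk(sk)->pinet6 load.
 */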
98
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101         struct dst_entry *dst = skb_dst(skb);
102
103         if (dst && dst_hold_safe(dst)) {
104                 const struct rt6_info *rt = (const struct rt6_info *)dst;
105
106                 rcu_assign_pointer(sk->sk_rx_dst, dst);
107                 sk->sk_rx_dst_ifindex = skb->skb_iif;
108                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
109         }
110 }
111
112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
113 {
114         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115                                 ipv6_hdr(skb)->saddr.s6_addr32,
116                                 tcp_hdr(skb)->dest,
117                                 tcp_hdr(skb)->source);
118 }
119
120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
121 {
122         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123                                    ipv6_hdr(skb)->saddr.s6_addr32);
124 }
125
126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
127                               int addr_len)
128 {
129         /* This check is replicated from tcp_v6_connect() and intended to
130          * prevent the BPF program called below from accessing bytes that
131          * are outside the bound specified by the user in addr_len.
132          */
133         if (addr_len < SIN6_LEN_RFC2133)
134                 return -EINVAL;
135
136         sock_owned_by_me(sk);
137
138         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
139 }
140
141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142                           int addr_len)
143 {
144         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145         struct inet_connection_sock *icsk = inet_csk(sk);
146         struct in6_addr *saddr = NULL, *final_p, final;
147         struct inet_timewait_death_row *tcp_death_row;
148         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149         struct inet_sock *inet = inet_sk(sk);
150         struct tcp_sock *tp = tcp_sk(sk);
151         struct net *net = sock_net(sk);
152         struct ipv6_txoptions *opt;
153         struct dst_entry *dst;
154         struct flowi6 fl6;
155         int addr_type;
156         int err;
157
158         if (addr_len < SIN6_LEN_RFC2133)
159                 return -EINVAL;
160
161         if (usin->sin6_family != AF_INET6)
162                 return -EAFNOSUPPORT;
163
164         memset(&fl6, 0, sizeof(fl6));
165
166         if (inet6_test_bit(SNDFLOW, sk)) {
167                 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
168                 IP6_ECN_flow_init(fl6.flowlabel);
169                 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
170                         struct ip6_flowlabel *flowlabel;
171                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172                         if (IS_ERR(flowlabel))
173                                 return -EINVAL;
174                         fl6_sock_release(flowlabel);
175                 }
176         }
177
178         /*
179          *      connect() to INADDR_ANY means loopback (BSD'ism).
180          */
181
182         if (ipv6_addr_any(&usin->sin6_addr)) {
183                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
185                                                &usin->sin6_addr);
186                 else
187                         usin->sin6_addr = in6addr_loopback;
188         }
189
190         addr_type = ipv6_addr_type(&usin->sin6_addr);
191
192         if (addr_type & IPV6_ADDR_MULTICAST)
193                 return -ENETUNREACH;
194
195         if (addr_type & IPV6_ADDR_LINKLOCAL) {
196                 if (addr_len >= sizeof(struct sockaddr_in6) &&
197                     usin->sin6_scope_id) {
198                         /* If an interface is set while binding, the indices
199                          * must coincide.
200                          */
201                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
202                                 return -EINVAL;
203
204                         sk->sk_bound_dev_if = usin->sin6_scope_id;
205                 }
206
207                 /* Connecting to a link-local address requires an interface */
208                 if (!sk->sk_bound_dev_if)
209                         return -EINVAL;
210         }
211
212         if (tp->rx_opt.ts_recent_stamp &&
213             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214                 tp->rx_opt.ts_recent = 0;
215                 tp->rx_opt.ts_recent_stamp = 0;
216                 WRITE_ONCE(tp->write_seq, 0);
217         }
218
219         sk->sk_v6_daddr = usin->sin6_addr;
220         np->flow_label = fl6.flowlabel;
221
222         /*
223          *      TCP over IPv4
224          */
225
226         if (addr_type & IPV6_ADDR_MAPPED) {
227                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
228                 struct sockaddr_in sin;
229
230                 if (ipv6_only_sock(sk))
231                         return -ENETUNREACH;
232
233                 sin.sin_family = AF_INET;
234                 sin.sin_port = usin->sin6_port;
235                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
236
237                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
239                 if (sk_is_mptcp(sk))
240                         mptcpv6_handle_mapped(sk, true);
241                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
244 #endif
245
246                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247
248                 if (err) {
249                         icsk->icsk_ext_hdr_len = exthdrlen;
250                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
252                         if (sk_is_mptcp(sk))
253                                 mptcpv6_handle_mapped(sk, false);
254                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256                         tp->af_specific = &tcp_sock_ipv6_specific;
257 #endif
258                         goto failure;
259                 }
260                 np->saddr = sk->sk_v6_rcv_saddr;
261
262                 return err;
263         }
264
265         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266                 saddr = &sk->sk_v6_rcv_saddr;
267
268         fl6.flowi6_proto = IPPROTO_TCP;
269         fl6.daddr = sk->sk_v6_daddr;
270         fl6.saddr = saddr ? *saddr : np->saddr;
271         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
290         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
291
292         if (!saddr) {
293                 saddr = &fl6.saddr;
294
295                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
296                 if (err)
297                         goto failure;
298         }
299
300         /* set the source address */
301         np->saddr = *saddr;
302         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
303
304         sk->sk_gso_type = SKB_GSO_TCPV6;
305         ip6_dst_store(sk, dst, NULL, NULL);
306
307         icsk->icsk_ext_hdr_len = 0;
308         if (opt)
309                 icsk->icsk_ext_hdr_len = opt->opt_flen +
310                                          opt->opt_nflen;
311
312         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
313
314         inet->inet_dport = usin->sin6_port;
315
316         tcp_set_state(sk, TCP_SYN_SENT);
317         err = inet6_hash_connect(tcp_death_row, sk);
318         if (err)
319                 goto late_failure;
320
321         sk_set_txhash(sk);
322
323         if (likely(!tp->repair)) {
324                 if (!tp->write_seq)
325                         WRITE_ONCE(tp->write_seq,
326                                    secure_tcpv6_seq(np->saddr.s6_addr32,
327                                                     sk->sk_v6_daddr.s6_addr32,
328                                                     inet->inet_sport,
329                                                     inet->inet_dport));
330                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
331                                                    sk->sk_v6_daddr.s6_addr32);
332         }
333
334         if (tcp_fastopen_defer_connect(sk, &err))
335                 return err;
336         if (err)
337                 goto late_failure;
338
339         err = tcp_connect(sk);
340         if (err)
341                 goto late_failure;
342
343         return 0;
344
345 late_failure:
346         tcp_set_state(sk, TCP_CLOSE);
347         inet_bhash2_reset_saddr(sk);
348 failure:
349         inet->inet_dport = 0;
350         sk->sk_route_caps = 0;
351         return err;
352 }
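
/* Minimal userspace sketch (not kernel code; address and port are
 * illustrative assumptions) of the two paths above:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *
 *	inet_pton(AF_INET6, "2001:db8::1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * A v4-mapped destination such as "::ffff:192.0.2.1" takes the
 * IPV6_ADDR_MAPPED branch instead and falls back to tcp_v4_connect().
 */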
353
354 static void tcp_v6_mtu_reduced(struct sock *sk)
355 {
356         struct dst_entry *dst;
357         u32 mtu;
358
359         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
360                 return;
361
362         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
363
364         /* Drop requests trying to increase our current mss.
365          * The check done in __ip6_rt_update_pmtu() happens too late.
366          */
367         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
368                 return;
369
370         dst = inet6_csk_update_pmtu(sk, mtu);
371         if (!dst)
372                 return;
373
374         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
375                 tcp_sync_mss(sk, dst_mtu(dst));
376                 tcp_simple_retransmit(sk);
377         }
378 }
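
/* Note: when the socket is owned by user context, tcp_v6_err() below does
 * not call this directly. It records the new MTU in tp->mtu_info, sets
 * TCP_MTU_REDUCED_DEFERRED and holds a reference; tcp_release_cb() then
 * invokes icsk_af_ops->mtu_reduced() once the socket lock is released.
 */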
379
380 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
381                 u8 type, u8 code, int offset, __be32 info)
382 {
383         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
384         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
385         struct net *net = dev_net(skb->dev);
386         struct request_sock *fastopen;
387         struct ipv6_pinfo *np;
388         struct tcp_sock *tp;
389         __u32 seq, snd_una;
390         struct sock *sk;
391         bool fatal;
392         int err;
393
394         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
395                                         &hdr->daddr, th->dest,
396                                         &hdr->saddr, ntohs(th->source),
397                                         skb->dev->ifindex, inet6_sdif(skb));
398
399         if (!sk) {
400                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
401                                   ICMP6_MIB_INERRORS);
402                 return -ENOENT;
403         }
404
405         if (sk->sk_state == TCP_TIME_WAIT) {
406                 inet_twsk_put(inet_twsk(sk));
407                 return 0;
408         }
409         seq = ntohl(th->seq);
410         fatal = icmpv6_err_convert(type, code, &err);
411         if (sk->sk_state == TCP_NEW_SYN_RECV) {
412                 tcp_req_err(sk, seq, fatal);
413                 return 0;
414         }
415
416         bh_lock_sock(sk);
417         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
418                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
419
420         if (sk->sk_state == TCP_CLOSE)
421                 goto out;
422
423         if (static_branch_unlikely(&ip6_min_hopcount)) {
424                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
425                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
426                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
427                         goto out;
428                 }
429         }
430
431         tp = tcp_sk(sk);
432         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
433         fastopen = rcu_dereference(tp->fastopen_rsk);
434         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
435         if (sk->sk_state != TCP_LISTEN &&
436             !between(seq, snd_una, tp->snd_nxt)) {
437                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
438                 goto out;
439         }
440
441         np = tcp_inet6_sk(sk);
442
443         if (type == NDISC_REDIRECT) {
444                 if (!sock_owned_by_user(sk)) {
445                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
446
447                         if (dst)
448                                 dst->ops->redirect(dst, sk, skb);
449                 }
450                 goto out;
451         }
452
453         if (type == ICMPV6_PKT_TOOBIG) {
454                 u32 mtu = ntohl(info);
455
456                 /* We are not interested in TCP_LISTEN and open_requests
457                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
458                  * they should go through unfragmented).
459                  */
460                 if (sk->sk_state == TCP_LISTEN)
461                         goto out;
462
463                 if (!ip6_sk_accept_pmtu(sk))
464                         goto out;
465
466                 if (mtu < IPV6_MIN_MTU)
467                         goto out;
468
469                 WRITE_ONCE(tp->mtu_info, mtu);
470
471                 if (!sock_owned_by_user(sk))
472                         tcp_v6_mtu_reduced(sk);
473                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
474                                            &sk->sk_tsq_flags))
475                         sock_hold(sk);
476                 goto out;
477         }
478
479
480         /* Might be for a request_sock */
481         switch (sk->sk_state) {
482         case TCP_SYN_SENT:
483         case TCP_SYN_RECV:
484                 /* Only in fast or simultaneous open. If a fast open socket is
485                  * already accepted it is treated as a connected one below.
486                  */
487                 if (fastopen && !fastopen->sk)
488                         break;
489
490                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
491
492                 if (!sock_owned_by_user(sk)) {
493                         WRITE_ONCE(sk->sk_err, err);
494                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
495
496                         tcp_done(sk);
497                 } else {
498                         WRITE_ONCE(sk->sk_err_soft, err);
499                 }
500                 goto out;
501         case TCP_LISTEN:
502                 break;
503         default:
504                 /* Check whether this ICMP message allows reverting
505                  * the backoff (see RFC 6069).
506                  */
507                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508                     code == ICMPV6_NOROUTE)
509                         tcp_ld_RTO_revert(sk, seq);
510         }
511
512         if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513                 WRITE_ONCE(sk->sk_err, err);
514                 sk_error_report(sk);
515         } else {
516                 WRITE_ONCE(sk->sk_err_soft, err);
517         }
518 out:
519         bh_unlock_sock(sk);
520         sock_put(sk);
521         return 0;
522 }
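
/* Dispatch summary for tcp_v6_err() above: NDISC_REDIRECT refreshes the
 * cached route via dst->ops->redirect(); ICMPV6_PKT_TOOBIG feeds PMTU
 * discovery through tcp_v6_mtu_reduced(); all other types are converted
 * by icmpv6_err_convert() and surface as sk_err or sk_err_soft depending
 * on socket state and the RECVERR6 flag.
 */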
523
524
525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
526                               struct flowi *fl,
527                               struct request_sock *req,
528                               struct tcp_fastopen_cookie *foc,
529                               enum tcp_synack_type synack_type,
530                               struct sk_buff *syn_skb)
531 {
532         struct inet_request_sock *ireq = inet_rsk(req);
533         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534         struct ipv6_txoptions *opt;
535         struct flowi6 *fl6 = &fl->u.ip6;
536         struct sk_buff *skb;
537         int err = -ENOMEM;
538         u8 tclass;
539
540         /* First, grab a route. */
541         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
542                                                IPPROTO_TCP)) == NULL)
543                 goto done;
544
545         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
546
547         if (skb) {
548                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
549                                     &ireq->ir_v6_rmt_addr);
550
551                 fl6->daddr = ireq->ir_v6_rmt_addr;
552                 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
553                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
554
555                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
556                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
557                                 (np->tclass & INET_ECN_MASK) :
558                                 np->tclass;
559
560                 if (!INET_ECN_is_capable(tclass) &&
561                     tcp_bpf_ca_needs_ecn((struct sock *)req))
562                         tclass |= INET_ECN_ECT_0;
563
564                 rcu_read_lock();
565                 opt = ireq->ipv6_opt;
566                 if (!opt)
567                         opt = rcu_dereference(np->opt);
568                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
569                                opt, tclass, READ_ONCE(sk->sk_priority));
570                 rcu_read_unlock();
571                 err = net_xmit_eval(err);
572         }
573
574 done:
575         return err;
576 }
577
578
579 static void tcp_v6_reqsk_destructor(struct request_sock *req)
580 {
581         kfree(inet_rsk(req)->ipv6_opt);
582         consume_skb(inet_rsk(req)->pktopts);
583 }
584
585 #ifdef CONFIG_TCP_MD5SIG
586 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
587                                                    const struct in6_addr *addr,
588                                                    int l3index)
589 {
590         return tcp_md5_do_lookup(sk, l3index,
591                                  (union tcp_md5_addr *)addr, AF_INET6);
592 }
593
594 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
595                                                 const struct sock *addr_sk)
596 {
597         int l3index;
598
599         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
600                                                  addr_sk->sk_bound_dev_if);
601         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
602                                     l3index);
603 }
604
605 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
606                                  sockptr_t optval, int optlen)
607 {
608         struct tcp_md5sig cmd;
609         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
610         int l3index = 0;
611         u8 prefixlen;
612         u8 flags;
613
614         if (optlen < sizeof(cmd))
615                 return -EINVAL;
616
617         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
618                 return -EFAULT;
619
620         if (sin6->sin6_family != AF_INET6)
621                 return -EINVAL;
622
623         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
624
625         if (optname == TCP_MD5SIG_EXT &&
626             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
627                 prefixlen = cmd.tcpm_prefixlen;
628                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
629                                         prefixlen > 32))
630                         return -EINVAL;
631         } else {
632                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
633         }
634
635         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
636             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
637                 struct net_device *dev;
638
639                 rcu_read_lock();
640                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
641                 if (dev && netif_is_l3_master(dev))
642                         l3index = dev->ifindex;
643                 rcu_read_unlock();
644
645                 /* OK to test whether dev/l3index were set outside of
646                  * RCU; right now the device MUST be an L3 master.
647                  */
648                 if (!dev || !l3index)
649                         return -EINVAL;
650         }
651
652         if (!cmd.tcpm_keylen) {
653                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
654                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
655                                               AF_INET, prefixlen,
656                                               l3index, flags);
657                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658                                       AF_INET6, prefixlen, l3index, flags);
659         }
660
661         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
662                 return -EINVAL;
663
664         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
665                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
666                                       AF_INET, prefixlen, l3index, flags,
667                                       cmd.tcpm_key, cmd.tcpm_keylen);
668
669         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
670                               AF_INET6, prefixlen, l3index, flags,
671                               cmd.tcpm_key, cmd.tcpm_keylen);
672 }
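
/* Userspace sketch (illustrative; peer address and key are assumptions)
 * of the option parsed above, binding an MD5 key to a single IPv6 peer:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *p = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	p->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &p->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key, as handled above.
 */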
673
674 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
675                                    const struct in6_addr *daddr,
676                                    const struct in6_addr *saddr,
677                                    const struct tcphdr *th, int nbytes)
678 {
679         struct tcp6_pseudohdr *bp;
680         struct scatterlist sg;
681         struct tcphdr *_th;
682
683         bp = hp->scratch;
684         /* 1. TCP pseudo-header (RFC 2460) */
685         bp->saddr = *saddr;
686         bp->daddr = *daddr;
687         bp->protocol = cpu_to_be32(IPPROTO_TCP);
688         bp->len = cpu_to_be32(nbytes);
689
690         _th = (struct tcphdr *)(bp + 1);
691         memcpy(_th, th, sizeof(*th));
692         _th->check = 0;
693
694         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
695         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
696                                 sizeof(*bp) + sizeof(*th));
697         return crypto_ahash_update(hp->md5_req);
698 }
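
/* The scratch area hashed above is laid out as the pseudo-header followed
 * by a copy of the TCP header with its checksum zeroed:
 *
 *	struct tcp6_pseudohdr {			// see include/net/tcp.h
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;
 *		__be32		protocol;	// including padding
 *	};
 *
 * so a single crypto_ahash_update() call covers both pieces.
 */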
699
700 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
701                                const struct in6_addr *daddr, struct in6_addr *saddr,
702                                const struct tcphdr *th)
703 {
704         struct tcp_md5sig_pool *hp;
705         struct ahash_request *req;
706
707         hp = tcp_get_md5sig_pool();
708         if (!hp)
709                 goto clear_hash_noput;
710         req = hp->md5_req;
711
712         if (crypto_ahash_init(req))
713                 goto clear_hash;
714         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
715                 goto clear_hash;
716         if (tcp_md5_hash_key(hp, key))
717                 goto clear_hash;
718         ahash_request_set_crypt(req, NULL, md5_hash, 0);
719         if (crypto_ahash_final(req))
720                 goto clear_hash;
721
722         tcp_put_md5sig_pool();
723         return 0;
724
725 clear_hash:
726         tcp_put_md5sig_pool();
727 clear_hash_noput:
728         memset(md5_hash, 0, 16);
729         return 1;
730 }
731
732 static int tcp_v6_md5_hash_skb(char *md5_hash,
733                                const struct tcp_md5sig_key *key,
734                                const struct sock *sk,
735                                const struct sk_buff *skb)
736 {
737         const struct in6_addr *saddr, *daddr;
738         struct tcp_md5sig_pool *hp;
739         struct ahash_request *req;
740         const struct tcphdr *th = tcp_hdr(skb);
741
742         if (sk) { /* valid for establish/request sockets */
743                 saddr = &sk->sk_v6_rcv_saddr;
744                 daddr = &sk->sk_v6_daddr;
745         } else {
746                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
747                 saddr = &ip6h->saddr;
748                 daddr = &ip6h->daddr;
749         }
750
751         hp = tcp_get_md5sig_pool();
752         if (!hp)
753                 goto clear_hash_noput;
754         req = hp->md5_req;
755
756         if (crypto_ahash_init(req))
757                 goto clear_hash;
758
759         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
760                 goto clear_hash;
761         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
762                 goto clear_hash;
763         if (tcp_md5_hash_key(hp, key))
764                 goto clear_hash;
765         ahash_request_set_crypt(req, NULL, md5_hash, 0);
766         if (crypto_ahash_final(req))
767                 goto clear_hash;
768
769         tcp_put_md5sig_pool();
770         return 0;
771
772 clear_hash:
773         tcp_put_md5sig_pool();
774 clear_hash_noput:
775         memset(md5_hash, 0, 16);
776         return 1;
777 }
778
779 #endif
780
781 static void tcp_v6_init_req(struct request_sock *req,
782                             const struct sock *sk_listener,
783                             struct sk_buff *skb)
784 {
785         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
786         struct inet_request_sock *ireq = inet_rsk(req);
787         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
788
789         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
790         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
791
792         /* So that link locals have meaning */
793         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
794             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
795                 ireq->ir_iif = tcp_v6_iif(skb);
796
797         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
798             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
799              np->rxopt.bits.rxinfo ||
800              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
801              np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
802                 refcount_inc(&skb->users);
803                 ireq->pktopts = skb;
804         }
805 }
806
807 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
808                                           struct sk_buff *skb,
809                                           struct flowi *fl,
810                                           struct request_sock *req)
811 {
812         tcp_v6_init_req(req, sk, skb);
813
814         if (security_inet_conn_request(sk, skb, req))
815                 return NULL;
816
817         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
818 }
819
820 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
821         .family         =       AF_INET6,
822         .obj_size       =       sizeof(struct tcp6_request_sock),
823         .rtx_syn_ack    =       tcp_rtx_synack,
824         .send_ack       =       tcp_v6_reqsk_send_ack,
825         .destructor     =       tcp_v6_reqsk_destructor,
826         .send_reset     =       tcp_v6_send_reset,
827         .syn_ack_timeout =      tcp_syn_ack_timeout,
828 };
829
830 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
831         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
832                                 sizeof(struct ipv6hdr),
833 #ifdef CONFIG_TCP_MD5SIG
834         .req_md5_lookup =       tcp_v6_md5_lookup,
835         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
836 #endif
837 #ifdef CONFIG_SYN_COOKIES
838         .cookie_init_seq =      cookie_v6_init_sequence,
839 #endif
840         .route_req      =       tcp_v6_route_req,
841         .init_seq       =       tcp_v6_init_seq,
842         .init_ts_off    =       tcp_v6_init_ts_off,
843         .send_synack    =       tcp_v6_send_synack,
844 };
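
/* The clamp above works out to 1280 - 40 - 20 = 1220 bytes: IPV6_MIN_MTU
 * (1280) minus the fixed IPv6 header (40) and the minimal TCP header (20),
 * i.e. the largest MSS guaranteed to cross any IPv6 path unfragmented.
 */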
845
846 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
847                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
848                                  int oif, struct tcp_md5sig_key *key, int rst,
849                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
850 {
851         const struct tcphdr *th = tcp_hdr(skb);
852         struct tcphdr *t1;
853         struct sk_buff *buff;
854         struct flowi6 fl6;
855         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
856         struct sock *ctl_sk = net->ipv6.tcp_sk;
857         unsigned int tot_len = sizeof(struct tcphdr);
858         __be32 mrst = 0, *topt;
859         struct dst_entry *dst;
860         __u32 mark = 0;
861
862         if (tsecr)
863                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
864 #ifdef CONFIG_TCP_MD5SIG
865         if (key)
866                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
867 #endif
868
869 #ifdef CONFIG_MPTCP
870         if (rst && !key) {
871                 mrst = mptcp_reset_option(skb);
872
873                 if (mrst)
874                         tot_len += sizeof(__be32);
875         }
876 #endif
877
878         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
879         if (!buff)
880                 return;
881
882         skb_reserve(buff, MAX_TCP_HEADER);
883
884         t1 = skb_push(buff, tot_len);
885         skb_reset_transport_header(buff);
886
887         /* Swap the send and the receive. */
888         memset(t1, 0, sizeof(*t1));
889         t1->dest = th->source;
890         t1->source = th->dest;
891         t1->doff = tot_len / 4;
892         t1->seq = htonl(seq);
893         t1->ack_seq = htonl(ack);
894         t1->ack = !rst || !th->ack;
895         t1->rst = rst;
896         t1->window = htons(win);
897
898         topt = (__be32 *)(t1 + 1);
899
900         if (tsecr) {
901                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
902                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
903                 *topt++ = htonl(tsval);
904                 *topt++ = htonl(tsecr);
905         }
906
907         if (mrst)
908                 *topt++ = mrst;
909
910 #ifdef CONFIG_TCP_MD5SIG
911         if (key) {
912                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
913                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
914                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
915                                     &ipv6_hdr(skb)->saddr,
916                                     &ipv6_hdr(skb)->daddr, t1);
917         }
918 #endif
919
920         memset(&fl6, 0, sizeof(fl6));
921         fl6.daddr = ipv6_hdr(skb)->saddr;
922         fl6.saddr = ipv6_hdr(skb)->daddr;
923         fl6.flowlabel = label;
924
925         buff->ip_summed = CHECKSUM_PARTIAL;
926
927         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
928
929         fl6.flowi6_proto = IPPROTO_TCP;
930         if (rt6_need_strict(&fl6.daddr) && !oif)
931                 fl6.flowi6_oif = tcp_v6_iif(skb);
932         else {
933                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
934                         oif = skb->skb_iif;
935
936                 fl6.flowi6_oif = oif;
937         }
938
939         if (sk) {
940                 if (sk->sk_state == TCP_TIME_WAIT)
941                         mark = inet_twsk(sk)->tw_mark;
942                 else
943                         mark = READ_ONCE(sk->sk_mark);
944                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
945         }
946         if (txhash) {
947                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
948                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
949         }
950         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
951         fl6.fl6_dport = t1->dest;
952         fl6.fl6_sport = t1->source;
953         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
954         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
955
956         /* Pass a socket to ip6_dst_lookup_flow even when it is for a
957          * RST. The underlying function will use this to retrieve the
958          * network namespace.
959          */
960         if (sk && sk->sk_state != TCP_TIME_WAIT)
961                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referenced */
962         else
963                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
964         if (!IS_ERR(dst)) {
965                 skb_dst_set(buff, dst);
966                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
967                          tclass & ~INET_ECN_MASK, priority);
968                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
969                 if (rst)
970                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
971                 return;
972         }
973
974         kfree_skb(buff);
975 }
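
/* Wire layout of the aligned timestamp option emitted above, with
 * TCPOPT_NOP = 1, TCPOPT_TIMESTAMP = 8, TCPOLEN_TIMESTAMP = 10:
 *
 *	01 01 08 0a <tsval:4> <tsecr:4>
 *
 * Two NOP pads round the 10-byte option up to three 32-bit words,
 * matching the TCPOLEN_TSTAMP_ALIGNED accounting at the top.
 */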
976
977 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
978 {
979         const struct tcphdr *th = tcp_hdr(skb);
980         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
981         u32 seq = 0, ack_seq = 0;
982         struct tcp_md5sig_key *key = NULL;
983 #ifdef CONFIG_TCP_MD5SIG
984         const __u8 *hash_location = NULL;
985         unsigned char newhash[16];
986         int genhash;
987         struct sock *sk1 = NULL;
988 #endif
989         __be32 label = 0;
990         u32 priority = 0;
991         struct net *net;
992         u32 txhash = 0;
993         int oif = 0;
994
995         if (th->rst)
996                 return;
997
998         /* If sk is not NULL, we did a successful lookup and the incoming
999          * route had to be correct. prequeue might have dropped our dst.
1000          */
1001         if (!sk && !ipv6_unicast_destination(skb))
1002                 return;
1003
1004         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1005 #ifdef CONFIG_TCP_MD5SIG
1006         rcu_read_lock();
1007         hash_location = tcp_parse_md5sig_option(th);
1008         if (sk && sk_fullsock(sk)) {
1009                 int l3index;
1010
1011                 /* If sdif is set, the packet ingressed via a device
1012                  * in an L3 domain and inet_iif is set to it.
1013                  */
1014                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1015                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1016         } else if (hash_location) {
1017                 int dif = tcp_v6_iif_l3_slave(skb);
1018                 int sdif = tcp_v6_sdif(skb);
1019                 int l3index;
1020
1021                 /*
1022                  * The active side is lost. Try to find the listening socket
1023                  * through the source port, then find the md5 key through it.
1024                  * We do not loosen security here:
1025                  * the incoming packet is checked against the md5 hash of the
1026                  * found key; no RST is generated if the hash doesn't match.
1027                  */
1028                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1029                                             NULL, 0, &ipv6h->saddr, th->source,
1030                                             &ipv6h->daddr, ntohs(th->source),
1031                                             dif, sdif);
1032                 if (!sk1)
1033                         goto out;
1034
1035                 /* If sdif is set, the packet ingressed via a device
1036                  * in an L3 domain and dif is set to it.
1037                  */
1038                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1039
1040                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1041                 if (!key)
1042                         goto out;
1043
1044                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1045                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1046                         goto out;
1047         }
1048 #endif
1049
1050         if (th->ack)
1051                 seq = ntohl(th->ack_seq);
1052         else
1053                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1054                           (th->doff << 2);
1055
1056         if (sk) {
1057                 oif = sk->sk_bound_dev_if;
1058                 if (sk_fullsock(sk)) {
1059                         trace_tcp_send_reset(sk, skb);
1060                         if (inet6_test_bit(REPFLOW, sk))
1061                                 label = ip6_flowlabel(ipv6h);
1062                         priority = READ_ONCE(sk->sk_priority);
1063                         txhash = sk->sk_txhash;
1064                 }
1065                 if (sk->sk_state == TCP_TIME_WAIT) {
1066                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1067                         priority = inet_twsk(sk)->tw_priority;
1068                         txhash = inet_twsk(sk)->tw_txhash;
1069                 }
1070         } else {
1071                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1072                         label = ip6_flowlabel(ipv6h);
1073         }
1074
1075         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1076                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1077
1078 #ifdef CONFIG_TCP_MD5SIG
1079 out:
1080         rcu_read_unlock();
1081 #endif
1082 }
1083
1084 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1085                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1086                             struct tcp_md5sig_key *key, u8 tclass,
1087                             __be32 label, u32 priority, u32 txhash)
1088 {
1089         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1090                              tclass, label, priority, txhash);
1091 }
1092
1093 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1094 {
1095         struct inet_timewait_sock *tw = inet_twsk(sk);
1096         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1097
1098         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1099                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1100                         tcp_tw_tsval(tcptw),
1101                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1102                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1103                         tw->tw_txhash);
1104
1105         inet_twsk_put(tw);
1106 }
1107
1108 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1109                                   struct request_sock *req)
1110 {
1111         int l3index;
1112
1113         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1114
1115         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1116          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1117          */
1118         /* RFC 7323 2.3
1119          * The window field (SEG.WND) of every outgoing segment, with the
1120          * exception of <SYN> segments, MUST be right-shifted by
1121          * Rcv.Wind.Shift bits:
1122          */
1123         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1124                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1125                         tcp_rsk(req)->rcv_nxt,
1126                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1127                         tcp_rsk_tsval(tcp_rsk(req)),
1128                         READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1129                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1130                         ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1131                         READ_ONCE(sk->sk_priority),
1132                         READ_ONCE(tcp_rsk(req)->txhash));
1133 }
1134
1135
1136 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1137 {
1138 #ifdef CONFIG_SYN_COOKIES
1139         const struct tcphdr *th = tcp_hdr(skb);
1140
1141         if (!th->syn)
1142                 sk = cookie_v6_check(sk, skb);
1143 #endif
1144         return sk;
1145 }
1146
1147 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1148                          struct tcphdr *th, u32 *cookie)
1149 {
1150         u16 mss = 0;
1151 #ifdef CONFIG_SYN_COOKIES
1152         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1153                                     &tcp_request_sock_ipv6_ops, sk, th);
1154         if (mss) {
1155                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1156                 tcp_synq_overflow(sk);
1157         }
1158 #endif
1159         return mss;
1160 }
1161
1162 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1163 {
1164         if (skb->protocol == htons(ETH_P_IP))
1165                 return tcp_v4_conn_request(sk, skb);
1166
1167         if (!ipv6_unicast_destination(skb))
1168                 goto drop;
1169
1170         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1171                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1172                 return 0;
1173         }
1174
1175         return tcp_conn_request(&tcp6_request_sock_ops,
1176                                 &tcp_request_sock_ipv6_ops, sk, skb);
1177
1178 drop:
1179         tcp_listendrop(sk);
1180         return 0; /* don't send reset */
1181 }
1182
1183 static void tcp_v6_restore_cb(struct sk_buff *skb)
1184 {
1185         /* We need to move header back to the beginning if xfrm6_policy_check()
1186          * and tcp_v6_fill_cb() are going to be called again.
1187          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1188          */
1189         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1190                 sizeof(struct inet6_skb_parm));
1191 }
1192
1193 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1194                                          struct request_sock *req,
1195                                          struct dst_entry *dst,
1196                                          struct request_sock *req_unhash,
1197                                          bool *own_req)
1198 {
1199         struct inet_request_sock *ireq;
1200         struct ipv6_pinfo *newnp;
1201         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1202         struct ipv6_txoptions *opt;
1203         struct inet_sock *newinet;
1204         bool found_dup_sk = false;
1205         struct tcp_sock *newtp;
1206         struct sock *newsk;
1207 #ifdef CONFIG_TCP_MD5SIG
1208         struct tcp_md5sig_key *key;
1209         int l3index;
1210 #endif
1211         struct flowi6 fl6;
1212
1213         if (skb->protocol == htons(ETH_P_IP)) {
1214                 /*
1215                  *      v6 mapped
1216                  */
1217
1218                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1219                                              req_unhash, own_req);
1220
1221                 if (!newsk)
1222                         return NULL;
1223
1224                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1225
1226                 newnp = tcp_inet6_sk(newsk);
1227                 newtp = tcp_sk(newsk);
1228
1229                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1230
1231                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1232
1233                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1234                 if (sk_is_mptcp(newsk))
1235                         mptcpv6_handle_mapped(newsk, true);
1236                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1237 #ifdef CONFIG_TCP_MD5SIG
1238                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1239 #endif
1240
1241                 newnp->ipv6_mc_list = NULL;
1242                 newnp->ipv6_ac_list = NULL;
1243                 newnp->ipv6_fl_list = NULL;
1244                 newnp->pktoptions  = NULL;
1245                 newnp->opt         = NULL;
1246                 newnp->mcast_oif   = inet_iif(skb);
1247                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1248                 newnp->rcv_flowinfo = 0;
1249                 if (inet6_test_bit(REPFLOW, sk))
1250                         newnp->flow_label = 0;
1251
1252                 /*
1253                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1254                  * here, tcp_create_openreq_child now does this for us, see the comment in
1255                  * that function for the gory details. -acme
1256                  */
1257
1258                 /* This is a tricky place. Until this moment the IPv4 tcp
1259                    worked with the IPv6 icsk.icsk_af_ops.
1260                    Sync it now.
1261                  */
1262                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1263
1264                 return newsk;
1265         }
1266
1267         ireq = inet_rsk(req);
1268
1269         if (sk_acceptq_is_full(sk))
1270                 goto out_overflow;
1271
1272         if (!dst) {
1273                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1274                 if (!dst)
1275                         goto out;
1276         }
1277
1278         newsk = tcp_create_openreq_child(sk, req, skb);
1279         if (!newsk)
1280                 goto out_nonewsk;
1281
1282         /*
1283          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1284          * count here, tcp_create_openreq_child now does this for us, see the
1285          * comment in that function for the gory details. -acme
1286          */
1287
1288         newsk->sk_gso_type = SKB_GSO_TCPV6;
1289         ip6_dst_store(newsk, dst, NULL, NULL);
1290         inet6_sk_rx_dst_set(newsk, skb);
1291
1292         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1293
1294         newtp = tcp_sk(newsk);
1295         newinet = inet_sk(newsk);
1296         newnp = tcp_inet6_sk(newsk);
1297
1298         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1299
1300         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1301         newnp->saddr = ireq->ir_v6_loc_addr;
1302         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1303         newsk->sk_bound_dev_if = ireq->ir_iif;
1304
1305         /* Now IPv6 options...
1306
1307            First: no IPv4 options.
1308          */
1309         newinet->inet_opt = NULL;
1310         newnp->ipv6_mc_list = NULL;
1311         newnp->ipv6_ac_list = NULL;
1312         newnp->ipv6_fl_list = NULL;
1313
1314         /* Clone RX bits */
1315         newnp->rxopt.all = np->rxopt.all;
1316
1317         newnp->pktoptions = NULL;
1318         newnp->opt        = NULL;
1319         newnp->mcast_oif  = tcp_v6_iif(skb);
1320         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1321         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1322         if (inet6_test_bit(REPFLOW, sk))
1323                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1324
1325         /* Set ToS of the new socket based upon the value of incoming SYN.
1326          * ECT bits are set later in tcp_init_transfer().
1327          */
1328         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1329                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1330
1331         /* Clone native IPv6 options from the listening socket (if any).
1332
1333            Yes, keeping a reference count would be much more clever,
1334            but we do one more thing here: reattach optmem
1335            to newsk.
1336          */
1337         opt = ireq->ipv6_opt;
1338         if (!opt)
1339                 opt = rcu_dereference(np->opt);
1340         if (opt) {
1341                 opt = ipv6_dup_options(newsk, opt);
1342                 RCU_INIT_POINTER(newnp->opt, opt);
1343         }
1344         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1345         if (opt)
1346                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1347                                                     opt->opt_flen;
1348
1349         tcp_ca_openreq_child(newsk, dst);
1350
1351         tcp_sync_mss(newsk, dst_mtu(dst));
1352         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1353
1354         tcp_initialize_rcv_mss(newsk);
1355
1356         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1357         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1358
1359 #ifdef CONFIG_TCP_MD5SIG
1360         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1361
1362         /* Copy over the MD5 key from the original socket */
1363         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1364         if (key) {
1365                 const union tcp_md5_addr *addr;
1366
1367                 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1368                 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1369                         inet_csk_prepare_forced_close(newsk);
1370                         tcp_done(newsk);
1371                         goto out;
1372                 }
1373         }
1374 #endif
1375
1376         if (__inet_inherit_port(sk, newsk) < 0) {
1377                 inet_csk_prepare_forced_close(newsk);
1378                 tcp_done(newsk);
1379                 goto out;
1380         }
1381         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1382                                        &found_dup_sk);
1383         if (*own_req) {
1384                 tcp_move_syn(newtp, req);
1385
1386                 /* Clone pktoptions received with SYN, if we own the req */
1387                 if (ireq->pktopts) {
1388                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1389                         consume_skb(ireq->pktopts);
1390                         ireq->pktopts = NULL;
1391                         if (newnp->pktoptions)
1392                                 tcp_v6_restore_cb(newnp->pktoptions);
1393                 }
1394         } else {
1395                 if (!req_unhash && found_dup_sk) {
1396                         /* This code path should be executed in the
1397                          * syncookie case only
1398                          */
1399                         bh_unlock_sock(newsk);
1400                         sock_put(newsk);
1401                         newsk = NULL;
1402                 }
1403         }
1404
1405         return newsk;
1406
1407 out_overflow:
1408         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1409 out_nonewsk:
1410         dst_release(dst);
1411 out:
1412         tcp_listendrop(sk);
1413         return NULL;
1414 }
1415
1416 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1417                                                            u32));
1418 /* The socket must have its spinlock held when we get
1419  * here, unless it is a TCP_LISTEN socket.
1420  *
1421  * We have a potential double-lock case here, so even when
1422  * doing backlog processing we use the BH locking scheme.
1423  * This is because we cannot sleep with the original spinlock
1424  * held.
1425  */
1426 INDIRECT_CALLABLE_SCOPE
1427 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1428 {
1429         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1430         struct sk_buff *opt_skb = NULL;
1431         enum skb_drop_reason reason;
1432         struct tcp_sock *tp;
1433
1434         /* Imagine: socket is IPv6. IPv4 packet arrives,
1435            goes to the IPv4 receive handler and is backlogged.
1436            From the backlog it always goes here. Kerboom...
1437            Fortunately, tcp_rcv_established and rcv_established
1438            handle them correctly, but that is not the case with
1439            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1440          */
1441
1442         if (skb->protocol == htons(ETH_P_IP))
1443                 return tcp_v4_do_rcv(sk, skb);
1444
1445         /*
1446          *      Socket locking is here for SMP purposes, as backlog rcv
1447          *      is currently called with BH processing disabled.
1448          */
1449
1450         /* Do Stevens' IPV6_PKTOPTIONS.
1451
1452            Yes, this is the only place in our code where we
1453            can implement it without affecting IPv4.
1454            The rest of the code is protocol independent,
1455            and I do not like the idea of uglifying IPv4.
1456
1457            Actually, the whole idea behind IPV6_PKTOPTIONS does
1458            not look very well thought out. For now we latch the
1459            options received in the last packet enqueued by tcp
1460            (see the sketch after this function). Feel free to
1461            propose a better solution.   --ANK (980728)
1462          */
1463         if (np->rxopt.all)
1464                 opt_skb = skb_clone_and_charge_r(skb, sk);
1465
1466         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1467         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1468                 struct dst_entry *dst;
1469
1470                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1471                                                 lockdep_sock_is_held(sk));
1472
1473                 sock_rps_save_rxhash(sk, skb);
1474                 sk_mark_napi_id(sk, skb);
1475                 if (dst) {
1476                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1477                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1478                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1479                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1480                                 dst_release(dst);
1481                         }
1482                 }
1483
1484                 tcp_rcv_established(sk, skb);
1485                 if (opt_skb)
1486                         goto ipv6_pktoptions;
1487                 return 0;
1488         }
1489
1490         if (tcp_checksum_complete(skb))
1491                 goto csum_err;
1492
1493         if (sk->sk_state == TCP_LISTEN) {
1494                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1495
1496                 if (!nsk)
1497                         goto discard;
1498
1499                 if (nsk != sk) {
1500                         if (tcp_child_process(sk, nsk, skb))
1501                                 goto reset;
1502                         if (opt_skb)
1503                                 __kfree_skb(opt_skb);
1504                         return 0;
1505                 }
1506         } else
1507                 sock_rps_save_rxhash(sk, skb);
1508
1509         if (tcp_rcv_state_process(sk, skb))
1510                 goto reset;
1511         if (opt_skb)
1512                 goto ipv6_pktoptions;
1513         return 0;
1514
1515 reset:
1516         tcp_v6_send_reset(sk, skb);
1517 discard:
1518         if (opt_skb)
1519                 __kfree_skb(opt_skb);
1520         kfree_skb_reason(skb, reason);
1521         return 0;
1522 csum_err:
1523         reason = SKB_DROP_REASON_TCP_CSUM;
1524         trace_tcp_bad_csum(skb);
1525         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1526         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1527         goto discard;
1528
1529
1530 ipv6_pktoptions:
1531         /* You may ask: what is all this about?
1532
1533            1. The skb was enqueued by tcp.
1534            2. The skb was added to the tail of the read queue, not out of order.
1535            3. The socket is not in a passive state.
1536            4. Finally, it really carries options the user wants to receive.
1537          */
1538         tp = tcp_sk(sk);
1539         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1540             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1541                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1542                         np->mcast_oif = tcp_v6_iif(opt_skb);
1543                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1544                         WRITE_ONCE(np->mcast_hops,
1545                                    ipv6_hdr(opt_skb)->hop_limit);
1546                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1547                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1548                 if (inet6_test_bit(REPFLOW, sk))
1549                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1550                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1551                         tcp_v6_restore_cb(opt_skb);
1552                         opt_skb = xchg(&np->pktoptions, opt_skb);
1553                 } else {
1554                         __kfree_skb(opt_skb);
1555                         opt_skb = xchg(&np->pktoptions, NULL);
1556                 }
1557         }
1558
1559         consume_skb(opt_skb);
1560         return 0;
1561 }
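
/* The userspace side of the latching described above, as a sketch: enable
 * the rxopt bits, then fetch the control data of the most recently queued
 * segment with the historical getsockopt(IPV6_2292PKTOPTIONS) interface
 * and walk it as a cmsg buffer.  Assumes a typical glibc environment; the
 * buffer size is arbitrary.  Illustrative only, not built with this file.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

static void dump_latched_options(int fd)
{
	char buf[512];
	socklen_t len = sizeof(buf);
	int on = 1;
	struct msghdr msg = { .msg_control = buf };
	struct cmsghdr *cm;

	/* Must be set before the options-bearing segment arrives. */
	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
	/* ... after some data has been received ... */
	if (getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, buf, &len))
		return;
	msg.msg_controllen = len;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level != IPPROTO_IPV6)
			continue;
		if (cm->cmsg_type == IPV6_HOPLIMIT)
			printf("hop limit %d\n", *(int *)CMSG_DATA(cm));
		/* IPV6_PKTINFO carries a struct in6_pktinfo, etc. */
	}
}
#endif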
1562
1563 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1564                            const struct tcphdr *th)
1565 {
1566         /* This is tricky: we move IP6CB to its correct location inside
1567          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1568          * _decode_session6() uses IP6CB().
1569          * barrier() makes sure the compiler won't play aliasing games.
1570          */
1571         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1572                 sizeof(struct inet6_skb_parm));
1573         barrier();
1574
1575         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1576         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1577                                     skb->len - th->doff*4);
1578         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1579         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1580         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1581         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1582         TCP_SKB_CB(skb)->sacked = 0;
1583         TCP_SKB_CB(skb)->has_rxtstamp =
1584                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1585 }
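
/* Worked example of the end_seq arithmetic above: for a segment with
 * seq = 1000, doff = 8 (32 header bytes), skb->len = 132 and FIN set,
 * end_seq = 1000 + 0 (syn) + 1 (fin) + 132 - 32 = 1101; the 100 payload
 * bytes and the FIN each consume sequence space, while the headers do not.
 */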
1586
1587 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1588 {
1589         enum skb_drop_reason drop_reason;
1590         int sdif = inet6_sdif(skb);
1591         int dif = inet6_iif(skb);
1592         const struct tcphdr *th;
1593         const struct ipv6hdr *hdr;
1594         bool refcounted;
1595         struct sock *sk;
1596         int ret;
1597         struct net *net = dev_net(skb->dev);
1598
1599         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1600         if (skb->pkt_type != PACKET_HOST)
1601                 goto discard_it;
1602
1603         /*
1604          *      Count it even if it's bad.
1605          */
1606         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1607
1608         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1609                 goto discard_it;
1610
1611         th = (const struct tcphdr *)skb->data;
1612
1613         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1614                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1615                 goto bad_packet;
1616         }
1617         if (!pskb_may_pull(skb, th->doff*4))
1618                 goto discard_it;
1619
1620         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1621                 goto csum_error;
1622
1623         th = (const struct tcphdr *)skb->data;
1624         hdr = ipv6_hdr(skb);
1625
1626 lookup:
1627         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1628                                 th->source, th->dest, inet6_iif(skb), sdif,
1629                                 &refcounted);
1630         if (!sk)
1631                 goto no_tcp_socket;
1632
1633 process:
1634         if (sk->sk_state == TCP_TIME_WAIT)
1635                 goto do_time_wait;
1636
1637         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1638                 struct request_sock *req = inet_reqsk(sk);
1639                 bool req_stolen = false;
1640                 struct sock *nsk;
1641
1642                 sk = req->rsk_listener;
1643                 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1644                         drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1645                 else
1646                         drop_reason = tcp_inbound_md5_hash(sk, skb,
1647                                                            &hdr->saddr, &hdr->daddr,
1648                                                            AF_INET6, dif, sdif);
1649                 if (drop_reason) {
1650                         sk_drops_add(sk, skb);
1651                         reqsk_put(req);
1652                         goto discard_it;
1653                 }
1654                 if (tcp_checksum_complete(skb)) {
1655                         reqsk_put(req);
1656                         goto csum_error;
1657                 }
1658                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1659                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1660                         if (!nsk) {
1661                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1662                                 goto lookup;
1663                         }
1664                         sk = nsk;
1665                         /* reuseport_migrate_sock() has already taken one
1666                          * sk_refcnt reference before returning.
1667                          */
1668                 } else {
1669                         sock_hold(sk);
1670                 }
1671                 refcounted = true;
1672                 nsk = NULL;
1673                 if (!tcp_filter(sk, skb)) {
1674                         th = (const struct tcphdr *)skb->data;
1675                         hdr = ipv6_hdr(skb);
1676                         tcp_v6_fill_cb(skb, hdr, th);
1677                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1678                 } else {
1679                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1680                 }
1681                 if (!nsk) {
1682                         reqsk_put(req);
1683                         if (req_stolen) {
1684                                 /* Another CPU got exclusive access to req
1685                                  * and created a full-blown socket.
1686                                  * Try to feed this packet to that socket
1687                                  * instead of discarding it.
1688                                  */
1689                                 tcp_v6_restore_cb(skb);
1690                                 sock_put(sk);
1691                                 goto lookup;
1692                         }
1693                         goto discard_and_relse;
1694                 }
1695                 nf_reset_ct(skb);
1696                 if (nsk == sk) {
1697                         reqsk_put(req);
1698                         tcp_v6_restore_cb(skb);
1699                 } else if (tcp_child_process(sk, nsk, skb)) {
1700                         tcp_v6_send_reset(nsk, skb);
1701                         goto discard_and_relse;
1702                 } else {
1703                         sock_put(sk);
1704                         return 0;
1705                 }
1706         }
1707
1708         if (static_branch_unlikely(&ip6_min_hopcount)) {
1709                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt(); see the IPV6_MINHOPCOUNT sketch after this function */
1710                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1711                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1712                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1713                         goto discard_and_relse;
1714                 }
1715         }
1716
1717         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1718                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1719                 goto discard_and_relse;
1720         }
1721
1722         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1723                                            AF_INET6, dif, sdif);
1724         if (drop_reason)
1725                 goto discard_and_relse;
1726
1727         nf_reset_ct(skb);
1728
1729         if (tcp_filter(sk, skb)) {
1730                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1731                 goto discard_and_relse;
1732         }
1733         th = (const struct tcphdr *)skb->data;
1734         hdr = ipv6_hdr(skb);
1735         tcp_v6_fill_cb(skb, hdr, th);
1736
1737         skb->dev = NULL;
1738
1739         if (sk->sk_state == TCP_LISTEN) {
1740                 ret = tcp_v6_do_rcv(sk, skb);
1741                 goto put_and_return;
1742         }
1743
1744         sk_incoming_cpu_update(sk);
1745
1746         bh_lock_sock_nested(sk);
1747         tcp_segs_in(tcp_sk(sk), skb);
1748         ret = 0;
1749         if (!sock_owned_by_user(sk)) {
1750                 ret = tcp_v6_do_rcv(sk, skb);
1751         } else {
1752                 if (tcp_add_backlog(sk, skb, &drop_reason))
1753                         goto discard_and_relse;
1754         }
1755         bh_unlock_sock(sk);
1756 put_and_return:
1757         if (refcounted)
1758                 sock_put(sk);
1759         return ret ? -1 : 0;
1760
1761 no_tcp_socket:
1762         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1763         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1764                 goto discard_it;
1765
1766         tcp_v6_fill_cb(skb, hdr, th);
1767
1768         if (tcp_checksum_complete(skb)) {
1769 csum_error:
1770                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1771                 trace_tcp_bad_csum(skb);
1772                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1773 bad_packet:
1774                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1775         } else {
1776                 tcp_v6_send_reset(NULL, skb);
1777         }
1778
1779 discard_it:
1780         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1781         kfree_skb_reason(skb, drop_reason);
1782         return 0;
1783
1784 discard_and_relse:
1785         sk_drops_add(sk, skb);
1786         if (refcounted)
1787                 sock_put(sk);
1788         goto discard_it;
1789
1790 do_time_wait:
1791         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1792                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1793                 inet_twsk_put(inet_twsk(sk));
1794                 goto discard_it;
1795         }
1796
1797         tcp_v6_fill_cb(skb, hdr, th);
1798
1799         if (tcp_checksum_complete(skb)) {
1800                 inet_twsk_put(inet_twsk(sk));
1801                 goto csum_error;
1802         }
1803
1804         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1805         case TCP_TW_SYN:
1806         {
1807                 struct sock *sk2;
1808
1809                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1810                                             skb, __tcp_hdrlen(th),
1811                                             &ipv6_hdr(skb)->saddr, th->source,
1812                                             &ipv6_hdr(skb)->daddr,
1813                                             ntohs(th->dest),
1814                                             tcp_v6_iif_l3_slave(skb),
1815                                             sdif);
1816                 if (sk2) {
1817                         struct inet_timewait_sock *tw = inet_twsk(sk);
1818                         inet_twsk_deschedule_put(tw);
1819                         sk = sk2;
1820                         tcp_v6_restore_cb(skb);
1821                         refcounted = false;
1822                         goto process;
1823                 }
1824         }
1825                 /* to ACK */
1826                 fallthrough;
1827         case TCP_TW_ACK:
1828                 tcp_v6_timewait_ack(sk, skb);
1829                 break;
1830         case TCP_TW_RST:
1831                 tcp_v6_send_reset(sk, skb);
1832                 inet_twsk_deschedule_put(inet_twsk(sk));
1833                 goto discard_it;
1834         case TCP_TW_SUCCESS:
1835                 ;
1836         }
1837         goto discard_it;
1838 }
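
/* The min_hopcount test in tcp_v6_rcv() above implements the IPv6 flavour
 * of the Generalized TTL Security Mechanism (RFC 5082).  A sketch of how
 * userspace arms it: requiring 255 accepts only directly connected peers,
 * since every forwarding hop decrements the hop limit.  IPV6_MINHOPCOUNT
 * may need <linux/in6.h> on older libcs.  Illustrative only, not built
 * with this file.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

static int require_adjacent_peer(int fd)
{
	int minhop = 255;	/* drop segments with hop_limit < 255 */

	return setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
			  &minhop, sizeof(minhop));
}
#endif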
1839
1840 void tcp_v6_early_demux(struct sk_buff *skb)
1841 {
1842         struct net *net = dev_net(skb->dev);
1843         const struct ipv6hdr *hdr;
1844         const struct tcphdr *th;
1845         struct sock *sk;
1846
1847         if (skb->pkt_type != PACKET_HOST)
1848                 return;
1849
1850         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1851                 return;
1852
1853         hdr = ipv6_hdr(skb);
1854         th = tcp_hdr(skb);
1855
1856         if (th->doff < sizeof(struct tcphdr) / 4)
1857                 return;
1858
1859         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1860         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1861                                         &hdr->saddr, th->source,
1862                                         &hdr->daddr, ntohs(th->dest),
1863                                         inet6_iif(skb), inet6_sdif(skb));
1864         if (sk) {
1865                 skb->sk = sk;
1866                 skb->destructor = sock_edemux;
1867                 if (sk_fullsock(sk)) {
1868                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1869
1870                         if (dst)
1871                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1872                         if (dst &&
1873                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1874                                 skb_dst_set_noref(skb, dst);
1875                 }
1876         }
1877 }
1878
1879 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1880         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1881         .twsk_unique    = tcp_twsk_unique,
1882         .twsk_destructor = tcp_twsk_destructor,
1883 };
1884
1885 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1886 {
1887         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1888 }
1889
1890 const struct inet_connection_sock_af_ops ipv6_specific = {
1891         .queue_xmit        = inet6_csk_xmit,
1892         .send_check        = tcp_v6_send_check,
1893         .rebuild_header    = inet6_sk_rebuild_header,
1894         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1895         .conn_request      = tcp_v6_conn_request,
1896         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1897         .net_header_len    = sizeof(struct ipv6hdr),
1898         .setsockopt        = ipv6_setsockopt,
1899         .getsockopt        = ipv6_getsockopt,
1900         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1901         .sockaddr_len      = sizeof(struct sockaddr_in6),
1902         .mtu_reduced       = tcp_v6_mtu_reduced,
1903 };
1904
1905 #ifdef CONFIG_TCP_MD5SIG
1906 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1907         .md5_lookup     =       tcp_v6_md5_lookup,
1908         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1909         .md5_parse      =       tcp_v6_parse_md5_keys,
1910 };
1911 #endif
1912
1913 /*
1914  *      TCP over IPv4 via INET6 API
1915  */
1916 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1917         .queue_xmit        = ip_queue_xmit,
1918         .send_check        = tcp_v4_send_check,
1919         .rebuild_header    = inet_sk_rebuild_header,
1920         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1921         .conn_request      = tcp_v6_conn_request,
1922         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1923         .net_header_len    = sizeof(struct iphdr),
1924         .setsockopt        = ipv6_setsockopt,
1925         .getsockopt        = ipv6_getsockopt,
1926         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1927         .sockaddr_len      = sizeof(struct sockaddr_in6),
1928         .mtu_reduced       = tcp_v4_mtu_reduced,
1929 };
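
/* tcp_v6_connect() switches a socket to the ipv6_mapped ops above when it
 * connects to a v4-mapped address, so IPv4 travels on the wire while the
 * application keeps the AF_INET6 API.  A sketch follows (the address and
 * port are placeholders; error handling is minimal).  Illustrative only,
 * not built with this file.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int connect_v4_mapped(void)
{
	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
				   .sin6_port = htons(80) };
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return -1;
	return fd;
}
#endif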
1930
1931 #ifdef CONFIG_TCP_MD5SIG
1932 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1933         .md5_lookup     =       tcp_v4_md5_lookup,
1934         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1935         .md5_parse      =       tcp_v6_parse_md5_keys,
1936 };
1937 #endif
1938
1939 /* NOTE: A lot of fields are zeroed explicitly by the call to
1940  *       sk_alloc(), so they need not be initialized here.
1941  */
1942 static int tcp_v6_init_sock(struct sock *sk)
1943 {
1944         struct inet_connection_sock *icsk = inet_csk(sk);
1945
1946         tcp_init_sock(sk);
1947
1948         icsk->icsk_af_ops = &ipv6_specific;
1949
1950 #ifdef CONFIG_TCP_MD5SIG
1951         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1952 #endif
1953
1954         return 0;
1955 }
1956
1957 #ifdef CONFIG_PROC_FS
1958 /* Proc filesystem TCPv6 sock list dumping. */
1959 static void get_openreq6(struct seq_file *seq,
1960                          const struct request_sock *req, int i)
1961 {
1962         long ttd = req->rsk_timer.expires - jiffies;
1963         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1964         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1965
1966         if (ttd < 0)
1967                 ttd = 0;
1968
1969         seq_printf(seq,
1970                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1971                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1972                    i,
1973                    src->s6_addr32[0], src->s6_addr32[1],
1974                    src->s6_addr32[2], src->s6_addr32[3],
1975                    inet_rsk(req)->ir_num,
1976                    dest->s6_addr32[0], dest->s6_addr32[1],
1977                    dest->s6_addr32[2], dest->s6_addr32[3],
1978                    ntohs(inet_rsk(req)->ir_rmt_port),
1979                    TCP_SYN_RECV,
1980                    0, 0, /* could print option size, but that is af dependent. */
1981                    1,   /* timers active (only the expire timer) */
1982                    jiffies_to_clock_t(ttd),
1983                    req->num_timeout,
1984                    from_kuid_munged(seq_user_ns(seq),
1985                                     sock_i_uid(req->rsk_listener)),
1986                    0,  /* non standard timer */
1987                    0, /* open_requests have no inode */
1988                    0, req);
1989 }
1990
1991 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1992 {
1993         const struct in6_addr *dest, *src;
1994         __u16 destp, srcp;
1995         int timer_active;
1996         unsigned long timer_expires;
1997         const struct inet_sock *inet = inet_sk(sp);
1998         const struct tcp_sock *tp = tcp_sk(sp);
1999         const struct inet_connection_sock *icsk = inet_csk(sp);
2000         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2001         int rx_queue;
2002         int state;
2003
2004         dest  = &sp->sk_v6_daddr;
2005         src   = &sp->sk_v6_rcv_saddr;
2006         destp = ntohs(inet->inet_dport);
2007         srcp  = ntohs(inet->inet_sport);
2008
2009         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2010             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2011             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2012                 timer_active    = 1;
2013                 timer_expires   = icsk->icsk_timeout;
2014         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2015                 timer_active    = 4;
2016                 timer_expires   = icsk->icsk_timeout;
2017         } else if (timer_pending(&sp->sk_timer)) {
2018                 timer_active    = 2;
2019                 timer_expires   = sp->sk_timer.expires;
2020         } else {
2021                 timer_active    = 0;
2022                 timer_expires = jiffies;
2023         }
2024
2025         state = inet_sk_state_load(sp);
2026         if (state == TCP_LISTEN)
2027                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2028         else
2029                 /* Because we don't lock the socket,
2030                  * we might find a transient negative value.
2031                  */
2032                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2033                                       READ_ONCE(tp->copied_seq), 0);
2034
2035         seq_printf(seq,
2036                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2037                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2038                    i,
2039                    src->s6_addr32[0], src->s6_addr32[1],
2040                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2041                    dest->s6_addr32[0], dest->s6_addr32[1],
2042                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2043                    state,
2044                    READ_ONCE(tp->write_seq) - tp->snd_una,
2045                    rx_queue,
2046                    timer_active,
2047                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2048                    icsk->icsk_retransmits,
2049                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2050                    icsk->icsk_probes_out,
2051                    sock_i_ino(sp),
2052                    refcount_read(&sp->sk_refcnt), sp,
2053                    jiffies_to_clock_t(icsk->icsk_rto),
2054                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2055                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2056                    tcp_snd_cwnd(tp),
2057                    state == TCP_LISTEN ?
2058                         fastopenq->max_qlen :
2059                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2060                    );
2061 }
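
/* One way to consume the format emitted by get_tcp6_sock() above: a sketch
 * that pulls the leading fields out of a /proc/net/tcp6 line with sscanf.
 * The field widths follow the seq_printf() format string; the function
 * name is an arbitrary choice.  Illustrative only, not built with this file.
 */
#if 0
#include <stdio.h>

static int parse_tcp6_line(const char *line)
{
	unsigned int laddr[4], raddr[4], lport, rport, state, txq, rxq;

	if (sscanf(line,
		   "%*d: %8X%8X%8X%8X:%4X %8X%8X%8X%8X:%4X %2X %8X:%8X",
		   &laddr[0], &laddr[1], &laddr[2], &laddr[3], &lport,
		   &raddr[0], &raddr[1], &raddr[2], &raddr[3], &rport,
		   &state, &txq, &rxq) != 13)
		return -1;
	printf("state %02X txq %u rxq %u\n", state, txq, rxq);
	return 0;
}
#endif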
2062
2063 static void get_timewait6_sock(struct seq_file *seq,
2064                                struct inet_timewait_sock *tw, int i)
2065 {
2066         long delta = tw->tw_timer.expires - jiffies;
2067         const struct in6_addr *dest, *src;
2068         __u16 destp, srcp;
2069
2070         dest = &tw->tw_v6_daddr;
2071         src  = &tw->tw_v6_rcv_saddr;
2072         destp = ntohs(tw->tw_dport);
2073         srcp  = ntohs(tw->tw_sport);
2074
2075         seq_printf(seq,
2076                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2077                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2078                    i,
2079                    src->s6_addr32[0], src->s6_addr32[1],
2080                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2081                    dest->s6_addr32[0], dest->s6_addr32[1],
2082                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2083                    tw->tw_substate, 0, 0,
2084                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2085                    refcount_read(&tw->tw_refcnt), tw);
2086 }
2087
2088 static int tcp6_seq_show(struct seq_file *seq, void *v)
2089 {
2090         struct tcp_iter_state *st;
2091         struct sock *sk = v;
2092
2093         if (v == SEQ_START_TOKEN) {
2094                 seq_puts(seq,
2095                          "  sl  "
2096                          "local_address                         "
2097                          "remote_address                        "
2098                          "st tx_queue rx_queue tr tm->when retrnsmt"
2099                          "   uid  timeout inode\n");
2100                 goto out;
2101         }
2102         st = seq->private;
2103
2104         if (sk->sk_state == TCP_TIME_WAIT)
2105                 get_timewait6_sock(seq, v, st->num);
2106         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2107                 get_openreq6(seq, v, st->num);
2108         else
2109                 get_tcp6_sock(seq, v, st->num);
2110 out:
2111         return 0;
2112 }
2113
2114 static const struct seq_operations tcp6_seq_ops = {
2115         .show           = tcp6_seq_show,
2116         .start          = tcp_seq_start,
2117         .next           = tcp_seq_next,
2118         .stop           = tcp_seq_stop,
2119 };
2120
2121 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2122         .family         = AF_INET6,
2123 };
2124
2125 int __net_init tcp6_proc_init(struct net *net)
2126 {
2127         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2128                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2129                 return -ENOMEM;
2130         return 0;
2131 }
2132
2133 void tcp6_proc_exit(struct net *net)
2134 {
2135         remove_proc_entry("tcp6", net->proc_net);
2136 }
2137 #endif
2138
2139 struct proto tcpv6_prot = {
2140         .name                   = "TCPv6",
2141         .owner                  = THIS_MODULE,
2142         .close                  = tcp_close,
2143         .pre_connect            = tcp_v6_pre_connect,
2144         .connect                = tcp_v6_connect,
2145         .disconnect             = tcp_disconnect,
2146         .accept                 = inet_csk_accept,
2147         .ioctl                  = tcp_ioctl,
2148         .init                   = tcp_v6_init_sock,
2149         .destroy                = tcp_v4_destroy_sock,
2150         .shutdown               = tcp_shutdown,
2151         .setsockopt             = tcp_setsockopt,
2152         .getsockopt             = tcp_getsockopt,
2153         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2154         .keepalive              = tcp_set_keepalive,
2155         .recvmsg                = tcp_recvmsg,
2156         .sendmsg                = tcp_sendmsg,
2157         .splice_eof             = tcp_splice_eof,
2158         .backlog_rcv            = tcp_v6_do_rcv,
2159         .release_cb             = tcp_release_cb,
2160         .hash                   = inet6_hash,
2161         .unhash                 = inet_unhash,
2162         .get_port               = inet_csk_get_port,
2163         .put_port               = inet_put_port,
2164 #ifdef CONFIG_BPF_SYSCALL
2165         .psock_update_sk_prot   = tcp_bpf_update_proto,
2166 #endif
2167         .enter_memory_pressure  = tcp_enter_memory_pressure,
2168         .leave_memory_pressure  = tcp_leave_memory_pressure,
2169         .stream_memory_free     = tcp_stream_memory_free,
2170         .sockets_allocated      = &tcp_sockets_allocated,
2171
2172         .memory_allocated       = &tcp_memory_allocated,
2173         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2174
2175         .memory_pressure        = &tcp_memory_pressure,
2176         .orphan_count           = &tcp_orphan_count,
2177         .sysctl_mem             = sysctl_tcp_mem,
2178         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2179         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2180         .max_header             = MAX_TCP_HEADER,
2181         .obj_size               = sizeof(struct tcp6_sock),
2182         .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2183         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2184         .twsk_prot              = &tcp6_timewait_sock_ops,
2185         .rsk_prot               = &tcp6_request_sock_ops,
2186         .h.hashinfo             = NULL,
2187         .no_autobind            = true,
2188         .diag_destroy           = tcp_abort,
2189 };
2190 EXPORT_SYMBOL_GPL(tcpv6_prot);
2191
2192 static const struct inet6_protocol tcpv6_protocol = {
2193         .handler        =       tcp_v6_rcv,
2194         .err_handler    =       tcp_v6_err,
2195         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2196 };
2197
2198 static struct inet_protosw tcpv6_protosw = {
2199         .type           =       SOCK_STREAM,
2200         .protocol       =       IPPROTO_TCP,
2201         .prot           =       &tcpv6_prot,
2202         .ops            =       &inet6_stream_ops,
2203         .flags          =       INET_PROTOSW_PERMANENT |
2204                                 INET_PROTOSW_ICSK,
2205 };
2206
2207 static int __net_init tcpv6_net_init(struct net *net)
2208 {
2209         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2210                                     SOCK_RAW, IPPROTO_TCP, net);
2211 }
2212
2213 static void __net_exit tcpv6_net_exit(struct net *net)
2214 {
2215         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2216 }
2217
2218 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2219 {
2220         tcp_twsk_purge(net_exit_list, AF_INET6);
2221 }
2222
2223 static struct pernet_operations tcpv6_net_ops = {
2224         .init       = tcpv6_net_init,
2225         .exit       = tcpv6_net_exit,
2226         .exit_batch = tcpv6_net_exit_batch,
2227 };
2228
2229 int __init tcpv6_init(void)
2230 {
2231         int ret;
2232
2233         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2234         if (ret)
2235                 goto out;
2236
2237         /* register inet6 protocol */
2238         ret = inet6_register_protosw(&tcpv6_protosw);
2239         if (ret)
2240                 goto out_tcpv6_protocol;
2241
2242         ret = register_pernet_subsys(&tcpv6_net_ops);
2243         if (ret)
2244                 goto out_tcpv6_protosw;
2245
2246         ret = mptcpv6_init();
2247         if (ret)
2248                 goto out_tcpv6_pernet_subsys;
2249
2250 out:
2251         return ret;
2252
2253 out_tcpv6_pernet_subsys:
2254         unregister_pernet_subsys(&tcpv6_net_ops);
2255 out_tcpv6_protosw:
2256         inet6_unregister_protosw(&tcpv6_protosw);
2257 out_tcpv6_protocol:
2258         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2259         goto out;
2260 }
2261
2262 void tcpv6_exit(void)
2263 {
2264         unregister_pernet_subsys(&tcpv6_net_ops);
2265         inet6_unregister_protosw(&tcpv6_protosw);
2266         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2267 }