Merge branch 'rework/misc-cleanups' into for-linus
[linux-2.6-block.git] / net / ipv6 / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
/* Return the ipv6_pinfo (inet6 part) of a given TCP socket.
 *
 * Intended for use inside the TCP stack in place of inet6_sk(sk): the
 * result is derived from the enclosing struct tcp6_sock, which avoids a
 * dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) \
	(&container_of_const(tcp_sk(sk), struct tcp6_sock, tcp)->inet6)
98
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101         struct dst_entry *dst = skb_dst(skb);
102
103         if (dst && dst_hold_safe(dst)) {
104                 const struct rt6_info *rt = (const struct rt6_info *)dst;
105
106                 rcu_assign_pointer(sk->sk_rx_dst, dst);
107                 sk->sk_rx_dst_ifindex = skb->skb_iif;
108                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
109         }
110 }
111
112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
113 {
114         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115                                 ipv6_hdr(skb)->saddr.s6_addr32,
116                                 tcp_hdr(skb)->dest,
117                                 tcp_hdr(skb)->source);
118 }
119
120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
121 {
122         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123                                    ipv6_hdr(skb)->saddr.s6_addr32);
124 }
125
126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
127                               int addr_len)
128 {
129         /* This check is replicated from tcp_v6_connect() and intended to
130          * prevent BPF program called below from accessing bytes that are out
131          * of the bound specified by user in addr_len.
132          */
133         if (addr_len < SIN6_LEN_RFC2133)
134                 return -EINVAL;
135
136         sock_owned_by_me(sk);
137
138         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
139 }
140
141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142                           int addr_len)
143 {
144         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145         struct inet_connection_sock *icsk = inet_csk(sk);
146         struct in6_addr *saddr = NULL, *final_p, final;
147         struct inet_timewait_death_row *tcp_death_row;
148         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149         struct inet_sock *inet = inet_sk(sk);
150         struct tcp_sock *tp = tcp_sk(sk);
151         struct net *net = sock_net(sk);
152         struct ipv6_txoptions *opt;
153         struct dst_entry *dst;
154         struct flowi6 fl6;
155         int addr_type;
156         int err;
157
158         if (addr_len < SIN6_LEN_RFC2133)
159                 return -EINVAL;
160
161         if (usin->sin6_family != AF_INET6)
162                 return -EAFNOSUPPORT;
163
164         memset(&fl6, 0, sizeof(fl6));
165
166         if (np->sndflow) {
167                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
168                 IP6_ECN_flow_init(fl6.flowlabel);
169                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
170                         struct ip6_flowlabel *flowlabel;
171                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172                         if (IS_ERR(flowlabel))
173                                 return -EINVAL;
174                         fl6_sock_release(flowlabel);
175                 }
176         }
177
178         /*
179          *      connect() to INADDR_ANY means loopback (BSD'ism).
180          */
181
182         if (ipv6_addr_any(&usin->sin6_addr)) {
183                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
185                                                &usin->sin6_addr);
186                 else
187                         usin->sin6_addr = in6addr_loopback;
188         }
189
190         addr_type = ipv6_addr_type(&usin->sin6_addr);
191
192         if (addr_type & IPV6_ADDR_MULTICAST)
193                 return -ENETUNREACH;
194
195         if (addr_type&IPV6_ADDR_LINKLOCAL) {
196                 if (addr_len >= sizeof(struct sockaddr_in6) &&
197                     usin->sin6_scope_id) {
198                         /* If interface is set while binding, indices
199                          * must coincide.
200                          */
201                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
202                                 return -EINVAL;
203
204                         sk->sk_bound_dev_if = usin->sin6_scope_id;
205                 }
206
207                 /* Connect to link-local address requires an interface */
208                 if (!sk->sk_bound_dev_if)
209                         return -EINVAL;
210         }
211
212         if (tp->rx_opt.ts_recent_stamp &&
213             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214                 tp->rx_opt.ts_recent = 0;
215                 tp->rx_opt.ts_recent_stamp = 0;
216                 WRITE_ONCE(tp->write_seq, 0);
217         }
218
219         sk->sk_v6_daddr = usin->sin6_addr;
220         np->flow_label = fl6.flowlabel;
221
222         /*
223          *      TCP over IPv4
224          */
225
226         if (addr_type & IPV6_ADDR_MAPPED) {
227                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
228                 struct sockaddr_in sin;
229
230                 if (ipv6_only_sock(sk))
231                         return -ENETUNREACH;
232
233                 sin.sin_family = AF_INET;
234                 sin.sin_port = usin->sin6_port;
235                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
236
237                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
239                 if (sk_is_mptcp(sk))
240                         mptcpv6_handle_mapped(sk, true);
241                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
244 #endif
245
246                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247
248                 if (err) {
249                         icsk->icsk_ext_hdr_len = exthdrlen;
250                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
252                         if (sk_is_mptcp(sk))
253                                 mptcpv6_handle_mapped(sk, false);
254                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256                         tp->af_specific = &tcp_sock_ipv6_specific;
257 #endif
258                         goto failure;
259                 }
260                 np->saddr = sk->sk_v6_rcv_saddr;
261
262                 return err;
263         }
264
265         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266                 saddr = &sk->sk_v6_rcv_saddr;
267
268         fl6.flowi6_proto = IPPROTO_TCP;
269         fl6.daddr = sk->sk_v6_daddr;
270         fl6.saddr = saddr ? *saddr : np->saddr;
271         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
290
291         if (!saddr) {
292                 saddr = &fl6.saddr;
293
294                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
295                 if (err)
296                         goto failure;
297         }
298
299         /* set the source address */
300         np->saddr = *saddr;
301         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
302
303         sk->sk_gso_type = SKB_GSO_TCPV6;
304         ip6_dst_store(sk, dst, NULL, NULL);
305
306         icsk->icsk_ext_hdr_len = 0;
307         if (opt)
308                 icsk->icsk_ext_hdr_len = opt->opt_flen +
309                                          opt->opt_nflen;
310
311         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
312
313         inet->inet_dport = usin->sin6_port;
314
315         tcp_set_state(sk, TCP_SYN_SENT);
316         err = inet6_hash_connect(tcp_death_row, sk);
317         if (err)
318                 goto late_failure;
319
320         sk_set_txhash(sk);
321
322         if (likely(!tp->repair)) {
323                 if (!tp->write_seq)
324                         WRITE_ONCE(tp->write_seq,
325                                    secure_tcpv6_seq(np->saddr.s6_addr32,
326                                                     sk->sk_v6_daddr.s6_addr32,
327                                                     inet->inet_sport,
328                                                     inet->inet_dport));
329                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
330                                                    sk->sk_v6_daddr.s6_addr32);
331         }
332
333         if (tcp_fastopen_defer_connect(sk, &err))
334                 return err;
335         if (err)
336                 goto late_failure;
337
338         err = tcp_connect(sk);
339         if (err)
340                 goto late_failure;
341
342         return 0;
343
344 late_failure:
345         tcp_set_state(sk, TCP_CLOSE);
346         inet_bhash2_reset_saddr(sk);
347 failure:
348         inet->inet_dport = 0;
349         sk->sk_route_caps = 0;
350         return err;
351 }
352
353 static void tcp_v6_mtu_reduced(struct sock *sk)
354 {
355         struct dst_entry *dst;
356         u32 mtu;
357
358         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359                 return;
360
361         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362
363         /* Drop requests trying to increase our current mss.
364          * Check done in __ip6_rt_update_pmtu() is too late.
365          */
366         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367                 return;
368
369         dst = inet6_csk_update_pmtu(sk, mtu);
370         if (!dst)
371                 return;
372
373         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374                 tcp_sync_mss(sk, dst_mtu(dst));
375                 tcp_simple_retransmit(sk);
376         }
377 }
378
379 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
380                 u8 type, u8 code, int offset, __be32 info)
381 {
382         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
383         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
384         struct net *net = dev_net(skb->dev);
385         struct request_sock *fastopen;
386         struct ipv6_pinfo *np;
387         struct tcp_sock *tp;
388         __u32 seq, snd_una;
389         struct sock *sk;
390         bool fatal;
391         int err;
392
393         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
394                                         &hdr->daddr, th->dest,
395                                         &hdr->saddr, ntohs(th->source),
396                                         skb->dev->ifindex, inet6_sdif(skb));
397
398         if (!sk) {
399                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
400                                   ICMP6_MIB_INERRORS);
401                 return -ENOENT;
402         }
403
404         if (sk->sk_state == TCP_TIME_WAIT) {
405                 inet_twsk_put(inet_twsk(sk));
406                 return 0;
407         }
408         seq = ntohl(th->seq);
409         fatal = icmpv6_err_convert(type, code, &err);
410         if (sk->sk_state == TCP_NEW_SYN_RECV) {
411                 tcp_req_err(sk, seq, fatal);
412                 return 0;
413         }
414
415         bh_lock_sock(sk);
416         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
417                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
418
419         if (sk->sk_state == TCP_CLOSE)
420                 goto out;
421
422         if (static_branch_unlikely(&ip6_min_hopcount)) {
423                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
424                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
425                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
426                         goto out;
427                 }
428         }
429
430         tp = tcp_sk(sk);
431         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
432         fastopen = rcu_dereference(tp->fastopen_rsk);
433         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
434         if (sk->sk_state != TCP_LISTEN &&
435             !between(seq, snd_una, tp->snd_nxt)) {
436                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
437                 goto out;
438         }
439
440         np = tcp_inet6_sk(sk);
441
442         if (type == NDISC_REDIRECT) {
443                 if (!sock_owned_by_user(sk)) {
444                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
445
446                         if (dst)
447                                 dst->ops->redirect(dst, sk, skb);
448                 }
449                 goto out;
450         }
451
452         if (type == ICMPV6_PKT_TOOBIG) {
453                 u32 mtu = ntohl(info);
454
455                 /* We are not interested in TCP_LISTEN and open_requests
456                  * (SYN-ACKs send out by Linux are always <576bytes so
457                  * they should go through unfragmented).
458                  */
459                 if (sk->sk_state == TCP_LISTEN)
460                         goto out;
461
462                 if (!ip6_sk_accept_pmtu(sk))
463                         goto out;
464
465                 if (mtu < IPV6_MIN_MTU)
466                         goto out;
467
468                 WRITE_ONCE(tp->mtu_info, mtu);
469
470                 if (!sock_owned_by_user(sk))
471                         tcp_v6_mtu_reduced(sk);
472                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
473                                            &sk->sk_tsq_flags))
474                         sock_hold(sk);
475                 goto out;
476         }
477
478
479         /* Might be for an request_sock */
480         switch (sk->sk_state) {
481         case TCP_SYN_SENT:
482         case TCP_SYN_RECV:
483                 /* Only in fast or simultaneous open. If a fast open socket is
484                  * already accepted it is treated as a connected one below.
485                  */
486                 if (fastopen && !fastopen->sk)
487                         break;
488
489                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
490
491                 if (!sock_owned_by_user(sk)) {
492                         WRITE_ONCE(sk->sk_err, err);
493                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
494
495                         tcp_done(sk);
496                 } else {
497                         WRITE_ONCE(sk->sk_err_soft, err);
498                 }
499                 goto out;
500         case TCP_LISTEN:
501                 break;
502         default:
503                 /* check if this ICMP message allows revert of backoff.
504                  * (see RFC 6069)
505                  */
506                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
507                     code == ICMPV6_NOROUTE)
508                         tcp_ld_RTO_revert(sk, seq);
509         }
510
511         if (!sock_owned_by_user(sk) && np->recverr) {
512                 WRITE_ONCE(sk->sk_err, err);
513                 sk_error_report(sk);
514         } else {
515                 WRITE_ONCE(sk->sk_err_soft, err);
516         }
517 out:
518         bh_unlock_sock(sk);
519         sock_put(sk);
520         return 0;
521 }
522
523
524 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
525                               struct flowi *fl,
526                               struct request_sock *req,
527                               struct tcp_fastopen_cookie *foc,
528                               enum tcp_synack_type synack_type,
529                               struct sk_buff *syn_skb)
530 {
531         struct inet_request_sock *ireq = inet_rsk(req);
532         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
533         struct ipv6_txoptions *opt;
534         struct flowi6 *fl6 = &fl->u.ip6;
535         struct sk_buff *skb;
536         int err = -ENOMEM;
537         u8 tclass;
538
539         /* First, grab a route. */
540         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
541                                                IPPROTO_TCP)) == NULL)
542                 goto done;
543
544         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
545
546         if (skb) {
547                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
548                                     &ireq->ir_v6_rmt_addr);
549
550                 fl6->daddr = ireq->ir_v6_rmt_addr;
551                 if (np->repflow && ireq->pktopts)
552                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
553
554                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
555                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
556                                 (np->tclass & INET_ECN_MASK) :
557                                 np->tclass;
558
559                 if (!INET_ECN_is_capable(tclass) &&
560                     tcp_bpf_ca_needs_ecn((struct sock *)req))
561                         tclass |= INET_ECN_ECT_0;
562
563                 rcu_read_lock();
564                 opt = ireq->ipv6_opt;
565                 if (!opt)
566                         opt = rcu_dereference(np->opt);
567                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
568                                opt, tclass, sk->sk_priority);
569                 rcu_read_unlock();
570                 err = net_xmit_eval(err);
571         }
572
573 done:
574         return err;
575 }
576
577
578 static void tcp_v6_reqsk_destructor(struct request_sock *req)
579 {
580         kfree(inet_rsk(req)->ipv6_opt);
581         consume_skb(inet_rsk(req)->pktopts);
582 }
583
584 #ifdef CONFIG_TCP_MD5SIG
585 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
586                                                    const struct in6_addr *addr,
587                                                    int l3index)
588 {
589         return tcp_md5_do_lookup(sk, l3index,
590                                  (union tcp_md5_addr *)addr, AF_INET6);
591 }
592
593 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
594                                                 const struct sock *addr_sk)
595 {
596         int l3index;
597
598         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
599                                                  addr_sk->sk_bound_dev_if);
600         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
601                                     l3index);
602 }
603
604 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
605                                  sockptr_t optval, int optlen)
606 {
607         struct tcp_md5sig cmd;
608         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
609         int l3index = 0;
610         u8 prefixlen;
611         u8 flags;
612
613         if (optlen < sizeof(cmd))
614                 return -EINVAL;
615
616         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
617                 return -EFAULT;
618
619         if (sin6->sin6_family != AF_INET6)
620                 return -EINVAL;
621
622         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
623
624         if (optname == TCP_MD5SIG_EXT &&
625             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
626                 prefixlen = cmd.tcpm_prefixlen;
627                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
628                                         prefixlen > 32))
629                         return -EINVAL;
630         } else {
631                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
632         }
633
634         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
635             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
636                 struct net_device *dev;
637
638                 rcu_read_lock();
639                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
640                 if (dev && netif_is_l3_master(dev))
641                         l3index = dev->ifindex;
642                 rcu_read_unlock();
643
644                 /* ok to reference set/not set outside of rcu;
645                  * right now device MUST be an L3 master
646                  */
647                 if (!dev || !l3index)
648                         return -EINVAL;
649         }
650
651         if (!cmd.tcpm_keylen) {
652                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
653                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
654                                               AF_INET, prefixlen,
655                                               l3index, flags);
656                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
657                                       AF_INET6, prefixlen, l3index, flags);
658         }
659
660         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
661                 return -EINVAL;
662
663         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
664                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
665                                       AF_INET, prefixlen, l3index, flags,
666                                       cmd.tcpm_key, cmd.tcpm_keylen);
667
668         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
669                               AF_INET6, prefixlen, l3index, flags,
670                               cmd.tcpm_key, cmd.tcpm_keylen);
671 }
672
673 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
674                                    const struct in6_addr *daddr,
675                                    const struct in6_addr *saddr,
676                                    const struct tcphdr *th, int nbytes)
677 {
678         struct tcp6_pseudohdr *bp;
679         struct scatterlist sg;
680         struct tcphdr *_th;
681
682         bp = hp->scratch;
683         /* 1. TCP pseudo-header (RFC2460) */
684         bp->saddr = *saddr;
685         bp->daddr = *daddr;
686         bp->protocol = cpu_to_be32(IPPROTO_TCP);
687         bp->len = cpu_to_be32(nbytes);
688
689         _th = (struct tcphdr *)(bp + 1);
690         memcpy(_th, th, sizeof(*th));
691         _th->check = 0;
692
693         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
694         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
695                                 sizeof(*bp) + sizeof(*th));
696         return crypto_ahash_update(hp->md5_req);
697 }
698
699 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
700                                const struct in6_addr *daddr, struct in6_addr *saddr,
701                                const struct tcphdr *th)
702 {
703         struct tcp_md5sig_pool *hp;
704         struct ahash_request *req;
705
706         hp = tcp_get_md5sig_pool();
707         if (!hp)
708                 goto clear_hash_noput;
709         req = hp->md5_req;
710
711         if (crypto_ahash_init(req))
712                 goto clear_hash;
713         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
714                 goto clear_hash;
715         if (tcp_md5_hash_key(hp, key))
716                 goto clear_hash;
717         ahash_request_set_crypt(req, NULL, md5_hash, 0);
718         if (crypto_ahash_final(req))
719                 goto clear_hash;
720
721         tcp_put_md5sig_pool();
722         return 0;
723
724 clear_hash:
725         tcp_put_md5sig_pool();
726 clear_hash_noput:
727         memset(md5_hash, 0, 16);
728         return 1;
729 }
730
731 static int tcp_v6_md5_hash_skb(char *md5_hash,
732                                const struct tcp_md5sig_key *key,
733                                const struct sock *sk,
734                                const struct sk_buff *skb)
735 {
736         const struct in6_addr *saddr, *daddr;
737         struct tcp_md5sig_pool *hp;
738         struct ahash_request *req;
739         const struct tcphdr *th = tcp_hdr(skb);
740
741         if (sk) { /* valid for establish/request sockets */
742                 saddr = &sk->sk_v6_rcv_saddr;
743                 daddr = &sk->sk_v6_daddr;
744         } else {
745                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
746                 saddr = &ip6h->saddr;
747                 daddr = &ip6h->daddr;
748         }
749
750         hp = tcp_get_md5sig_pool();
751         if (!hp)
752                 goto clear_hash_noput;
753         req = hp->md5_req;
754
755         if (crypto_ahash_init(req))
756                 goto clear_hash;
757
758         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
759                 goto clear_hash;
760         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
761                 goto clear_hash;
762         if (tcp_md5_hash_key(hp, key))
763                 goto clear_hash;
764         ahash_request_set_crypt(req, NULL, md5_hash, 0);
765         if (crypto_ahash_final(req))
766                 goto clear_hash;
767
768         tcp_put_md5sig_pool();
769         return 0;
770
771 clear_hash:
772         tcp_put_md5sig_pool();
773 clear_hash_noput:
774         memset(md5_hash, 0, 16);
775         return 1;
776 }
777
778 #endif
779
780 static void tcp_v6_init_req(struct request_sock *req,
781                             const struct sock *sk_listener,
782                             struct sk_buff *skb)
783 {
784         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
785         struct inet_request_sock *ireq = inet_rsk(req);
786         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
787
788         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
789         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
790
791         /* So that link locals have meaning */
792         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
793             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
794                 ireq->ir_iif = tcp_v6_iif(skb);
795
796         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
797             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
798              np->rxopt.bits.rxinfo ||
799              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
800              np->rxopt.bits.rxohlim || np->repflow)) {
801                 refcount_inc(&skb->users);
802                 ireq->pktopts = skb;
803         }
804 }
805
806 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
807                                           struct sk_buff *skb,
808                                           struct flowi *fl,
809                                           struct request_sock *req)
810 {
811         tcp_v6_init_req(req, sk, skb);
812
813         if (security_inet_conn_request(sk, skb, req))
814                 return NULL;
815
816         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
817 }
818
819 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
820         .family         =       AF_INET6,
821         .obj_size       =       sizeof(struct tcp6_request_sock),
822         .rtx_syn_ack    =       tcp_rtx_synack,
823         .send_ack       =       tcp_v6_reqsk_send_ack,
824         .destructor     =       tcp_v6_reqsk_destructor,
825         .send_reset     =       tcp_v6_send_reset,
826         .syn_ack_timeout =      tcp_syn_ack_timeout,
827 };
828
829 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
830         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
831                                 sizeof(struct ipv6hdr),
832 #ifdef CONFIG_TCP_MD5SIG
833         .req_md5_lookup =       tcp_v6_md5_lookup,
834         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
835 #endif
836 #ifdef CONFIG_SYN_COOKIES
837         .cookie_init_seq =      cookie_v6_init_sequence,
838 #endif
839         .route_req      =       tcp_v6_route_req,
840         .init_seq       =       tcp_v6_init_seq,
841         .init_ts_off    =       tcp_v6_init_ts_off,
842         .send_synack    =       tcp_v6_send_synack,
843 };
844
845 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
846                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
847                                  int oif, struct tcp_md5sig_key *key, int rst,
848                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
849 {
850         const struct tcphdr *th = tcp_hdr(skb);
851         struct tcphdr *t1;
852         struct sk_buff *buff;
853         struct flowi6 fl6;
854         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
855         struct sock *ctl_sk = net->ipv6.tcp_sk;
856         unsigned int tot_len = sizeof(struct tcphdr);
857         __be32 mrst = 0, *topt;
858         struct dst_entry *dst;
859         __u32 mark = 0;
860
861         if (tsecr)
862                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
863 #ifdef CONFIG_TCP_MD5SIG
864         if (key)
865                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
866 #endif
867
868 #ifdef CONFIG_MPTCP
869         if (rst && !key) {
870                 mrst = mptcp_reset_option(skb);
871
872                 if (mrst)
873                         tot_len += sizeof(__be32);
874         }
875 #endif
876
877         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
878         if (!buff)
879                 return;
880
881         skb_reserve(buff, MAX_TCP_HEADER);
882
883         t1 = skb_push(buff, tot_len);
884         skb_reset_transport_header(buff);
885
886         /* Swap the send and the receive. */
887         memset(t1, 0, sizeof(*t1));
888         t1->dest = th->source;
889         t1->source = th->dest;
890         t1->doff = tot_len / 4;
891         t1->seq = htonl(seq);
892         t1->ack_seq = htonl(ack);
893         t1->ack = !rst || !th->ack;
894         t1->rst = rst;
895         t1->window = htons(win);
896
897         topt = (__be32 *)(t1 + 1);
898
899         if (tsecr) {
900                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
901                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
902                 *topt++ = htonl(tsval);
903                 *topt++ = htonl(tsecr);
904         }
905
906         if (mrst)
907                 *topt++ = mrst;
908
909 #ifdef CONFIG_TCP_MD5SIG
910         if (key) {
911                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
912                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
913                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
914                                     &ipv6_hdr(skb)->saddr,
915                                     &ipv6_hdr(skb)->daddr, t1);
916         }
917 #endif
918
919         memset(&fl6, 0, sizeof(fl6));
920         fl6.daddr = ipv6_hdr(skb)->saddr;
921         fl6.saddr = ipv6_hdr(skb)->daddr;
922         fl6.flowlabel = label;
923
924         buff->ip_summed = CHECKSUM_PARTIAL;
925
926         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
927
928         fl6.flowi6_proto = IPPROTO_TCP;
929         if (rt6_need_strict(&fl6.daddr) && !oif)
930                 fl6.flowi6_oif = tcp_v6_iif(skb);
931         else {
932                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
933                         oif = skb->skb_iif;
934
935                 fl6.flowi6_oif = oif;
936         }
937
938         if (sk) {
939                 if (sk->sk_state == TCP_TIME_WAIT)
940                         mark = inet_twsk(sk)->tw_mark;
941                 else
942                         mark = READ_ONCE(sk->sk_mark);
943                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
944         }
945         if (txhash) {
946                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
947                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
948         }
949         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
950         fl6.fl6_dport = t1->dest;
951         fl6.fl6_sport = t1->source;
952         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
953         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
954
955         /* Pass a socket to ip6_dst_lookup either it is for RST
956          * Underlying function will use this to retrieve the network
957          * namespace
958          */
959         if (sk && sk->sk_state != TCP_TIME_WAIT)
960                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
961         else
962                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
963         if (!IS_ERR(dst)) {
964                 skb_dst_set(buff, dst);
965                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
966                          tclass & ~INET_ECN_MASK, priority);
967                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
968                 if (rst)
969                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
970                 return;
971         }
972
973         kfree_skb(buff);
974 }
975
976 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
977 {
978         const struct tcphdr *th = tcp_hdr(skb);
979         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
980         u32 seq = 0, ack_seq = 0;
981         struct tcp_md5sig_key *key = NULL;
982 #ifdef CONFIG_TCP_MD5SIG
983         const __u8 *hash_location = NULL;
984         unsigned char newhash[16];
985         int genhash;
986         struct sock *sk1 = NULL;
987 #endif
988         __be32 label = 0;
989         u32 priority = 0;
990         struct net *net;
991         u32 txhash = 0;
992         int oif = 0;
993
994         if (th->rst)
995                 return;
996
997         /* If sk not NULL, it means we did a successful lookup and incoming
998          * route had to be correct. prequeue might have dropped our dst.
999          */
1000         if (!sk && !ipv6_unicast_destination(skb))
1001                 return;
1002
1003         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1004 #ifdef CONFIG_TCP_MD5SIG
1005         rcu_read_lock();
1006         hash_location = tcp_parse_md5sig_option(th);
1007         if (sk && sk_fullsock(sk)) {
1008                 int l3index;
1009
1010                 /* sdif set, means packet ingressed via a device
1011                  * in an L3 domain and inet_iif is set to it.
1012                  */
1013                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1014                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1015         } else if (hash_location) {
1016                 int dif = tcp_v6_iif_l3_slave(skb);
1017                 int sdif = tcp_v6_sdif(skb);
1018                 int l3index;
1019
1020                 /*
1021                  * active side is lost. Try to find listening socket through
1022                  * source port, and then find md5 key through listening socket.
1023                  * we are not loose security here:
1024                  * Incoming packet is checked with md5 hash with finding key,
1025                  * no RST generated if md5 hash doesn't match.
1026                  */
1027                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1028                                             NULL, 0, &ipv6h->saddr, th->source,
1029                                             &ipv6h->daddr, ntohs(th->source),
1030                                             dif, sdif);
1031                 if (!sk1)
1032                         goto out;
1033
1034                 /* sdif set, means packet ingressed via a device
1035                  * in an L3 domain and dif is set to it.
1036                  */
1037                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1038
1039                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1040                 if (!key)
1041                         goto out;
1042
1043                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1044                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1045                         goto out;
1046         }
1047 #endif
1048
1049         if (th->ack)
1050                 seq = ntohl(th->ack_seq);
1051         else
1052                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1053                           (th->doff << 2);
1054
1055         if (sk) {
1056                 oif = sk->sk_bound_dev_if;
1057                 if (sk_fullsock(sk)) {
1058                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1059
1060                         trace_tcp_send_reset(sk, skb);
1061                         if (np->repflow)
1062                                 label = ip6_flowlabel(ipv6h);
1063                         priority = sk->sk_priority;
1064                         txhash = sk->sk_txhash;
1065                 }
1066                 if (sk->sk_state == TCP_TIME_WAIT) {
1067                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1068                         priority = inet_twsk(sk)->tw_priority;
1069                         txhash = inet_twsk(sk)->tw_txhash;
1070                 }
1071         } else {
1072                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1073                         label = ip6_flowlabel(ipv6h);
1074         }
1075
1076         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1077                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1078
1079 #ifdef CONFIG_TCP_MD5SIG
1080 out:
1081         rcu_read_unlock();
1082 #endif
1083 }
1084
1085 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1086                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1087                             struct tcp_md5sig_key *key, u8 tclass,
1088                             __be32 label, u32 priority, u32 txhash)
1089 {
1090         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1091                              tclass, label, priority, txhash);
1092 }
1093
1094 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1095 {
1096         struct inet_timewait_sock *tw = inet_twsk(sk);
1097         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1098
1099         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1100                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1101                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1102                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1103                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1104                         tw->tw_txhash);
1105
1106         inet_twsk_put(tw);
1107 }
1108
1109 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1110                                   struct request_sock *req)
1111 {
1112         int l3index;
1113
1114         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1115
1116         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1117          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1118          */
1119         /* RFC 7323 2.3
1120          * The window field (SEG.WND) of every outgoing segment, with the
1121          * exception of <SYN> segments, MUST be right-shifted by
1122          * Rcv.Wind.Shift bits:
1123          */
1124         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1125                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1126                         tcp_rsk(req)->rcv_nxt,
1127                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1128                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1129                         READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1130                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1131                         ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1132                         READ_ONCE(sk->sk_priority),
1133                         READ_ONCE(tcp_rsk(req)->txhash));
1134 }
1135
1136
/*
 * With CONFIG_SYN_COOKIES, run a non-SYN segment through cookie_v6_check()
 * and return its result; SYN segments, and every segment when syncookies
 * are compiled out, get @sk back unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
1147
1148 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1149                          struct tcphdr *th, u32 *cookie)
1150 {
1151         u16 mss = 0;
1152 #ifdef CONFIG_SYN_COOKIES
1153         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1154                                     &tcp_request_sock_ipv6_ops, sk, th);
1155         if (mss) {
1156                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1157                 tcp_synq_overflow(sk);
1158         }
1159 #endif
1160         return mss;
1161 }
1162
1163 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1164 {
1165         if (skb->protocol == htons(ETH_P_IP))
1166                 return tcp_v4_conn_request(sk, skb);
1167
1168         if (!ipv6_unicast_destination(skb))
1169                 goto drop;
1170
1171         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1172                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1173                 return 0;
1174         }
1175
1176         return tcp_conn_request(&tcp6_request_sock_ops,
1177                                 &tcp_request_sock_ipv6_ops, sk, skb);
1178
1179 drop:
1180         tcp_listendrop(sk);
1181         return 0; /* don't send reset */
1182 }
1183
1184 static void tcp_v6_restore_cb(struct sk_buff *skb)
1185 {
1186         /* We need to move header back to the beginning if xfrm6_policy_check()
1187          * and tcp_v6_fill_cb() are going to be called again.
1188          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1189          */
1190         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1191                 sizeof(struct inet6_skb_parm));
1192 }
1193
1194 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1195                                          struct request_sock *req,
1196                                          struct dst_entry *dst,
1197                                          struct request_sock *req_unhash,
1198                                          bool *own_req)
1199 {
1200         struct inet_request_sock *ireq;
1201         struct ipv6_pinfo *newnp;
1202         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1203         struct ipv6_txoptions *opt;
1204         struct inet_sock *newinet;
1205         bool found_dup_sk = false;
1206         struct tcp_sock *newtp;
1207         struct sock *newsk;
1208 #ifdef CONFIG_TCP_MD5SIG
1209         struct tcp_md5sig_key *key;
1210         int l3index;
1211 #endif
1212         struct flowi6 fl6;
1213
1214         if (skb->protocol == htons(ETH_P_IP)) {
1215                 /*
1216                  *      v6 mapped
1217                  */
1218
1219                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1220                                              req_unhash, own_req);
1221
1222                 if (!newsk)
1223                         return NULL;
1224
1225                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1226
1227                 newnp = tcp_inet6_sk(newsk);
1228                 newtp = tcp_sk(newsk);
1229
1230                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1231
1232                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1233
1234                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1235                 if (sk_is_mptcp(newsk))
1236                         mptcpv6_handle_mapped(newsk, true);
1237                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1238 #ifdef CONFIG_TCP_MD5SIG
1239                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1240 #endif
1241
1242                 newnp->ipv6_mc_list = NULL;
1243                 newnp->ipv6_ac_list = NULL;
1244                 newnp->ipv6_fl_list = NULL;
1245                 newnp->pktoptions  = NULL;
1246                 newnp->opt         = NULL;
1247                 newnp->mcast_oif   = inet_iif(skb);
1248                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1249                 newnp->rcv_flowinfo = 0;
1250                 if (np->repflow)
1251                         newnp->flow_label = 0;
1252
1253                 /*
1254                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1255                  * here, tcp_create_openreq_child now does this for us, see the comment in
1256                  * that function for the gory details. -acme
1257                  */
1258
1259                 /* It is tricky place. Until this moment IPv4 tcp
1260                    worked with IPv6 icsk.icsk_af_ops.
1261                    Sync it now.
1262                  */
1263                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1264
1265                 return newsk;
1266         }
1267
1268         ireq = inet_rsk(req);
1269
1270         if (sk_acceptq_is_full(sk))
1271                 goto out_overflow;
1272
1273         if (!dst) {
1274                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1275                 if (!dst)
1276                         goto out;
1277         }
1278
1279         newsk = tcp_create_openreq_child(sk, req, skb);
1280         if (!newsk)
1281                 goto out_nonewsk;
1282
1283         /*
1284          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1285          * count here, tcp_create_openreq_child now does this for us, see the
1286          * comment in that function for the gory details. -acme
1287          */
1288
1289         newsk->sk_gso_type = SKB_GSO_TCPV6;
1290         ip6_dst_store(newsk, dst, NULL, NULL);
1291         inet6_sk_rx_dst_set(newsk, skb);
1292
1293         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1294
1295         newtp = tcp_sk(newsk);
1296         newinet = inet_sk(newsk);
1297         newnp = tcp_inet6_sk(newsk);
1298
1299         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1300
1301         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1302         newnp->saddr = ireq->ir_v6_loc_addr;
1303         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1304         newsk->sk_bound_dev_if = ireq->ir_iif;
1305
1306         /* Now IPv6 options...
1307
1308            First: no IPv4 options.
1309          */
1310         newinet->inet_opt = NULL;
1311         newnp->ipv6_mc_list = NULL;
1312         newnp->ipv6_ac_list = NULL;
1313         newnp->ipv6_fl_list = NULL;
1314
1315         /* Clone RX bits */
1316         newnp->rxopt.all = np->rxopt.all;
1317
1318         newnp->pktoptions = NULL;
1319         newnp->opt        = NULL;
1320         newnp->mcast_oif  = tcp_v6_iif(skb);
1321         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1322         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1323         if (np->repflow)
1324                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1325
1326         /* Set ToS of the new socket based upon the value of incoming SYN.
1327          * ECT bits are set later in tcp_init_transfer().
1328          */
1329         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1330                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1331
1332         /* Clone native IPv6 options from listening socket (if any)
1333
1334            Yes, keeping reference count would be much more clever,
1335            but we make one more one thing there: reattach optmem
1336            to newsk.
1337          */
1338         opt = ireq->ipv6_opt;
1339         if (!opt)
1340                 opt = rcu_dereference(np->opt);
1341         if (opt) {
1342                 opt = ipv6_dup_options(newsk, opt);
1343                 RCU_INIT_POINTER(newnp->opt, opt);
1344         }
1345         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1346         if (opt)
1347                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1348                                                     opt->opt_flen;
1349
1350         tcp_ca_openreq_child(newsk, dst);
1351
1352         tcp_sync_mss(newsk, dst_mtu(dst));
1353         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1354
1355         tcp_initialize_rcv_mss(newsk);
1356
1357         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1358         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1359
1360 #ifdef CONFIG_TCP_MD5SIG
1361         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1362
1363         /* Copy over the MD5 key from the original socket */
1364         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1365         if (key) {
1366                 const union tcp_md5_addr *addr;
1367
1368                 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1369                 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1370                         inet_csk_prepare_forced_close(newsk);
1371                         tcp_done(newsk);
1372                         goto out;
1373                 }
1374         }
1375 #endif
1376
1377         if (__inet_inherit_port(sk, newsk) < 0) {
1378                 inet_csk_prepare_forced_close(newsk);
1379                 tcp_done(newsk);
1380                 goto out;
1381         }
1382         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1383                                        &found_dup_sk);
1384         if (*own_req) {
1385                 tcp_move_syn(newtp, req);
1386
1387                 /* Clone pktoptions received with SYN, if we own the req */
1388                 if (ireq->pktopts) {
1389                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1390                         consume_skb(ireq->pktopts);
1391                         ireq->pktopts = NULL;
1392                         if (newnp->pktoptions)
1393                                 tcp_v6_restore_cb(newnp->pktoptions);
1394                 }
1395         } else {
1396                 if (!req_unhash && found_dup_sk) {
1397                         /* This code path should only be executed in the
1398                          * syncookie case only
1399                          */
1400                         bh_unlock_sock(newsk);
1401                         sock_put(newsk);
1402                         newsk = NULL;
1403                 }
1404         }
1405
1406         return newsk;
1407
1408 out_overflow:
1409         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1410 out_nonewsk:
1411         dst_release(dst);
1412 out:
1413         tcp_listendrop(sk);
1414         return NULL;
1415 }
1416
1417 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1418                                                            u32));
1419 /* The socket must have it's spinlock held when we get
1420  * here, unless it is a TCP_LISTEN socket.
1421  *
1422  * We have a potential double-lock case here, so even when
1423  * doing backlog processing we use the BH locking scheme.
1424  * This is because we cannot sleep with the original spinlock
1425  * held.
1426  */
1427 INDIRECT_CALLABLE_SCOPE
1428 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1429 {
1430         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1431         struct sk_buff *opt_skb = NULL;
1432         enum skb_drop_reason reason;
1433         struct tcp_sock *tp;
1434
1435         /* Imagine: socket is IPv6. IPv4 packet arrives,
1436            goes to IPv4 receive handler and backlogged.
1437            From backlog it always goes here. Kerboom...
1438            Fortunately, tcp_rcv_established and rcv_established
1439            handle them correctly, but it is not case with
1440            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1441          */
1442
1443         if (skb->protocol == htons(ETH_P_IP))
1444                 return tcp_v4_do_rcv(sk, skb);
1445
1446         /*
1447          *      socket locking is here for SMP purposes as backlog rcv
1448          *      is currently called with bh processing disabled.
1449          */
1450
1451         /* Do Stevens' IPV6_PKTOPTIONS.
1452
1453            Yes, guys, it is the only place in our code, where we
1454            may make it not affecting IPv4.
1455            The rest of code is protocol independent,
1456            and I do not like idea to uglify IPv4.
1457
1458            Actually, all the idea behind IPV6_PKTOPTIONS
1459            looks not very well thought. For now we latch
1460            options, received in the last packet, enqueued
1461            by tcp. Feel free to propose better solution.
1462                                                --ANK (980728)
1463          */
1464         if (np->rxopt.all)
1465                 opt_skb = skb_clone_and_charge_r(skb, sk);
1466
1467         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1468         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1469                 struct dst_entry *dst;
1470
1471                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1472                                                 lockdep_sock_is_held(sk));
1473
1474                 sock_rps_save_rxhash(sk, skb);
1475                 sk_mark_napi_id(sk, skb);
1476                 if (dst) {
1477                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1478                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1479                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1480                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1481                                 dst_release(dst);
1482                         }
1483                 }
1484
1485                 tcp_rcv_established(sk, skb);
1486                 if (opt_skb)
1487                         goto ipv6_pktoptions;
1488                 return 0;
1489         }
1490
1491         if (tcp_checksum_complete(skb))
1492                 goto csum_err;
1493
1494         if (sk->sk_state == TCP_LISTEN) {
1495                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1496
1497                 if (!nsk)
1498                         goto discard;
1499
1500                 if (nsk != sk) {
1501                         if (tcp_child_process(sk, nsk, skb))
1502                                 goto reset;
1503                         if (opt_skb)
1504                                 __kfree_skb(opt_skb);
1505                         return 0;
1506                 }
1507         } else
1508                 sock_rps_save_rxhash(sk, skb);
1509
1510         if (tcp_rcv_state_process(sk, skb))
1511                 goto reset;
1512         if (opt_skb)
1513                 goto ipv6_pktoptions;
1514         return 0;
1515
1516 reset:
1517         tcp_v6_send_reset(sk, skb);
1518 discard:
1519         if (opt_skb)
1520                 __kfree_skb(opt_skb);
1521         kfree_skb_reason(skb, reason);
1522         return 0;
1523 csum_err:
1524         reason = SKB_DROP_REASON_TCP_CSUM;
1525         trace_tcp_bad_csum(skb);
1526         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1527         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1528         goto discard;
1529
1530
1531 ipv6_pktoptions:
1532         /* Do you ask, what is it?
1533
1534            1. skb was enqueued by tcp.
1535            2. skb is added to tail of read queue, rather than out of order.
1536            3. socket is not in passive state.
1537            4. Finally, it really contains options, which user wants to receive.
1538          */
1539         tp = tcp_sk(sk);
1540         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1541             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1542                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1543                         np->mcast_oif = tcp_v6_iif(opt_skb);
1544                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1545                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1546                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1547                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1548                 if (np->repflow)
1549                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1550                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1551                         tcp_v6_restore_cb(opt_skb);
1552                         opt_skb = xchg(&np->pktoptions, opt_skb);
1553                 } else {
1554                         __kfree_skb(opt_skb);
1555                         opt_skb = xchg(&np->pktoptions, NULL);
1556                 }
1557         }
1558
1559         consume_skb(opt_skb);
1560         return 0;
1561 }
1562
1563 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1564                            const struct tcphdr *th)
1565 {
1566         /* This is tricky: we move IP6CB at its correct location into
1567          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1568          * _decode_session6() uses IP6CB().
1569          * barrier() makes sure compiler won't play aliasing games.
1570          */
1571         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1572                 sizeof(struct inet6_skb_parm));
1573         barrier();
1574
1575         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1576         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1577                                     skb->len - th->doff*4);
1578         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1579         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1580         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1581         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1582         TCP_SKB_CB(skb)->sacked = 0;
1583         TCP_SKB_CB(skb)->has_rxtstamp =
1584                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1585 }
1586
/*
 * tcp_v6_rcv - receive handler for TCP segments carried over IPv6
 * @skb: incoming packet
 *
 * Checks the TCP header length and checksum, looks up the owning socket
 * and then dispatches on the socket state:
 *  - TCP_TIME_WAIT: handled at do_time_wait via
 *    tcp_timewait_state_process();
 *  - TCP_NEW_SYN_RECV: the request is validated against its listener with
 *    tcp_check_req() and, for a new child, tcp_child_process();
 *  - TCP_LISTEN: passed to tcp_v6_do_rcv() without bh_lock_sock_nested();
 *  - anything else: processed under bh_lock_sock_nested(), either directly
 *    through tcp_v6_do_rcv() or queued with tcp_add_backlog() when
 *    sock_owned_by_user() is true.
 *
 * Every drop path ends at discard_it, which frees the skb with
 * kfree_skb_reason() and the drop reason collected so far.
 *
 * Return: -1 when tcp_v6_do_rcv() returned non-zero on the normal path,
 * 0 in all other cases.
 */
1587 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1588 {
1589         enum skb_drop_reason drop_reason;
1590         int sdif = inet6_sdif(skb);
1591         int dif = inet6_iif(skb);
1592         const struct tcphdr *th;
1593         const struct ipv6hdr *hdr;
1594         bool refcounted;
1595         struct sock *sk;
1596         int ret;
1597         struct net *net = dev_net(skb->dev);
1598
1599         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1600         if (skb->pkt_type != PACKET_HOST)
1601                 goto discard_it;
1602
1603         /*
1604          *      Count it even if it's bad.
1605          */
1606         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1607
1608         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1609                 goto discard_it;
1610
1611         th = (const struct tcphdr *)skb->data;
1612
             /* doff counts 32-bit words, so it must cover at least the fixed header. */
1613         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1614                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1615                 goto bad_packet;
1616         }
             /* Now require the whole header of doff * 4 bytes. */
1617         if (!pskb_may_pull(skb, th->doff*4))
1618                 goto discard_it;
1619
1620         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1621                 goto csum_error;
1622
             /* Re-derive the header pointers; presumably needed because the
              * pskb_may_pull() calls above may have changed skb->data (confirm).
              */
1623         th = (const struct tcphdr *)skb->data;
1624         hdr = ipv6_hdr(skb);
1625
1626 lookup:
1627         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1628                                 th->source, th->dest, inet6_iif(skb), sdif,
1629                                 &refcounted);
1630         if (!sk)
1631                 goto no_tcp_socket;
1632
1633 process:
1634         if (sk->sk_state == TCP_TIME_WAIT)
1635                 goto do_time_wait;
1636
1637         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1638                 struct request_sock *req = inet_reqsk(sk);
1639                 bool req_stolen = false;
1640                 struct sock *nsk;
1641
                     /* From here on sk is the listener; the looked-up request
                      * is tracked through req and released with reqsk_put().
                      */
1642                 sk = req->rsk_listener;
1643                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1644                                                    &hdr->saddr, &hdr->daddr,
1645                                                    AF_INET6, dif, sdif);
1646                 if (drop_reason) {
1647                         sk_drops_add(sk, skb);
1648                         reqsk_put(req);
1649                         goto discard_it;
1650                 }
1651                 if (tcp_checksum_complete(skb)) {
1652                         reqsk_put(req);
1653                         goto csum_error;
1654                 }
1655                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1656                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1657                         if (!nsk) {
1658                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1659                                 goto lookup;
1660                         }
1661                         sk = nsk;
1662                         /* reuseport_migrate_sock() has already held one sk_refcnt
1663                          * before returning.
1664                          */
1665                 } else {
1666                         sock_hold(sk);
1667                 }
                     /* Either branch above left us holding a reference on sk. */
1668                 refcounted = true;
1669                 nsk = NULL;
1670                 if (!tcp_filter(sk, skb)) {
1671                         th = (const struct tcphdr *)skb->data;
1672                         hdr = ipv6_hdr(skb);
1673                         tcp_v6_fill_cb(skb, hdr, th);
1674                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1675                 } else {
1676                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1677                 }
1678                 if (!nsk) {
1679                         reqsk_put(req);
1680                         if (req_stolen) {
1681                                 /* Another cpu got exclusive access to req
1682                                  * and created a full blown socket.
1683                                  * Try to feed this packet to this socket
1684                                  * instead of discarding it.
1685                                  */
1686                                 tcp_v6_restore_cb(skb);
1687                                 sock_put(sk);
1688                                 goto lookup;
1689                         }
1690                         goto discard_and_relse;
1691                 }
                     /* nsk == sk: continue below with the listener itself;
                      * otherwise nsk is a different socket that is given the
                      * packet through tcp_child_process().
                      */
1692                 if (nsk == sk) {
1693                         reqsk_put(req);
1694                         tcp_v6_restore_cb(skb);
1695                 } else if (tcp_child_process(sk, nsk, skb)) {
1696                         tcp_v6_send_reset(nsk, skb);
1697                         goto discard_and_relse;
1698                 } else {
1699                         sock_put(sk);
1700                         return 0;
1701                 }
1702         }
1703
1704         if (static_branch_unlikely(&ip6_min_hopcount)) {
1705                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1706                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1707                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1708                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1709                         goto discard_and_relse;
1710                 }
1711         }
1712
1713         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1714                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1715                 goto discard_and_relse;
1716         }
1717
1718         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1719                                            AF_INET6, dif, sdif);
1720         if (drop_reason)
1721                 goto discard_and_relse;
1722
1723         nf_reset_ct(skb);
1724
1725         if (tcp_filter(sk, skb)) {
1726                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1727                 goto discard_and_relse;
1728         }
             /* Header pointers are re-derived after tcp_filter() before the
              * control block is filled in.
              */
1729         th = (const struct tcphdr *)skb->data;
1730         hdr = ipv6_hdr(skb);
1731         tcp_v6_fill_cb(skb, hdr, th);
1732
1733         skb->dev = NULL;
1734
             /* Listeners are processed without taking the socket lock here. */
1735         if (sk->sk_state == TCP_LISTEN) {
1736                 ret = tcp_v6_do_rcv(sk, skb);
1737                 goto put_and_return;
1738         }
1739
1740         sk_incoming_cpu_update(sk);
1741
1742         bh_lock_sock_nested(sk);
1743         tcp_segs_in(tcp_sk(sk), skb);
1744         ret = 0;
1745         if (!sock_owned_by_user(sk)) {
1746                 ret = tcp_v6_do_rcv(sk, skb);
1747         } else {
                     /* NOTE(review): this failure path jumps away without
                      * bh_unlock_sock(); presumably tcp_add_backlog() drops
                      * the lock itself when it fails - confirm.
                      */
1748                 if (tcp_add_backlog(sk, skb, &drop_reason))
1749                         goto discard_and_relse;
1750         }
1751         bh_unlock_sock(sk);
1752 put_and_return:
1753         if (refcounted)
1754                 sock_put(sk);
1755         return ret ? -1 : 0;
1756
1757 no_tcp_socket:
1758         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1759         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1760                 goto discard_it;
1761
1762         tcp_v6_fill_cb(skb, hdr, th);
1763
             /* The csum_error and bad_packet labels sit inside this if-body so
              * that earlier error paths share the statistics updates; a packet
              * with a good checksum but no socket gets a reset instead.
              */
1764         if (tcp_checksum_complete(skb)) {
1765 csum_error:
1766                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1767                 trace_tcp_bad_csum(skb);
1768                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1769 bad_packet:
1770                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1771         } else {
1772                 tcp_v6_send_reset(NULL, skb);
1773         }
1774
1775 discard_it:
1776         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1777         kfree_skb_reason(skb, drop_reason);
1778         return 0;
1779
1780 discard_and_relse:
1781         sk_drops_add(sk, skb);
1782         if (refcounted)
1783                 sock_put(sk);
1784         goto discard_it;
1785
1786 do_time_wait:
1787         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1788                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1789                 inet_twsk_put(inet_twsk(sk));
1790                 goto discard_it;
1791         }
1792
1793         tcp_v6_fill_cb(skb, hdr, th);
1794
1795         if (tcp_checksum_complete(skb)) {
1796                 inet_twsk_put(inet_twsk(sk));
1797                 goto csum_error;
1798         }
1799
1800         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1801         case TCP_TW_SYN:
1802         {
1803                 struct sock *sk2;
1804
1805                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1806                                             skb, __tcp_hdrlen(th),
1807                                             &ipv6_hdr(skb)->saddr, th->source,
1808                                             &ipv6_hdr(skb)->daddr,
1809                                             ntohs(th->dest),
1810                                             tcp_v6_iif_l3_slave(skb),
1811                                             sdif);
1812                 if (sk2) {
                             /* A listener exists: release the time-wait socket
                              * and restart processing with the listener. With
                              * refcounted cleared, no sock_put() is issued for
                              * sk2 on the way out.
                              */
1813                         struct inet_timewait_sock *tw = inet_twsk(sk);
1814                         inet_twsk_deschedule_put(tw);
1815                         sk = sk2;
1816                         tcp_v6_restore_cb(skb);
1817                         refcounted = false;
1818                         goto process;
1819                 }
1820         }
1821                 /* to ACK */
1822                 fallthrough;
1823         case TCP_TW_ACK:
1824                 tcp_v6_timewait_ack(sk, skb);
1825                 break;
1826         case TCP_TW_RST:
1827                 tcp_v6_send_reset(sk, skb);
1828                 inet_twsk_deschedule_put(inet_twsk(sk));
1829                 goto discard_it;
1830         case TCP_TW_SUCCESS:
1831                 ;
1832         }
1833         goto discard_it;
1834 }
1835
1836 void tcp_v6_early_demux(struct sk_buff *skb)
1837 {
1838         struct net *net = dev_net(skb->dev);
1839         const struct ipv6hdr *hdr;
1840         const struct tcphdr *th;
1841         struct sock *sk;
1842
1843         if (skb->pkt_type != PACKET_HOST)
1844                 return;
1845
1846         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1847                 return;
1848
1849         hdr = ipv6_hdr(skb);
1850         th = tcp_hdr(skb);
1851
1852         if (th->doff < sizeof(struct tcphdr) / 4)
1853                 return;
1854
1855         /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1856         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1857                                         &hdr->saddr, th->source,
1858                                         &hdr->daddr, ntohs(th->dest),
1859                                         inet6_iif(skb), inet6_sdif(skb));
1860         if (sk) {
1861                 skb->sk = sk;
1862                 skb->destructor = sock_edemux;
1863                 if (sk_fullsock(sk)) {
1864                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1865
1866                         if (dst)
1867                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1868                         if (dst &&
1869                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1870                                 skb_dst_set_noref(skb, dst);
1871                 }
1872         }
1873 }
1874
1875 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1876         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1877         .twsk_unique    = tcp_twsk_unique,
1878         .twsk_destructor = tcp_twsk_destructor,
1879 };
1880
1881 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1882 {
1883         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1884 }
1885
1886 const struct inet_connection_sock_af_ops ipv6_specific = {
1887         .queue_xmit        = inet6_csk_xmit,
1888         .send_check        = tcp_v6_send_check,
1889         .rebuild_header    = inet6_sk_rebuild_header,
1890         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1891         .conn_request      = tcp_v6_conn_request,
1892         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1893         .net_header_len    = sizeof(struct ipv6hdr),
1894         .net_frag_header_len = sizeof(struct frag_hdr),
1895         .setsockopt        = ipv6_setsockopt,
1896         .getsockopt        = ipv6_getsockopt,
1897         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1898         .sockaddr_len      = sizeof(struct sockaddr_in6),
1899         .mtu_reduced       = tcp_v6_mtu_reduced,
1900 };
1901
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature callbacks installed by tcp_v6_init_sock(). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
        .md5_parse     = tcp_v6_parse_md5_keys,
        .md5_lookup    = tcp_v6_md5_lookup,
        .calc_md5_hash = tcp_v6_md5_hash_skb,
};
#endif
1909
1910 /*
1911  *      TCP over IPv4 via INET6 API
1912  */
1913 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1914         .queue_xmit        = ip_queue_xmit,
1915         .send_check        = tcp_v4_send_check,
1916         .rebuild_header    = inet_sk_rebuild_header,
1917         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1918         .conn_request      = tcp_v6_conn_request,
1919         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1920         .net_header_len    = sizeof(struct iphdr),
1921         .setsockopt        = ipv6_setsockopt,
1922         .getsockopt        = ipv6_getsockopt,
1923         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1924         .sockaddr_len      = sizeof(struct sockaddr_in6),
1925         .mtu_reduced       = tcp_v4_mtu_reduced,
1926 };
1927
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature callbacks: IPv4 lookup and hashing, IPv6 key parsing. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
        .md5_parse     = tcp_v6_parse_md5_keys,
        .md5_lookup    = tcp_v4_md5_lookup,
        .calc_md5_hash = tcp_v4_md5_hash_skb,
};
#endif
1935
1936 /* NOTE: A lot of things set to zero explicitly by call to
1937  *       sk_alloc() so need not be done here.
1938  */
1939 static int tcp_v6_init_sock(struct sock *sk)
1940 {
1941         struct inet_connection_sock *icsk = inet_csk(sk);
1942
1943         tcp_init_sock(sk);
1944
1945         icsk->icsk_af_ops = &ipv6_specific;
1946
1947 #ifdef CONFIG_TCP_MD5SIG
1948         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1949 #endif
1950
1951         return 0;
1952 }
1953
1954 #ifdef CONFIG_PROC_FS
1955 /* Proc filesystem TCPv6 sock list dumping. */
1956 static void get_openreq6(struct seq_file *seq,
1957                          const struct request_sock *req, int i)
1958 {
1959         long ttd = req->rsk_timer.expires - jiffies;
1960         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1961         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1962
1963         if (ttd < 0)
1964                 ttd = 0;
1965
1966         seq_printf(seq,
1967                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1968                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1969                    i,
1970                    src->s6_addr32[0], src->s6_addr32[1],
1971                    src->s6_addr32[2], src->s6_addr32[3],
1972                    inet_rsk(req)->ir_num,
1973                    dest->s6_addr32[0], dest->s6_addr32[1],
1974                    dest->s6_addr32[2], dest->s6_addr32[3],
1975                    ntohs(inet_rsk(req)->ir_rmt_port),
1976                    TCP_SYN_RECV,
1977                    0, 0, /* could print option size, but that is af dependent. */
1978                    1,   /* timers active (only the expire timer) */
1979                    jiffies_to_clock_t(ttd),
1980                    req->num_timeout,
1981                    from_kuid_munged(seq_user_ns(seq),
1982                                     sock_i_uid(req->rsk_listener)),
1983                    0,  /* non standard timer */
1984                    0, /* open_requests have no inode */
1985                    0, req);
1986 }
1987
1988 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1989 {
1990         const struct in6_addr *dest, *src;
1991         __u16 destp, srcp;
1992         int timer_active;
1993         unsigned long timer_expires;
1994         const struct inet_sock *inet = inet_sk(sp);
1995         const struct tcp_sock *tp = tcp_sk(sp);
1996         const struct inet_connection_sock *icsk = inet_csk(sp);
1997         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1998         int rx_queue;
1999         int state;
2000
2001         dest  = &sp->sk_v6_daddr;
2002         src   = &sp->sk_v6_rcv_saddr;
2003         destp = ntohs(inet->inet_dport);
2004         srcp  = ntohs(inet->inet_sport);
2005
2006         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2007             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2008             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2009                 timer_active    = 1;
2010                 timer_expires   = icsk->icsk_timeout;
2011         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2012                 timer_active    = 4;
2013                 timer_expires   = icsk->icsk_timeout;
2014         } else if (timer_pending(&sp->sk_timer)) {
2015                 timer_active    = 2;
2016                 timer_expires   = sp->sk_timer.expires;
2017         } else {
2018                 timer_active    = 0;
2019                 timer_expires = jiffies;
2020         }
2021
2022         state = inet_sk_state_load(sp);
2023         if (state == TCP_LISTEN)
2024                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2025         else
2026                 /* Because we don't lock the socket,
2027                  * we might find a transient negative value.
2028                  */
2029                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2030                                       READ_ONCE(tp->copied_seq), 0);
2031
2032         seq_printf(seq,
2033                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2034                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2035                    i,
2036                    src->s6_addr32[0], src->s6_addr32[1],
2037                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2038                    dest->s6_addr32[0], dest->s6_addr32[1],
2039                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2040                    state,
2041                    READ_ONCE(tp->write_seq) - tp->snd_una,
2042                    rx_queue,
2043                    timer_active,
2044                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2045                    icsk->icsk_retransmits,
2046                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2047                    icsk->icsk_probes_out,
2048                    sock_i_ino(sp),
2049                    refcount_read(&sp->sk_refcnt), sp,
2050                    jiffies_to_clock_t(icsk->icsk_rto),
2051                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2052                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2053                    tcp_snd_cwnd(tp),
2054                    state == TCP_LISTEN ?
2055                         fastopenq->max_qlen :
2056                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2057                    );
2058 }
2059
2060 static void get_timewait6_sock(struct seq_file *seq,
2061                                struct inet_timewait_sock *tw, int i)
2062 {
2063         long delta = tw->tw_timer.expires - jiffies;
2064         const struct in6_addr *dest, *src;
2065         __u16 destp, srcp;
2066
2067         dest = &tw->tw_v6_daddr;
2068         src  = &tw->tw_v6_rcv_saddr;
2069         destp = ntohs(tw->tw_dport);
2070         srcp  = ntohs(tw->tw_sport);
2071
2072         seq_printf(seq,
2073                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2074                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2075                    i,
2076                    src->s6_addr32[0], src->s6_addr32[1],
2077                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2078                    dest->s6_addr32[0], dest->s6_addr32[1],
2079                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2080                    tw->tw_substate, 0, 0,
2081                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2082                    refcount_read(&tw->tw_refcnt), tw);
2083 }
2084
2085 static int tcp6_seq_show(struct seq_file *seq, void *v)
2086 {
2087         struct tcp_iter_state *st;
2088         struct sock *sk = v;
2089
2090         if (v == SEQ_START_TOKEN) {
2091                 seq_puts(seq,
2092                          "  sl  "
2093                          "local_address                         "
2094                          "remote_address                        "
2095                          "st tx_queue rx_queue tr tm->when retrnsmt"
2096                          "   uid  timeout inode\n");
2097                 goto out;
2098         }
2099         st = seq->private;
2100
2101         if (sk->sk_state == TCP_TIME_WAIT)
2102                 get_timewait6_sock(seq, v, st->num);
2103         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2104                 get_openreq6(seq, v, st->num);
2105         else
2106                 get_tcp6_sock(seq, v, st->num);
2107 out:
2108         return 0;
2109 }
2110
2111 static const struct seq_operations tcp6_seq_ops = {
2112         .show           = tcp6_seq_show,
2113         .start          = tcp_seq_start,
2114         .next           = tcp_seq_next,
2115         .stop           = tcp_seq_stop,
2116 };
2117
2118 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2119         .family         = AF_INET6,
2120 };
2121
2122 int __net_init tcp6_proc_init(struct net *net)
2123 {
2124         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2125                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2126                 return -ENOMEM;
2127         return 0;
2128 }
2129
2130 void tcp6_proc_exit(struct net *net)
2131 {
2132         remove_proc_entry("tcp6", net->proc_net);
2133 }
2134 #endif
2135
2136 struct proto tcpv6_prot = {
2137         .name                   = "TCPv6",
2138         .owner                  = THIS_MODULE,
2139         .close                  = tcp_close,
2140         .pre_connect            = tcp_v6_pre_connect,
2141         .connect                = tcp_v6_connect,
2142         .disconnect             = tcp_disconnect,
2143         .accept                 = inet_csk_accept,
2144         .ioctl                  = tcp_ioctl,
2145         .init                   = tcp_v6_init_sock,
2146         .destroy                = tcp_v4_destroy_sock,
2147         .shutdown               = tcp_shutdown,
2148         .setsockopt             = tcp_setsockopt,
2149         .getsockopt             = tcp_getsockopt,
2150         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2151         .keepalive              = tcp_set_keepalive,
2152         .recvmsg                = tcp_recvmsg,
2153         .sendmsg                = tcp_sendmsg,
2154         .splice_eof             = tcp_splice_eof,
2155         .backlog_rcv            = tcp_v6_do_rcv,
2156         .release_cb             = tcp_release_cb,
2157         .hash                   = inet6_hash,
2158         .unhash                 = inet_unhash,
2159         .get_port               = inet_csk_get_port,
2160         .put_port               = inet_put_port,
2161 #ifdef CONFIG_BPF_SYSCALL
2162         .psock_update_sk_prot   = tcp_bpf_update_proto,
2163 #endif
2164         .enter_memory_pressure  = tcp_enter_memory_pressure,
2165         .leave_memory_pressure  = tcp_leave_memory_pressure,
2166         .stream_memory_free     = tcp_stream_memory_free,
2167         .sockets_allocated      = &tcp_sockets_allocated,
2168
2169         .memory_allocated       = &tcp_memory_allocated,
2170         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2171
2172         .memory_pressure        = &tcp_memory_pressure,
2173         .orphan_count           = &tcp_orphan_count,
2174         .sysctl_mem             = sysctl_tcp_mem,
2175         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2176         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2177         .max_header             = MAX_TCP_HEADER,
2178         .obj_size               = sizeof(struct tcp6_sock),
2179         .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2180         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2181         .twsk_prot              = &tcp6_timewait_sock_ops,
2182         .rsk_prot               = &tcp6_request_sock_ops,
2183         .h.hashinfo             = NULL,
2184         .no_autobind            = true,
2185         .diag_destroy           = tcp_abort,
2186 };
2187 EXPORT_SYMBOL_GPL(tcpv6_prot);
2188
2189 static const struct inet6_protocol tcpv6_protocol = {
2190         .handler        =       tcp_v6_rcv,
2191         .err_handler    =       tcp_v6_err,
2192         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2193 };
2194
2195 static struct inet_protosw tcpv6_protosw = {
2196         .type           =       SOCK_STREAM,
2197         .protocol       =       IPPROTO_TCP,
2198         .prot           =       &tcpv6_prot,
2199         .ops            =       &inet6_stream_ops,
2200         .flags          =       INET_PROTOSW_PERMANENT |
2201                                 INET_PROTOSW_ICSK,
2202 };
2203
2204 static int __net_init tcpv6_net_init(struct net *net)
2205 {
2206         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2207                                     SOCK_RAW, IPPROTO_TCP, net);
2208 }
2209
2210 static void __net_exit tcpv6_net_exit(struct net *net)
2211 {
2212         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2213 }
2214
2215 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2216 {
2217         tcp_twsk_purge(net_exit_list, AF_INET6);
2218 }
2219
2220 static struct pernet_operations tcpv6_net_ops = {
2221         .init       = tcpv6_net_init,
2222         .exit       = tcpv6_net_exit,
2223         .exit_batch = tcpv6_net_exit_batch,
2224 };
2225
2226 int __init tcpv6_init(void)
2227 {
2228         int ret;
2229
2230         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2231         if (ret)
2232                 goto out;
2233
2234         /* register inet6 protocol */
2235         ret = inet6_register_protosw(&tcpv6_protosw);
2236         if (ret)
2237                 goto out_tcpv6_protocol;
2238
2239         ret = register_pernet_subsys(&tcpv6_net_ops);
2240         if (ret)
2241                 goto out_tcpv6_protosw;
2242
2243         ret = mptcpv6_init();
2244         if (ret)
2245                 goto out_tcpv6_pernet_subsys;
2246
2247 out:
2248         return ret;
2249
2250 out_tcpv6_pernet_subsys:
2251         unregister_pernet_subsys(&tcpv6_net_ops);
2252 out_tcpv6_protosw:
2253         inet6_unregister_protosw(&tcpv6_protosw);
2254 out_tcpv6_protocol:
2255         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2256         goto out;
2257 }
2258
2259 void tcpv6_exit(void)
2260 {
2261         unregister_pernet_subsys(&tcpv6_net_ops);
2262         inet6_unregister_protosw(&tcpv6_protosw);
2263         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2264 }