3fd19a84b358d169bbdc351c43ede830c60afcf3
[linux-2.6-block.git] / net / ipv6 / icmp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      Internet Control Message Protocol (ICMPv6)
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on net/ipv4/icmp.c
10  *
11  *      RFC 1885
12  */
13
14 /*
15  *      Changes:
16  *
17  *      Andi Kleen              :       exception handling
18  *      Andi Kleen                      add rate limits. never reply to a icmp.
19  *                                      add more length checks and other fixes.
20  *      yoshfuji                :       ensure to sent parameter problem for
21  *                                      fragments.
22  *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
23  *      Randy Dunlap and
24  *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
25  *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27
28 #define pr_fmt(fmt) "IPv6: " fmt
29
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50
51 #include <net/ip.h>
52 #include <net/sock.h>
53
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69
70 #include <linux/uaccess.h>
71
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75                        u8 type, u8 code, int offset, __be32 info)
76 {
77         /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78         struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79         struct net *net = dev_net_rcu(skb->dev);
80
81         if (type == ICMPV6_PKT_TOOBIG)
82                 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83         else if (type == NDISC_REDIRECT)
84                 ip6_redirect(skb, net, skb->dev->ifindex, 0,
85                              sock_net_uid(net, NULL));
86
87         if (!(type & ICMPV6_INFOMSG_MASK))
88                 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89                         ping_err(skb, offset, ntohl(info));
90
91         return 0;
92 }
93
94 static int icmpv6_rcv(struct sk_buff *skb);
95
96 static const struct inet6_protocol icmpv6_protocol = {
97         .handler        =       icmpv6_rcv,
98         .err_handler    =       icmpv6_err,
99         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105         struct sock *sk;
106
107         sk = this_cpu_read(ipv6_icmp_sk);
108         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109                 /* This can happen if the output path (f.e. SIT or
110                  * ip6ip6 tunnel) signals dst_link_failure() for an
111                  * outgoing ICMP6 packet.
112                  */
113                 return NULL;
114         }
115         sock_net_set(sk, net);
116         return sk;
117 }
118
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121         sock_net_set(sk, &init_net);
122         spin_unlock(&sk->sk_lock.slock);
123 }
124
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *      - it was icmp error message.
130  *      - it is truncated, so that it is known, that protocol is ICMPV6
131  *        (i.e. in the middle of some exthdr)
132  *
133  *      --ANK (980726)
134  */
135
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138         int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139         int len = skb->len - ptr;
140         __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141         __be16 frag_off;
142
143         if (len < 0)
144                 return true;
145
146         ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147         if (ptr < 0)
148                 return false;
149         if (nexthdr == IPPROTO_ICMPV6) {
150                 u8 _type, *tp;
151                 tp = skb_header_pointer(skb,
152                         ptr+offsetof(struct icmp6hdr, icmp6_type),
153                         sizeof(_type), &_type);
154
155                 /* Based on RFC 8200, Section 4.5 Fragment Header, return
156                  * false if this is a fragment packet with no icmp header info.
157                  */
158                 if (!tp && frag_off != 0)
159                         return false;
160                 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161                         return true;
162         }
163         return false;
164 }
165
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168         if (type > ICMPV6_MSG_MAX)
169                 return true;
170
171         /* Limit if icmp type is set in ratemask. */
172         if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173                 return true;
174
175         return false;
176 }
177
178 static bool icmpv6_global_allow(struct net *net, int type,
179                                 bool *apply_ratelimit)
180 {
181         if (icmpv6_mask_allow(net, type))
182                 return true;
183
184         if (icmp_global_allow(net)) {
185                 *apply_ratelimit = true;
186                 return true;
187         }
188         __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
189         return false;
190 }
191
192 /*
193  * Check the ICMP output rate limit
194  */
195 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
196                                struct flowi6 *fl6, bool apply_ratelimit)
197 {
198         struct net *net = sock_net(sk);
199         struct dst_entry *dst;
200         bool res = false;
201
202         if (!apply_ratelimit)
203                 return true;
204
205         /*
206          * Look up the output route.
207          * XXX: perhaps the expire for routing entries cloned by
208          * this lookup should be more aggressive (not longer than timeout).
209          */
210         dst = ip6_route_output(net, sk, fl6);
211         if (dst->error) {
212                 IP6_INC_STATS(net, ip6_dst_idev(dst),
213                               IPSTATS_MIB_OUTNOROUTES);
214         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
215                 res = true;
216         } else {
217                 struct rt6_info *rt = dst_rt6_info(dst);
218                 int tmo = net->ipv6.sysctl.icmpv6_time;
219                 struct inet_peer *peer;
220
221                 /* Give more bandwidth to wider prefixes. */
222                 if (rt->rt6i_dst.plen < 128)
223                         tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
224
225                 rcu_read_lock();
226                 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
227                 res = inet_peer_xrlim_allow(peer, tmo);
228                 rcu_read_unlock();
229         }
230         if (!res)
231                 __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
232                                   ICMP6_MIB_RATELIMITHOST);
233         else
234                 icmp_global_consume(net);
235         dst_release(dst);
236         return res;
237 }
238
239 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
240                                   struct flowi6 *fl6)
241 {
242         struct net *net = sock_net(sk);
243         struct dst_entry *dst;
244         bool res = false;
245
246         dst = ip6_route_output(net, sk, fl6);
247         if (!dst->error) {
248                 struct rt6_info *rt = dst_rt6_info(dst);
249                 struct in6_addr prefsrc;
250
251                 rt6_get_prefsrc(rt, &prefsrc);
252                 res = !ipv6_addr_any(&prefsrc);
253         }
254         dst_release(dst);
255         return res;
256 }
257
258 /*
259  *      an inline helper for the "simple" if statement below
260  *      checks if parameter problem report is caused by an
261  *      unrecognized IPv6 option that has the Option Type
262  *      highest-order two bits set to 10
263  */
264
265 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
266 {
267         u8 _optval, *op;
268
269         offset += skb_network_offset(skb);
270         op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
271         if (!op)
272                 return true;
273         return (*op & 0xC0) == 0x80;
274 }
275
276 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
277                                 struct icmp6hdr *thdr, int len)
278 {
279         struct sk_buff *skb;
280         struct icmp6hdr *icmp6h;
281
282         skb = skb_peek(&sk->sk_write_queue);
283         if (!skb)
284                 return;
285
286         icmp6h = icmp6_hdr(skb);
287         memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
288         icmp6h->icmp6_cksum = 0;
289
290         if (skb_queue_len(&sk->sk_write_queue) == 1) {
291                 skb->csum = csum_partial(icmp6h,
292                                         sizeof(struct icmp6hdr), skb->csum);
293                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
294                                                       &fl6->daddr,
295                                                       len, fl6->flowi6_proto,
296                                                       skb->csum);
297         } else {
298                 __wsum tmp_csum = 0;
299
300                 skb_queue_walk(&sk->sk_write_queue, skb) {
301                         tmp_csum = csum_add(tmp_csum, skb->csum);
302                 }
303
304                 tmp_csum = csum_partial(icmp6h,
305                                         sizeof(struct icmp6hdr), tmp_csum);
306                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
307                                                       &fl6->daddr,
308                                                       len, fl6->flowi6_proto,
309                                                       tmp_csum);
310         }
311         ip6_push_pending_frames(sk);
312 }
313
314 struct icmpv6_msg {
315         struct sk_buff  *skb;
316         int             offset;
317         uint8_t         type;
318 };
319
320 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
321 {
322         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
323         struct sk_buff *org_skb = msg->skb;
324         __wsum csum;
325
326         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
327                                       to, len);
328         skb->csum = csum_block_add(skb->csum, csum, odd);
329         if (!(msg->type & ICMPV6_INFOMSG_MASK))
330                 nf_ct_attach(skb, org_skb);
331         return 0;
332 }
333
334 #if IS_ENABLED(CONFIG_IPV6_MIP6)
335 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
336 {
337         struct ipv6hdr *iph = ipv6_hdr(skb);
338         struct ipv6_destopt_hao *hao;
339         int off;
340
341         if (opt->dsthao) {
342                 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
343                 if (likely(off >= 0)) {
344                         hao = (struct ipv6_destopt_hao *)
345                                         (skb_network_header(skb) + off);
346                         swap(iph->saddr, hao->addr);
347                 }
348         }
349 }
350 #else
351 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
352 #endif
353
354 static struct dst_entry *icmpv6_route_lookup(struct net *net,
355                                              struct sk_buff *skb,
356                                              struct sock *sk,
357                                              struct flowi6 *fl6)
358 {
359         struct dst_entry *dst, *dst2;
360         struct flowi6 fl2;
361         int err;
362
363         err = ip6_dst_lookup(net, sk, &dst, fl6);
364         if (err)
365                 return ERR_PTR(err);
366
367         /*
368          * We won't send icmp if the destination is known
369          * anycast unless we need to treat anycast as unicast.
370          */
371         if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
372             ipv6_anycast_destination(dst, &fl6->daddr)) {
373                 net_dbg_ratelimited("icmp6_send: acast source\n");
374                 dst_release(dst);
375                 return ERR_PTR(-EINVAL);
376         }
377
378         /* No need to clone since we're just using its address. */
379         dst2 = dst;
380
381         dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
382         if (!IS_ERR(dst)) {
383                 if (dst != dst2)
384                         return dst;
385         } else {
386                 if (PTR_ERR(dst) == -EPERM)
387                         dst = NULL;
388                 else
389                         return dst;
390         }
391
392         err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
393         if (err)
394                 goto relookup_failed;
395
396         err = ip6_dst_lookup(net, sk, &dst2, &fl2);
397         if (err)
398                 goto relookup_failed;
399
400         dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
401         if (!IS_ERR(dst2)) {
402                 dst_release(dst);
403                 dst = dst2;
404         } else {
405                 err = PTR_ERR(dst2);
406                 if (err == -EPERM) {
407                         dst_release(dst);
408                         return dst2;
409                 } else
410                         goto relookup_failed;
411         }
412
413 relookup_failed:
414         if (dst)
415                 return dst;
416         return ERR_PTR(err);
417 }
418
419 static struct net_device *icmp6_dev(const struct sk_buff *skb)
420 {
421         struct net_device *dev = skb->dev;
422
423         /* for local traffic to local address, skb dev is the loopback
424          * device. Check if there is a dst attached to the skb and if so
425          * get the real device index. Same is needed for replies to a link
426          * local address on a device enslaved to an L3 master device
427          */
428         if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
429                 const struct rt6_info *rt6 = skb_rt6_info(skb);
430
431                 /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
432                  * and ip6_null_entry could be set to skb if no route is found.
433                  */
434                 if (rt6 && rt6->rt6i_idev)
435                         dev = rt6->rt6i_idev->dev;
436         }
437
438         return dev;
439 }
440
441 static int icmp6_iif(const struct sk_buff *skb)
442 {
443         return icmp6_dev(skb)->ifindex;
444 }
445
446 /*
447  *      Send an ICMP message in response to a packet in error
448  */
449 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
450                 const struct in6_addr *force_saddr,
451                 const struct inet6_skb_parm *parm)
452 {
453         struct inet6_dev *idev = NULL;
454         struct ipv6hdr *hdr = ipv6_hdr(skb);
455         struct sock *sk;
456         struct net *net;
457         struct ipv6_pinfo *np;
458         const struct in6_addr *saddr = NULL;
459         bool apply_ratelimit = false;
460         struct dst_entry *dst;
461         struct icmp6hdr tmp_hdr;
462         struct flowi6 fl6;
463         struct icmpv6_msg msg;
464         struct ipcm6_cookie ipc6;
465         int iif = 0;
466         int addr_type = 0;
467         int len;
468         u32 mark;
469
470         if ((u8 *)hdr < skb->head ||
471             (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
472                 return;
473
474         if (!skb->dev)
475                 return;
476
477         rcu_read_lock();
478
479         net = dev_net_rcu(skb->dev);
480         mark = IP6_REPLY_MARK(net, skb->mark);
481         /*
482          *      Make sure we respect the rules
483          *      i.e. RFC 1885 2.4(e)
484          *      Rule (e.1) is enforced by not using icmp6_send
485          *      in any code that processes icmp errors.
486          */
487         addr_type = ipv6_addr_type(&hdr->daddr);
488
489         if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
490             ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
491                 saddr = &hdr->daddr;
492
493         /*
494          *      Dest addr check
495          */
496
497         if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
498                 if (type != ICMPV6_PKT_TOOBIG &&
499                     !(type == ICMPV6_PARAMPROB &&
500                       code == ICMPV6_UNK_OPTION &&
501                       (opt_unrec(skb, info))))
502                         goto out;
503
504                 saddr = NULL;
505         }
506
507         addr_type = ipv6_addr_type(&hdr->saddr);
508
509         /*
510          *      Source addr check
511          */
512
513         if (__ipv6_addr_needs_scope_id(addr_type)) {
514                 iif = icmp6_iif(skb);
515         } else {
516                 /*
517                  * The source device is used for looking up which routing table
518                  * to use for sending an ICMP error.
519                  */
520                 iif = l3mdev_master_ifindex(skb->dev);
521         }
522
523         /*
524          *      Must not send error if the source does not uniquely
525          *      identify a single node (RFC2463 Section 2.4).
526          *      We check unspecified / multicast addresses here,
527          *      and anycast addresses will be checked later.
528          */
529         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
530                 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
531                                     &hdr->saddr, &hdr->daddr);
532                 goto out;
533         }
534
535         /*
536          *      Never answer to a ICMP packet.
537          */
538         if (is_ineligible(skb)) {
539                 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
540                                     &hdr->saddr, &hdr->daddr);
541                 goto out;
542         }
543
544         /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
545         local_bh_disable();
546
547         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
548         if (!(skb->dev->flags & IFF_LOOPBACK) &&
549             !icmpv6_global_allow(net, type, &apply_ratelimit))
550                 goto out_bh_enable;
551
552         mip6_addr_swap(skb, parm);
553
554         sk = icmpv6_xmit_lock(net);
555         if (!sk)
556                 goto out_bh_enable;
557
558         memset(&fl6, 0, sizeof(fl6));
559         fl6.flowi6_proto = IPPROTO_ICMPV6;
560         fl6.daddr = hdr->saddr;
561         if (force_saddr)
562                 saddr = force_saddr;
563         if (saddr) {
564                 fl6.saddr = *saddr;
565         } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
566                 /* select a more meaningful saddr from input if */
567                 struct net_device *in_netdev;
568
569                 in_netdev = dev_get_by_index(net, parm->iif);
570                 if (in_netdev) {
571                         ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
572                                            inet6_sk(sk)->srcprefs,
573                                            &fl6.saddr);
574                         dev_put(in_netdev);
575                 }
576         }
577         fl6.flowi6_mark = mark;
578         fl6.flowi6_oif = iif;
579         fl6.fl6_icmp_type = type;
580         fl6.fl6_icmp_code = code;
581         fl6.flowi6_uid = sock_net_uid(net, NULL);
582         fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
583         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
584
585         np = inet6_sk(sk);
586
587         if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
588                 goto out_unlock;
589
590         tmp_hdr.icmp6_type = type;
591         tmp_hdr.icmp6_code = code;
592         tmp_hdr.icmp6_cksum = 0;
593         tmp_hdr.icmp6_pointer = htonl(info);
594
595         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
596                 fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
597         else if (!fl6.flowi6_oif)
598                 fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
599
600         ipcm6_init_sk(&ipc6, sk);
601         ipc6.sockc.mark = mark;
602         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
603
604         dst = icmpv6_route_lookup(net, skb, sk, &fl6);
605         if (IS_ERR(dst))
606                 goto out_unlock;
607
608         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
609
610         msg.skb = skb;
611         msg.offset = skb_network_offset(skb);
612         msg.type = type;
613
614         len = skb->len - msg.offset;
615         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
616         if (len < 0) {
617                 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
618                                     &hdr->saddr, &hdr->daddr);
619                 goto out_dst_release;
620         }
621
622         idev = __in6_dev_get(skb->dev);
623
624         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
625                             len + sizeof(struct icmp6hdr),
626                             sizeof(struct icmp6hdr),
627                             &ipc6, &fl6, dst_rt6_info(dst),
628                             MSG_DONTWAIT)) {
629                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
630                 ip6_flush_pending_frames(sk);
631         } else {
632                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
633                                            len + sizeof(struct icmp6hdr));
634         }
635
636 out_dst_release:
637         dst_release(dst);
638 out_unlock:
639         icmpv6_xmit_unlock(sk);
640 out_bh_enable:
641         local_bh_enable();
642 out:
643         rcu_read_unlock();
644 }
645 EXPORT_SYMBOL(icmp6_send);
646
647 /* Slightly more convenient version of icmp6_send with drop reasons.
648  */
649 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
650                               enum skb_drop_reason reason)
651 {
652         icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
653         kfree_skb_reason(skb, reason);
654 }
655
656 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
657  * if sufficient data bytes are available
658  * @nhs is the size of the tunnel header(s) :
659  *  Either an IPv4 header for SIT encap
660  *         an IPv4 header + GRE header for GRE encap
661  */
662 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
663                                unsigned int data_len)
664 {
665         struct in6_addr temp_saddr;
666         struct rt6_info *rt;
667         struct sk_buff *skb2;
668         u32 info = 0;
669
670         if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
671                 return 1;
672
673         /* RFC 4884 (partial) support for ICMP extensions */
674         if (data_len < 128 || (data_len & 7) || skb->len < data_len)
675                 data_len = 0;
676
677         skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
678
679         if (!skb2)
680                 return 1;
681
682         skb_dst_drop(skb2);
683         skb_pull(skb2, nhs);
684         skb_reset_network_header(skb2);
685
686         rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
687                         NULL, 0, skb, 0);
688
689         if (rt && rt->dst.dev)
690                 skb2->dev = rt->dst.dev;
691
692         ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
693
694         if (data_len) {
695                 /* RFC 4884 (partial) support :
696                  * insert 0 padding at the end, before the extensions
697                  */
698                 __skb_push(skb2, nhs);
699                 skb_reset_network_header(skb2);
700                 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
701                 memset(skb2->data + data_len - nhs, 0, nhs);
702                 /* RFC 4884 4.5 : Length is measured in 64-bit words,
703                  * and stored in reserved[0]
704                  */
705                 info = (data_len/8) << 24;
706         }
707         if (type == ICMP_TIME_EXCEEDED)
708                 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
709                            info, &temp_saddr, IP6CB(skb2));
710         else
711                 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
712                            info, &temp_saddr, IP6CB(skb2));
713         if (rt)
714                 ip6_rt_put(rt);
715
716         kfree_skb(skb2);
717
718         return 0;
719 }
720 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
721
722 static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
723 {
724         struct net *net = dev_net_rcu(skb->dev);
725         struct sock *sk;
726         struct inet6_dev *idev;
727         struct ipv6_pinfo *np;
728         const struct in6_addr *saddr = NULL;
729         struct icmp6hdr *icmph = icmp6_hdr(skb);
730         bool apply_ratelimit = false;
731         struct icmp6hdr tmp_hdr;
732         struct flowi6 fl6;
733         struct icmpv6_msg msg;
734         struct dst_entry *dst;
735         struct ipcm6_cookie ipc6;
736         u32 mark = IP6_REPLY_MARK(net, skb->mark);
737         SKB_DR(reason);
738         bool acast;
739         u8 type;
740
741         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
742             net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
743                 return reason;
744
745         saddr = &ipv6_hdr(skb)->daddr;
746
747         acast = ipv6_anycast_destination(skb_dst(skb), saddr);
748         if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
749                 return reason;
750
751         if (!ipv6_unicast_destination(skb) &&
752             !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
753                 saddr = NULL;
754
755         if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
756                 type = ICMPV6_EXT_ECHO_REPLY;
757         else
758                 type = ICMPV6_ECHO_REPLY;
759
760         memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
761         tmp_hdr.icmp6_type = type;
762
763         memset(&fl6, 0, sizeof(fl6));
764         if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
765                 fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
766
767         fl6.flowi6_proto = IPPROTO_ICMPV6;
768         fl6.daddr = ipv6_hdr(skb)->saddr;
769         if (saddr)
770                 fl6.saddr = *saddr;
771         fl6.flowi6_oif = icmp6_iif(skb);
772         fl6.fl6_icmp_type = type;
773         fl6.flowi6_mark = mark;
774         fl6.flowi6_uid = sock_net_uid(net, NULL);
775         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
776
777         local_bh_disable();
778         sk = icmpv6_xmit_lock(net);
779         if (!sk)
780                 goto out_bh_enable;
781         np = inet6_sk(sk);
782
783         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
784                 fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
785         else if (!fl6.flowi6_oif)
786                 fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
787
788         if (ip6_dst_lookup(net, sk, &dst, &fl6))
789                 goto out;
790         dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
791         if (IS_ERR(dst))
792                 goto out;
793
794         /* Check the ratelimit */
795         if ((!(skb->dev->flags & IFF_LOOPBACK) &&
796             !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
797             !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
798                 goto out_dst_release;
799
800         idev = __in6_dev_get(skb->dev);
801
802         msg.skb = skb;
803         msg.offset = 0;
804         msg.type = type;
805
806         ipcm6_init_sk(&ipc6, sk);
807         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
808         ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
809         ipc6.sockc.mark = mark;
810
811         if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
812                 if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
813                         goto out_dst_release;
814
815         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
816                             skb->len + sizeof(struct icmp6hdr),
817                             sizeof(struct icmp6hdr), &ipc6, &fl6,
818                             dst_rt6_info(dst), MSG_DONTWAIT)) {
819                 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
820                 ip6_flush_pending_frames(sk);
821         } else {
822                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
823                                            skb->len + sizeof(struct icmp6hdr));
824                 reason = SKB_CONSUMED;
825         }
826 out_dst_release:
827         dst_release(dst);
828 out:
829         icmpv6_xmit_unlock(sk);
830 out_bh_enable:
831         local_bh_enable();
832         return reason;
833 }
834
835 enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
836                                    u8 code, __be32 info)
837 {
838         struct inet6_skb_parm *opt = IP6CB(skb);
839         struct net *net = dev_net_rcu(skb->dev);
840         const struct inet6_protocol *ipprot;
841         enum skb_drop_reason reason;
842         int inner_offset;
843         __be16 frag_off;
844         u8 nexthdr;
845
846         reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
847         if (reason != SKB_NOT_DROPPED_YET)
848                 goto out;
849
850         seg6_icmp_srh(skb, opt);
851
852         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
853         if (ipv6_ext_hdr(nexthdr)) {
854                 /* now skip over extension headers */
855                 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
856                                                 &nexthdr, &frag_off);
857                 if (inner_offset < 0) {
858                         SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
859                         goto out;
860                 }
861         } else {
862                 inner_offset = sizeof(struct ipv6hdr);
863         }
864
865         /* Checkin header including 8 bytes of inner protocol header. */
866         reason = pskb_may_pull_reason(skb, inner_offset + 8);
867         if (reason != SKB_NOT_DROPPED_YET)
868                 goto out;
869
870         /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
871            Without this we will not able f.e. to make source routed
872            pmtu discovery.
873            Corresponding argument (opt) to notifiers is already added.
874            --ANK (980726)
875          */
876
877         ipprot = rcu_dereference(inet6_protos[nexthdr]);
878         if (ipprot && ipprot->err_handler)
879                 ipprot->err_handler(skb, opt, type, code, inner_offset, info);
880
881         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
882         return SKB_CONSUMED;
883
884 out:
885         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
886         return reason;
887 }
888
889 /*
890  *      Handle icmp messages
891  */
892
893 static int icmpv6_rcv(struct sk_buff *skb)
894 {
895         enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
896         struct net *net = dev_net_rcu(skb->dev);
897         struct net_device *dev = icmp6_dev(skb);
898         struct inet6_dev *idev = __in6_dev_get(dev);
899         const struct in6_addr *saddr, *daddr;
900         struct icmp6hdr *hdr;
901         u8 type;
902
903         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
904                 struct sec_path *sp = skb_sec_path(skb);
905                 int nh;
906
907                 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
908                                  XFRM_STATE_ICMP)) {
909                         reason = SKB_DROP_REASON_XFRM_POLICY;
910                         goto drop_no_count;
911                 }
912
913                 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
914                         goto drop_no_count;
915
916                 nh = skb_network_offset(skb);
917                 skb_set_network_header(skb, sizeof(*hdr));
918
919                 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
920                                                 skb)) {
921                         reason = SKB_DROP_REASON_XFRM_POLICY;
922                         goto drop_no_count;
923                 }
924
925                 skb_set_network_header(skb, nh);
926         }
927
928         __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
929
930         saddr = &ipv6_hdr(skb)->saddr;
931         daddr = &ipv6_hdr(skb)->daddr;
932
933         if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
934                 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
935                                     saddr, daddr);
936                 goto csum_error;
937         }
938
939         if (!pskb_pull(skb, sizeof(*hdr)))
940                 goto discard_it;
941
942         hdr = icmp6_hdr(skb);
943
944         type = hdr->icmp6_type;
945
946         ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);
947
948         switch (type) {
949         case ICMPV6_ECHO_REQUEST:
950                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
951                         reason = icmpv6_echo_reply(skb);
952                 break;
953         case ICMPV6_EXT_ECHO_REQUEST:
954                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
955                     READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
956                         reason = icmpv6_echo_reply(skb);
957                 break;
958
959         case ICMPV6_ECHO_REPLY:
960         case ICMPV6_EXT_ECHO_REPLY:
961                 ping_rcv(skb);
962                 return 0;
963
964         case ICMPV6_PKT_TOOBIG:
965                 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
966                    standard destination cache. Seems, only "advanced"
967                    destination cache will allow to solve this problem
968                    --ANK (980726)
969                  */
970                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
971                         goto discard_it;
972                 hdr = icmp6_hdr(skb);
973
974                 /* to notify */
975                 fallthrough;
976         case ICMPV6_DEST_UNREACH:
977         case ICMPV6_TIME_EXCEED:
978         case ICMPV6_PARAMPROB:
979                 reason = icmpv6_notify(skb, type, hdr->icmp6_code,
980                                        hdr->icmp6_mtu);
981                 break;
982
983         case NDISC_ROUTER_SOLICITATION:
984         case NDISC_ROUTER_ADVERTISEMENT:
985         case NDISC_NEIGHBOUR_SOLICITATION:
986         case NDISC_NEIGHBOUR_ADVERTISEMENT:
987         case NDISC_REDIRECT:
988                 reason = ndisc_rcv(skb);
989                 break;
990
991         case ICMPV6_MGM_QUERY:
992                 igmp6_event_query(skb);
993                 return 0;
994
995         case ICMPV6_MGM_REPORT:
996                 igmp6_event_report(skb);
997                 return 0;
998
999         case ICMPV6_MGM_REDUCTION:
1000         case ICMPV6_NI_QUERY:
1001         case ICMPV6_NI_REPLY:
1002         case ICMPV6_MLD2_REPORT:
1003         case ICMPV6_DHAAD_REQUEST:
1004         case ICMPV6_DHAAD_REPLY:
1005         case ICMPV6_MOBILE_PREFIX_SOL:
1006         case ICMPV6_MOBILE_PREFIX_ADV:
1007                 break;
1008
1009         default:
1010                 /* informational */
1011                 if (type & ICMPV6_INFOMSG_MASK)
1012                         break;
1013
1014                 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
1015                                     saddr, daddr);
1016
1017                 /*
1018                  * error of unknown type.
1019                  * must pass to upper level
1020                  */
1021
1022                 reason = icmpv6_notify(skb, type, hdr->icmp6_code,
1023                                        hdr->icmp6_mtu);
1024         }
1025
1026         /* until the v6 path can be better sorted assume failure and
1027          * preserve the status quo behaviour for the rest of the paths to here
1028          */
1029         if (reason)
1030                 kfree_skb_reason(skb, reason);
1031         else
1032                 consume_skb(skb);
1033
1034         return 0;
1035
1036 csum_error:
1037         reason = SKB_DROP_REASON_ICMP_CSUM;
1038         __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
1039 discard_it:
1040         __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
1041 drop_no_count:
1042         kfree_skb_reason(skb, reason);
1043         return 0;
1044 }
1045
1046 void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
1047                       const struct in6_addr *saddr,
1048                       const struct in6_addr *daddr, int oif)
1049 {
1050         memset(fl6, 0, sizeof(*fl6));
1051         fl6->saddr = *saddr;
1052         fl6->daddr = *daddr;
1053         fl6->flowi6_proto       = IPPROTO_ICMPV6;
1054         fl6->fl6_icmp_type      = type;
1055         fl6->fl6_icmp_code      = 0;
1056         fl6->flowi6_oif         = oif;
1057         security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1058 }
1059
1060 int __init icmpv6_init(void)
1061 {
1062         struct sock *sk;
1063         int err, i;
1064
1065         for_each_possible_cpu(i) {
1066                 err = inet_ctl_sock_create(&sk, PF_INET6,
1067                                            SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1068                 if (err < 0) {
1069                         pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1070                                err);
1071                         return err;
1072                 }
1073
1074                 per_cpu(ipv6_icmp_sk, i) = sk;
1075
1076                 /* Enough space for 2 64K ICMP packets, including
1077                  * sk_buff struct overhead.
1078                  */
1079                 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1080         }
1081
1082         err = -EAGAIN;
1083         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1084                 goto fail;
1085
1086         err = inet6_register_icmp_sender(icmp6_send);
1087         if (err)
1088                 goto sender_reg_err;
1089         return 0;
1090
1091 sender_reg_err:
1092         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1093 fail:
1094         pr_err("Failed to register ICMP6 protocol\n");
1095         return err;
1096 }
1097
1098 void icmpv6_cleanup(void)
1099 {
1100         inet6_unregister_icmp_sender(icmp6_send);
1101         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1102 }
1103
1104
/*
 * errno mapping for ICMPV6_DEST_UNREACH, indexed by the ICMPv6 code.
 * @err is the errno to report, @fatal is returned by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1138
1139 int icmpv6_err_convert(u8 type, u8 code, int *err)
1140 {
1141         int fatal = 0;
1142
1143         *err = EPROTO;
1144
1145         switch (type) {
1146         case ICMPV6_DEST_UNREACH:
1147                 fatal = 1;
1148                 if (code < ARRAY_SIZE(tab_unreach)) {
1149                         *err  = tab_unreach[code].err;
1150                         fatal = tab_unreach[code].fatal;
1151                 }
1152                 break;
1153
1154         case ICMPV6_PKT_TOOBIG:
1155                 *err = EMSGSIZE;
1156                 break;
1157
1158         case ICMPV6_PARAMPROB:
1159                 *err = EPROTO;
1160                 fatal = 1;
1161                 break;
1162
1163         case ICMPV6_TIME_EXCEED:
1164                 *err = EHOSTUNREACH;
1165                 break;
1166         }
1167
1168         return fatal;
1169 }
1170 EXPORT_SYMBOL(icmpv6_err_convert);
1171
1172 #ifdef CONFIG_SYSCTL
1173 static struct ctl_table ipv6_icmp_table_template[] = {
1174         {
1175                 .procname       = "ratelimit",
1176                 .data           = &init_net.ipv6.sysctl.icmpv6_time,
1177                 .maxlen         = sizeof(int),
1178                 .mode           = 0644,
1179                 .proc_handler   = proc_dointvec_ms_jiffies,
1180         },
1181         {
1182                 .procname       = "echo_ignore_all",
1183                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1184                 .maxlen         = sizeof(u8),
1185                 .mode           = 0644,
1186                 .proc_handler = proc_dou8vec_minmax,
1187         },
1188         {
1189                 .procname       = "echo_ignore_multicast",
1190                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1191                 .maxlen         = sizeof(u8),
1192                 .mode           = 0644,
1193                 .proc_handler = proc_dou8vec_minmax,
1194         },
1195         {
1196                 .procname       = "echo_ignore_anycast",
1197                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1198                 .maxlen         = sizeof(u8),
1199                 .mode           = 0644,
1200                 .proc_handler = proc_dou8vec_minmax,
1201         },
1202         {
1203                 .procname       = "ratemask",
1204                 .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1205                 .maxlen         = ICMPV6_MSG_MAX + 1,
1206                 .mode           = 0644,
1207                 .proc_handler = proc_do_large_bitmap,
1208         },
1209         {
1210                 .procname       = "error_anycast_as_unicast",
1211                 .data           = &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
1212                 .maxlen         = sizeof(u8),
1213                 .mode           = 0644,
1214                 .proc_handler   = proc_dou8vec_minmax,
1215                 .extra1         = SYSCTL_ZERO,
1216                 .extra2         = SYSCTL_ONE,
1217         },
1218 };
1219
1220 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1221 {
1222         struct ctl_table *table;
1223
1224         table = kmemdup(ipv6_icmp_table_template,
1225                         sizeof(ipv6_icmp_table_template),
1226                         GFP_KERNEL);
1227
1228         if (table) {
1229                 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1230                 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1231                 table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1232                 table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1233                 table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1234                 table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
1235         }
1236         return table;
1237 }
1238
1239 size_t ipv6_icmp_sysctl_table_size(void)
1240 {
1241         return ARRAY_SIZE(ipv6_icmp_table_template);
1242 }
1243 #endif