2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on net/ipv4/icmp.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 * Andi Kleen : exception handling
22 * Andi Kleen add rate limits. never reply to a icmp.
23 * add more length checks and other fixes.
24 * yoshfuji : ensure to sent parameter problem for
26 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
28 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
43 #include <linux/slab.h>
46 #include <linux/sysctl.h>
49 #include <linux/inet.h>
50 #include <linux/netdevice.h>
51 #include <linux/icmpv6.h>
57 #include <net/ip6_checksum.h>
58 #include <net/protocol.h>
60 #include <net/rawv6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
66 #include <net/inet_common.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
72 * The ICMP socket(s). This is the most convenient way to flow control
73 * our ICMP output as well as maintain a clean interface throughout
74 * all layers. All Socketless IP sends will soon be gone.
76 * On SMP we have one ICMP socket per-cpu.
/*
 * icmpv6_sk - select the ICMPv6 control socket of the executing CPU.
 * @net: network namespace owning the per-CPU socket array.
 *
 * Indexes net->ipv6.icmp_sk[] with smp_processor_id() and returns that entry.
 * NOTE(review): the opening and closing braces of this function are absent
 * from this copy of the file.
 */
78 static inline struct sock *icmpv6_sk(struct net *net)
80 return net->ipv6.icmp_sk[smp_processor_id()];
/*
 * icmpv6_rcv is declared ahead of its definition because the protocol
 * descriptor below stores its address.
 */
83 static int icmpv6_rcv(struct sk_buff *skb);
/*
 * Protocol descriptor for ICMPv6: icmpv6_rcv is installed as the handler and
 * the INET6_PROTO_NOPOLICY and INET6_PROTO_FINAL flags are set.  The
 * descriptor is handed to inet6_add_protocol() in icmpv6_init() and to
 * inet6_del_protocol() in icmpv6_cleanup(), both under IPPROTO_ICMPV6.
 * NOTE(review): the closing brace of the initializer is absent from this copy.
 */
85 static const struct inet6_protocol icmpv6_protocol = {
86 .handler = icmpv6_rcv,
87 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the transmit lock of an ICMPv6 socket.
 * @net: network namespace the socket belongs to.
 *
 * The lock is sk->sk_lock.slock and it is taken with spin_trylock(), so the
 * function never spins; the failure branch is marked unlikely().  The
 * existing comment explains when the trylock can fail: the output path
 * (for example a tunnel) signalling dst_link_failure() for an outgoing
 * ICMPv6 packet.
 *
 * NOTE(review): the statements that obtain sk and the return statements are
 * absent from this copy.  Callers test the result for failure before using
 * it, so presumably NULL is returned on contention - confirm against the
 * full source.
 */
90 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
97 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
98 /* This can happen if the output path (f.e. SIT or
99 * ip6ip6 tunnel) signals dst_link_failure() for an
100 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock taken by icmpv6_xmit_lock().
 * @sk: socket whose sk_lock.slock is released.
 *
 * Uses spin_unlock_bh(), i.e. the unlock also re-enables bottom halves.
 * NOTE(review): the braces of this function are absent from this copy.
 */
108 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
110 spin_unlock_bh(&sk->sk_lock.slock);
114 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - report a Parameter Problem for @skb.
 * @skb:  the offending packet.
 * @code: ICMPv6 code to put in the error.
 * @pos:  passed as the info argument of icmpv6_send(), which stores it
 *        (converted with htonl) in the icmp6_pointer field of the error.
 *
 * NOTE(review): only the icmpv6_send() call is visible here; any statement
 * that followed it (and the braces) is absent from this copy.
 */
116 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
118 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
123 * Determine whether we may reply to this packet with an ICMP error.
125 * We do not reply if:
126 * - it was itself an ICMP error message;
127 * - it is truncated in such a way that the protocol is still known to be ICMPV6
128 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - decide whether @skb must not be answered with an error.
 * @skb: packet that triggered the error; its network header is IPv6.
 *
 * Visible steps: compute the offset of the first byte after the fixed IPv6
 * header relative to skb->data, skip extension headers with
 * ipv6_skip_exthdr(), and, when the resulting protocol is ICMPv6, fetch the
 * type byte with skb_header_pointer().  A type without ICMPV6_INFOMSG_MASK
 * set is an error message rather than an informational one.
 *
 * icmpv6_send() treats a true result as "do not reply".
 * NOTE(review): the declarations of tp/_type, the guards around the lookups
 * and all return statements are absent from this copy.
 */
133 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first header following the fixed IPv6 header. */
135 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
/* Number of bytes available after that point. */
136 int len = skb->len - ptr;
137 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Walk over extension headers; nexthdr is updated to the final protocol. */
142 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
145 if (nexthdr == IPPROTO_ICMPV6) {
/* Read only the ICMPv6 type byte; _type is the on-stack copy buffer. */
147 tp = skb_header_pointer(skb,
148 ptr+offsetof(struct icmp6hdr, icmp6_type),
149 sizeof(_type), &_type);
151 !(*tp & ICMPV6_INFOMSG_MASK))
158 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - apply the ICMPv6 output rate limit.
 * @sk:   sending socket; its network namespace supplies the sysctl value.
 * @type: ICMPv6 type about to be sent.
 * (A further flow parameter named fl is used in the body; its declaration
 * line is absent from this copy.)
 *
 * Informational messages and Packet Too Big are exempt from limiting.  For
 * everything else the output route is looked up; the rate-limit timeout
 * starts from net->ipv6.sysctl.icmpv6_time and is shifted right once for
 * every full 32 bits by which the route's destination prefix is shorter
 * than 128, before being handed to xrlim_allow().
 *
 * NOTE(review): the return statements, the declaration/initial value of
 * res, the condition guarding the OUTNOROUTES counter and the body of the
 * loopback branch are absent from this copy.
 */
160 static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
163 struct dst_entry *dst;
164 struct net *net = sock_net(sk);
167 /* Informational messages are not limited. */
168 if (type & ICMPV6_INFOMSG_MASK)
171 /* Do not limit pmtu discovery, it would break it. */
172 if (type == ICMPV6_PKT_TOOBIG)
176 * Look up the output route.
177 * XXX: perhaps the expire for routing entries cloned by
178 * this lookup should be more aggressive (not longer than timeout).
180 dst = ip6_route_output(net, sk, fl);
182 IP6_INC_STATS(net, ip6_dst_idev(dst),
183 IPSTATS_MIB_OUTNOROUTES);
184 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
187 struct rt6_info *rt = (struct rt6_info *)dst;
188 int tmo = net->ipv6.sysctl.icmpv6_time;
190 /* Give more bandwidth to wider prefixes. */
191 if (rt->rt6i_dst.plen < 128)
192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
194 res = xrlim_allow(dst, tmo);
201 * an inline helper for the "simple" if statement below
202 * checks if parameter problem report is caused by an
203 * unrecognized IPv6 option that has the Option Type
204 * highest-order two bits set to 10
/*
 * opt_unrec - test the two highest-order bits of an option type byte.
 * @skb:    packet containing the option.
 * @offset: offset of the option type byte, relative to the network header
 *          (skb_network_offset() is added before the lookup).
 *
 * Returns true when the byte's top two bits are binary 10
 * ((*op & 0xC0) == 0x80).
 * NOTE(review): the declarations of op/_optval and the handling of a failed
 * skb_header_pointer() lookup are absent from this copy.
 */
207 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
211 offset += skb_network_offset(skb);
212 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
215 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finalize the ICMPv6 header and transmit.
 * @sk:   socket whose sk_write_queue holds the pending message.
 * @fl:   flow; its source address feeds the pseudo-header checksum.
 * @thdr: template ICMPv6 header copied over the header of the first skb.
 * @len:  message length (its use lies in lines absent from this copy).
 *
 * The template is copied into the first queued skb and its checksum field
 * zeroed.  With a single queued skb the checksum is computed from that
 * skb's csum plus the header; with several, the csum values of all queued
 * skbs are added first.  The result goes through csum_ipv6_magic() and the
 * queue is then pushed with ip6_push_pending_frames().
 *
 * NOTE(review): the remaining csum_ipv6_magic() arguments, the else branch
 * delimiter, local declarations and the return statements are absent.
 */
218 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
221 struct icmp6hdr *icmp6h;
/* Nothing queued means nothing to send. */
224 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
227 icmp6h = icmp6_hdr(skb);
228 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field must be zero while the checksum is computed. */
229 icmp6h->icmp6_cksum = 0;
231 if (skb_queue_len(&sk->sk_write_queue) == 1) {
232 skb->csum = csum_partial(icmp6h,
233 sizeof(struct icmp6hdr), skb->csum);
234 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Several fragments: accumulate the partial checksum of each queued skb. */
241 skb_queue_walk(&sk->sk_write_queue, skb) {
242 tmp_csum = csum_add(tmp_csum, skb->csum);
245 tmp_csum = csum_partial(icmp6h,
246 sizeof(struct icmp6hdr), tmp_csum);
247 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
252 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment callback passed to ip6_append_data().
 * @from:   really a struct icmpv6_msg describing the source packet.
 * @to:     destination buffer.
 * @offset: offset of this fragment, added to msg->offset in the source skb.
 * @len:    number of bytes to copy.
 * @odd:    passed to csum_block_add() as the block offset.
 * @skb:    skb being built; its csum accumulates the copied data.
 *
 * Copies and checksums data out of the original skb in one pass.  For
 * messages whose type lacks ICMPV6_INFOMSG_MASK (errors), nf_ct_attach() is
 * called with the new and the original skb.
 * NOTE(review): the tail of the copy call, the csum declaration and the
 * return statement are absent from this copy.
 */
263 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
265 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
266 struct sk_buff *org_skb = msg->skb;
269 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
271 skb->csum = csum_block_add(skb->csum, csum, odd);
272 if (!(msg->type & ICMPV6_INFOMSG_MASK))
273 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - exchange the IPv6 source address with the address in the
 * Home Address destination option (HAO) of @skb.
 *
 * With Mobile IPv6 configured (built in or as a module) the function looks
 * for the IPV6_TLV_HAO option starting from opt->dsthao and, if found
 * (off >= 0), swaps iph->saddr and hao->addr through a temporary.  Without
 * Mobile IPv6 an empty inline stub is provided instead.
 *
 * NOTE(review): the declarations of tmp/off, any guard around the lookup and
 * the preprocessor lines separating the two variants are absent from this
 * copy.
 */
277 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
278 static void mip6_addr_swap(struct sk_buff *skb)
280 struct ipv6hdr *iph = ipv6_hdr(skb);
281 struct inet6_skb_parm *opt = IP6CB(skb);
282 struct ipv6_destopt_hao *hao;
287 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
288 if (likely(off >= 0)) {
/* off is relative to the network header. */
289 hao = (struct ipv6_destopt_hao *)
290 (skb_network_header(skb) + off);
291 ipv6_addr_copy(&tmp, &iph->saddr);
292 ipv6_addr_copy(&iph->saddr, &hao->addr);
293 ipv6_addr_copy(&hao->addr, &tmp);
298 static inline void mip6_addr_swap(struct sk_buff *skb) {}
302 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 error about @skb.
 * @skb:  the packet in error; the reply goes to its IPv6 source address.
 * @type: ICMPv6 type of the error.
 * @code: ICMPv6 code of the error.
 * @info: stored with htonl() in the icmp6_pointer field of the error.
 *
 * Visible flow:
 *  1. Check that the IPv6 header of @skb lies inside the skb buffer.
 *  2. Classify the destination address; a multicast destination or a packet
 *     not addressed to this host is only answered for Packet Too Big, or for
 *     Parameter Problem/unknown option when opt_unrec() says so.
 *  3. Classify the source address; link-local sources pin the interface,
 *     unspecified or multicast sources are logged and rejected.
 *  4. Refuse to answer packets for which is_ineligible() is true.
 *  5. Fill in the flow, take the per-CPU socket lock, apply the rate limit.
 *  6. Route the reply, reject anycast destinations, run the xfrm lookups
 *     (including a reverse-session relookup).
 *  7. Choose the hop limit, clamp the quoted length, append the data and
 *     either flush (on error) or push the pending frames.
 *  8. Drop the socket lock.
 *
 * NOTE(review): many short lines (declarations, return/goto statements,
 * labels, braces, else branches and some assignments such as the ones
 * setting saddr and np) are absent from this copy, so the exact control
 * flow between the visible statements cannot be confirmed here.
 */
304 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
306 struct net *net = dev_net(skb->dev);
307 struct inet6_dev *idev = NULL;
308 struct ipv6hdr *hdr = ipv6_hdr(skb);
310 struct ipv6_pinfo *np;
311 struct in6_addr *saddr = NULL;
312 struct dst_entry *dst;
313 struct dst_entry *dst2;
314 struct icmp6hdr tmp_hdr;
317 struct icmpv6_msg msg;
/* The whole fixed IPv6 header must lie between skb->head and skb->tail. */
324 if ((u8 *)hdr < skb->head ||
325 (skb->network_header + sizeof(*hdr)) > skb->tail)
329 * Make sure we respect the rules
330 * i.e. RFC 1885 2.4(e)
331 * Rule (e.1) is enforced by not using icmpv6_send
332 * in any code that processes icmp errors.
334 addr_type = ipv6_addr_type(&hdr->daddr);
336 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
/* Multicast destination or packet not addressed to this host. */
343 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
344 if (type != ICMPV6_PKT_TOOBIG &&
345 !(type == ICMPV6_PARAMPROB &&
346 code == ICMPV6_UNK_OPTION &&
347 (opt_unrec(skb, info))))
/* From here on addr_type describes the source address. */
353 addr_type = ipv6_addr_type(&hdr->saddr);
359 if (addr_type & IPV6_ADDR_LINKLOCAL)
360 iif = skb->dev->ifindex;
363 * Must not send error if the source does not uniquely
364 * identify a single node (RFC2463 Section 2.4).
365 * We check unspecified / multicast addresses here,
366 * and anycast addresses will be checked later.
368 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
369 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
374 * Never answer to a ICMP packet.
376 if (is_ineligible(skb)) {
377 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: ICMPv6 back to the original sender. */
383 memset(&fl, 0, sizeof(fl));
384 fl.proto = IPPROTO_ICMPV6;
385 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
387 ipv6_addr_copy(&fl.fl6_src, saddr);
389 fl.fl_icmp_type = type;
390 fl.fl_icmp_code = code;
391 security_skb_classify_flow(skb, &fl);
393 sk = icmpv6_xmit_lock(net);
/* Rate limiting is applied only after the socket lock has been taken. */
398 if (!icmpv6_xrlim_allow(sk, type, &fl))
401 tmp_hdr.icmp6_type = type;
402 tmp_hdr.icmp6_code = code;
403 tmp_hdr.icmp6_cksum = 0;
404 tmp_hdr.icmp6_pointer = htonl(info);
406 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
407 fl.oif = np->mcast_oif;
409 err = ip6_dst_lookup(sk, &dst, &fl);
414 * We won't send icmp if the destination is known
417 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
418 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
419 goto out_dst_release;
422 /* No need to clone since we're just using its address. */
425 err = xfrm_lookup(net, &dst, &fl, sk, 0);
/* Second attempt: decode the reverse session into fl2 and look up again. */
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
439 goto relookup_failed;
441 if (ip6_dst_lookup(sk, &dst2, &fl2))
442 goto relookup_failed;
444 err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
451 goto out_dst_release;
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops;
463 hlimit = np->hop_limit;
465 hlimit = ip6_dst_hoplimit(dst);
/* The quoted data starts at the network header of the offending packet. */
468 msg.offset = skb_network_offset(skb);
471 len = skb->len - msg.offset;
/* Quote no more than fits in a minimum-MTU packet after both headers. */
472 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
474 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
475 goto out_dst_release;
478 idev = in6_dev_get(skb->dev);
480 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
481 len + sizeof(struct icmp6hdr),
482 sizeof(struct icmp6hdr), hlimit,
483 np->tclass, NULL, &fl, (struct rt6_info*)dst,
486 ip6_flush_pending_frames(sk);
489 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
492 if (likely(idev != NULL))
497 icmpv6_xmit_unlock(sk);
500 EXPORT_SYMBOL(icmpv6_send);
/*
 * icmpv6_echo_reply - answer an Echo Request with an Echo Reply.
 * @skb: the received Echo Request.
 *
 * Visible flow: the reply header is a copy of the request's ICMPv6 header
 * with the type changed to ICMPV6_ECHO_REPLY.  The flow is addressed to the
 * request's source, uses the receiving interface as output interface and
 * takes its source address from saddr, which is first set to the request's
 * destination address and then subjected to an ipv6_unicast_destination()
 * test.  After taking the per-CPU socket lock the reply is routed, passed
 * through xfrm_lookup(), given a hop limit, appended via ip6_append_data()
 * with icmpv6_getfrag and either flushed on error or pushed out; finally the
 * socket lock is released.
 *
 * NOTE(review): short lines (declarations, goto/return statements, labels,
 * else branches, the action taken when the destination is not unicast, and
 * the remaining msg.* assignments) are absent from this copy.
 */
502 static void icmpv6_echo_reply(struct sk_buff *skb)
504 struct net *net = dev_net(skb->dev);
506 struct inet6_dev *idev;
507 struct ipv6_pinfo *np;
508 struct in6_addr *saddr = NULL;
509 struct icmp6hdr *icmph = icmp6_hdr(skb);
510 struct icmp6hdr tmp_hdr;
512 struct icmpv6_msg msg;
513 struct dst_entry *dst;
517 saddr = &ipv6_hdr(skb)->daddr;
519 if (!ipv6_unicast_destination(skb))
/* Start from the request header so the remaining fields are echoed back. */
522 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
523 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
525 memset(&fl, 0, sizeof(fl));
526 fl.proto = IPPROTO_ICMPV6;
527 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
529 ipv6_addr_copy(&fl.fl6_src, saddr);
530 fl.oif = skb->dev->ifindex;
531 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
532 security_skb_classify_flow(skb, &fl);
534 sk = icmpv6_xmit_lock(net);
539 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
540 fl.oif = np->mcast_oif;
542 err = ip6_dst_lookup(sk, &dst, &fl);
545 if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
548 if (ipv6_addr_is_multicast(&fl.fl6_dst))
549 hlimit = np->mcast_hops;
551 hlimit = np->hop_limit;
553 hlimit = ip6_dst_hoplimit(dst);
555 idev = in6_dev_get(skb->dev);
559 msg.type = ICMPV6_ECHO_REPLY;
561 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
562 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
563 (struct rt6_info*)dst, MSG_DONTWAIT);
566 ip6_flush_pending_frames(sk);
569 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
572 if (likely(idev != NULL))
576 icmpv6_xmit_unlock(sk);
/*
 * icmpv6_notify - hand a received ICMPv6 error to the affected protocol.
 * @skb:  received error; skb->data points at the embedded IPv6 header.
 * @type: ICMPv6 type of the error.
 * @code: ICMPv6 code of the error.
 * @info: 32-bit value from the ICMPv6 header, forwarded unchanged.
 *
 * The embedded IPv6 header is pulled in, extension headers are skipped to
 * find the inner protocol and its offset, and at least 8 bytes of the inner
 * protocol header are required.  The protocol number, masked with
 * MAX_INET_PROTOS - 1, indexes inet6_protos[]; if that entry has an
 * err_handler it is called.  raw6_icmp_error() is called as well.
 *
 * NOTE(review): declarations, early returns, the else keyword between the
 * two inner_offset assignments and the RCU lock/unlock lines are absent
 * from this copy.
 */
579 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
581 const struct inet6_protocol *ipprot;
586 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
589 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
590 if (ipv6_ext_hdr(nexthdr)) {
591 /* now skip over extension headers */
592 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
596 inner_offset = sizeof(struct ipv6hdr);
599 /* Checking header including 8 bytes of inner protocol header. */
600 if (!pskb_may_pull(skb, inner_offset+8))
603 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
604 Without this we will not able f.e. to make source routed
606 Corresponding argument (opt) to notifiers is already added.
610 hash = nexthdr & (MAX_INET_PROTOS - 1);
613 ipprot = rcu_dereference(inet6_protos[hash]);
614 if (ipprot && ipprot->err_handler)
615 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
618 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
622 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler installed in icmpv6_protocol.
 * @skb: received ICMPv6 packet.
 *
 * Visible flow:
 *  - IPsec policy: if xfrm6_policy_check() fails, the last state in the
 *    security path is inspected, the ICMPv6 header plus an embedded IPv6
 *    header must be pullable, and the reverse policy check is run with the
 *    network header temporarily moved past the ICMPv6 header, then restored.
 *  - ICMP6_MIB_INMSGS is counted and the checksum is validated according to
 *    skb->ip_summed, logging a debug message on failure.
 *  - The ICMPv6 header is pulled and the per-type input counter updated.
 *  - Dispatch on type: Echo Request is answered by icmpv6_echo_reply();
 *    Echo Reply is ignored; Packet Too Big feeds rt6_pmtu_discovery() with
 *    the embedded header's addresses and the reported MTU and then drops
 *    through to icmpv6_notify(); Destination Unreachable, Time Exceeded and
 *    Parameter Problem go to icmpv6_notify(); multicast listener query and
 *    report go to igmp6_event_query()/igmp6_event_report(); a list of other
 *    known types has no visible action; unknown types are logged and, when
 *    they are errors rather than informational, passed to icmpv6_notify().
 *  - ICMP6_MIB_INERRORS is counted on a path whose label is not visible.
 *
 * NOTE(review): break/goto/return statements, labels, the switch statement
 * on type, the neighbour discovery call and several continuation lines are
 * absent from this copy.
 */
625 static int icmpv6_rcv(struct sk_buff *skb)
627 struct net_device *dev = skb->dev;
628 struct inet6_dev *idev = __in6_dev_get(dev);
629 struct in6_addr *saddr, *daddr;
630 struct ipv6hdr *orig_hdr;
631 struct icmp6hdr *hdr;
634 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
635 struct sec_path *sp = skb_sec_path(skb);
638 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
642 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
645 nh = skb_network_offset(skb);
646 skb_set_network_header(skb, sizeof(*hdr));
648 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
651 skb_set_network_header(skb, nh);
654 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
656 saddr = &ipv6_hdr(skb)->saddr;
657 daddr = &ipv6_hdr(skb)->daddr;
659 /* Perform checksum. */
660 switch (skb->ip_summed) {
661 case CHECKSUM_COMPLETE:
662 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
667 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
669 if (__skb_checksum_complete(skb)) {
670 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
676 if (!pskb_pull(skb, sizeof(*hdr)))
679 hdr = icmp6_hdr(skb);
681 type = hdr->icmp6_type;
683 ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
686 case ICMPV6_ECHO_REQUEST:
687 icmpv6_echo_reply(skb);
690 case ICMPV6_ECHO_REPLY:
691 /* we couldn't care less */
694 case ICMPV6_PKT_TOOBIG:
695 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
696 standard destination cache. Seems, only "advanced"
697 destination cache will allow to solve this problem
700 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
702 hdr = icmp6_hdr(skb);
703 orig_hdr = (struct ipv6hdr *) (hdr + 1);
704 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
705 ntohl(hdr->icmp6_mtu));
708 * Drop through to notify
711 case ICMPV6_DEST_UNREACH:
712 case ICMPV6_TIME_EXCEED:
713 case ICMPV6_PARAMPROB:
714 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
717 case NDISC_ROUTER_SOLICITATION:
718 case NDISC_ROUTER_ADVERTISEMENT:
719 case NDISC_NEIGHBOUR_SOLICITATION:
720 case NDISC_NEIGHBOUR_ADVERTISEMENT:
725 case ICMPV6_MGM_QUERY:
726 igmp6_event_query(skb);
729 case ICMPV6_MGM_REPORT:
730 igmp6_event_report(skb);
733 case ICMPV6_MGM_REDUCTION:
734 case ICMPV6_NI_QUERY:
735 case ICMPV6_NI_REPLY:
736 case ICMPV6_MLD2_REPORT:
737 case ICMPV6_DHAAD_REQUEST:
738 case ICMPV6_DHAAD_REPLY:
739 case ICMPV6_MOBILE_PREFIX_SOL:
740 case ICMPV6_MOBILE_PREFIX_ADV:
744 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
747 if (type & ICMPV6_INFOMSG_MASK)
751 * error of unknown type.
752 * must pass to upper level
755 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
762 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
/*
 * icmpv6_flow_init - initialise a flow description for an ICMPv6 message.
 * @sk:    socket used for security classification of the flow.
 * @fl:    flow to fill in; it is zeroed first.
 * @saddr: source address copied into the flow.
 * @daddr: destination address copied into the flow.
 * (The body also uses a value named type as the ICMPv6 type; the parameter
 * lines declaring it, and any further parameters, are absent from this copy.)
 *
 * The protocol is set to IPPROTO_ICMPV6 and the ICMPv6 code to 0, after
 * which security_sk_classify_flow() is called on the result.
 */
768 void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
770 const struct in6_addr *saddr,
771 const struct in6_addr *daddr,
774 memset(fl, 0, sizeof(*fl));
775 ipv6_addr_copy(&fl->fl6_src, saddr);
776 ipv6_addr_copy(&fl->fl6_dst, daddr);
777 fl->proto = IPPROTO_ICMPV6;
778 fl->fl_icmp_type = type;
779 fl->fl_icmp_code = 0;
781 security_sk_classify_flow(sk, fl);
785 * Special lock-class for __icmpv6_sk:
/* Lockdep class assigned to sk_dst_lock of every ICMPv6 control socket. */
787 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * icmpv6_sk_init - per-namespace setup of the ICMPv6 control sockets.
 * @net: network namespace being initialised.
 *
 * Allocates a zeroed array of nr_cpu_ids socket pointers and, for every
 * possible CPU, creates a raw PF_INET6/IPPROTO_ICMPV6 control socket with
 * inet_ctl_sock_create(), stores it in net->ipv6.icmp_sk[] and moves its
 * sk_dst_lock into the dedicated lockdep class.  On failure the sockets in
 * slots 0..i-1 are destroyed and the array is freed.
 *
 * NOTE(review): the error path walks indices 0..i-1 while creation walks
 * possible CPUs only; whether every index below i holds a socket depends on
 * the possible-CPU mask - worth confirming against the full source.
 * NOTE(review): declarations, the left-hand sides of the allocation and of
 * the buffer-size assignment, the error test after socket creation, labels
 * and return statements are absent from this copy.
 */
789 static int __net_init icmpv6_sk_init(struct net *net)
795 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
796 if (net->ipv6.icmp_sk == NULL)
799 for_each_possible_cpu(i) {
800 err = inet_ctl_sock_create(&sk, PF_INET6,
801 SOCK_RAW, IPPROTO_ICMPV6, net);
804 "Failed to initialize the ICMP6 control socket "
810 net->ipv6.icmp_sk[i] = sk;
813 * Split off their lock-class, because sk->sk_dst_lock
814 * gets used from softirqs, which is safe for
815 * __icmpv6_sk (because those never get directly used
816 * via userspace syscalls), but unsafe for normal sockets.
818 lockdep_set_class(&sk->sk_dst_lock,
819 &icmpv6_socket_sk_dst_lock_key);
821 /* Enough space for 2 64K ICMP packets, including
822 * sk_buff struct overhead.
825 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
830 for (j = 0; j < i; j++)
831 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
832 kfree(net->ipv6.icmp_sk);
/*
 * icmpv6_sk_exit - per-namespace teardown of the ICMPv6 control sockets.
 * @net: network namespace being destroyed.
 *
 * Destroys the control socket of every possible CPU and then frees the
 * pointer array allocated by icmpv6_sk_init().
 * NOTE(review): the declaration of i and the braces are absent from this
 * copy.
 */
836 static void __net_exit icmpv6_sk_exit(struct net *net)
840 for_each_possible_cpu(i) {
841 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
843 kfree(net->ipv6.icmp_sk);
/*
 * Per-network-namespace hooks: icmpv6_sk_init on namespace creation and
 * icmpv6_sk_exit on namespace teardown.  Registered in icmpv6_init() and
 * unregistered in icmpv6_cleanup().
 * NOTE(review): the closing brace of the initializer is absent from this copy.
 */
846 static struct pernet_operations icmpv6_sk_ops = {
847 .init = icmpv6_sk_init,
848 .exit = icmpv6_sk_exit,
/*
 * icmpv6_init - register the ICMPv6 subsystem.
 *
 * Registers the per-namespace socket operations first and then the ICMPv6
 * protocol handler.  The visible failure path logs an error and unregisters
 * the per-namespace operations again.
 * NOTE(review): the declaration of err, the error checks' bodies, labels
 * and return statements are absent from this copy.
 */
851 int __init icmpv6_init(void)
855 err = register_pernet_subsys(&icmpv6_sk_ops);
860 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
865 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
866 unregister_pernet_subsys(&icmpv6_sk_ops);
/*
 * icmpv6_cleanup - undo icmpv6_init().
 *
 * Unregisters the per-namespace socket operations and then removes the
 * ICMPv6 protocol handler.
 * NOTE(review): the braces of this function are absent from this copy.
 */
870 void icmpv6_cleanup(void)
872 unregister_pernet_subsys(&icmpv6_sk_ops);
873 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Table describing Destination Unreachable codes.  icmpv6_err_convert()
 * indexes it (as tab_unreach[code]) and reads the err and fatal members.
 * NOTE(review): the member declarations, the table name line and all entry
 * values are absent from this copy; only two entry labels remain.
 */
877 static const struct icmp6_err {
885 { /* ADM_PROHIBITED */
889 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno value.
 * @type: ICMPv6 type of the received error.
 * @code: ICMPv6 code of the received error.
 * @err:  output; receives the error value chosen for this type/code.
 *
 * For Destination Unreachable with a code up to ICMPV6_PORT_UNREACH, both
 * the error value and the fatal flag come from tab_unreach[code].  Packet
 * Too Big, Parameter Problem and Time Exceeded have their own cases.
 * NOTE(review): the bodies of those cases, the initial values of *err and
 * fatal, and the return statement are absent from this copy; presumably the
 * fatal flag is the return value - confirm against the full source.
 */
903 int icmpv6_err_convert(u8 type, u8 code, int *err)
910 case ICMPV6_DEST_UNREACH:
/* Bound check before indexing the table. */
912 if (code <= ICMPV6_PORT_UNREACH) {
913 *err = tab_unreach[code].err;
914 fatal = tab_unreach[code].fatal;
918 case ICMPV6_PKT_TOOBIG:
922 case ICMPV6_PARAMPROB:
927 case ICMPV6_TIME_EXCEED:
935 EXPORT_SYMBOL(icmpv6_err_convert);
/*
 * Template for the per-namespace ICMPv6 sysctl table.  Its visible entry,
 * "ratelimit", is an int backed by init_net's icmpv6_time and handled by
 * proc_dointvec_ms_jiffies (the value is presented in milliseconds and
 * stored in jiffies).  ipv6_icmp_sysctl_init() duplicates this template and
 * repoints the data field at the target namespace.
 * NOTE(review): the mode field, entry braces and the table terminator are
 * absent from this copy.
 */
938 ctl_table ipv6_icmp_table_template[] = {
940 .procname = "ratelimit",
941 .data = &init_net.ipv6.sysctl.icmpv6_time,
942 .maxlen = sizeof(int),
944 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_icmp_sysctl_init - build the ICMPv6 sysctl table for a namespace.
 * @net: network namespace the table is created for.
 *
 * Duplicates ipv6_icmp_table_template with kmemdup() and points the first
 * entry's data at this namespace's icmpv6_time.
 * NOTE(review): the allocation flags, the check of the kmemdup() result and
 * the return statement are absent from this copy; presumably the duplicated
 * table (or NULL on allocation failure) is returned - confirm against the
 * full source.
 */
949 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
951 struct ctl_table *table;
953 table = kmemdup(ipv6_icmp_table_template,
954 sizeof(ipv6_icmp_table_template),
958 table[0].data = &net->ipv6.sysctl.icmpv6_time;