// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux IPv6 multicast routing support for BSD pim6sd
 * Based on net/ipv4/ipmr.c.
 *
 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *	LSIIT Laboratory, Strasbourg, France
 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 * Copyright (C)2007,2008 USAGI/WIDE Project
 *	YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
#include <net/ip_tunnels.h>
#include <linux/nospec.h>
struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);

static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;
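/*
 * Illustrative read-side sketch implied by the locking comment above
 * (an assumption about caller context, not a function from this file):
 * resolved entries are looked up under RCU alone, e.g.
 *
 *	rcu_read_lock();
 *	c = ip6mr_cache_find(mrt, &src, &grp);
 *	if (c)
 *		... use c; no exclusive lock on this path ...
 *	rcu_read_unlock();
 */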
static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (id == mrt->id)
			return mrt;
	}
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, id);
	rcu_read_unlock();
	return mrt;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi6_to_flowi(flp6));

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = __ip6mr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	rtnl_lock();
	ip6mr_free_table(mrt);
	rtnl_unlock();
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	ASSERT_RTNL();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}

static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

#define __ip6mr_get_table ip6mr_get_table

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	ASSERT_RTNL();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
	return 0;
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};
static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = __ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	struct net *net = read_pnet(&mrt->net);

	WARN_ON_ONCE(!mr_can_free_table(net));

	timer_shutdown_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}
#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		rcu_read_unlock();
		return ERR_PTR(-ENOENT);
	}

	iter->mrt = mrt;

	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const struct net_device *vif_dev;
		const char *name;

		vif_dev = vif_dev_read(vif);
		name = vif_dev ? vif_dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   atomic_long_read(&mfc->_c.mfc_un.res.pkt),
				   atomic_long_read(&mfc->_c.mfc_un.res.bytes),
				   atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;

	/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
	reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (reg_vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
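/*
 * Illustrative summary (not from the original sources): a PIM Register
 * handled above arrives as
 *
 *	[ outer IPv6 | struct pimreghdr | inner IPv6 multicast packet ]
 *
 * pim6_rcv() validates the register header and its checksum, strips the
 * outer header plus the 8-byte pimreghdr, and re-injects the inner
 * packet on the register device via skb_tunnel_rx()/netif_rx().
 */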
/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto tx_err;

	DEV_STATS_ADD(dev, tx_bytes, skb->len);
	DEV_STATS_INC(dev, tx_packets);
	rcu_read_lock();
	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
			   MRT6MSG_WHOLEPKT);
	rcu_read_unlock();
	kfree_skb(skb);
	return NETDEV_TX_OK;

tx_err:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->netns_immutable	= true;
}
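/*
 * Illustrative arithmetic: with a 1500-byte link MTU the register device
 * MTU above works out to 1500 - 40 (struct ipv6hdr) - 8 (PIM register
 * header) = 1452 bytes.
 */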
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  struct net_device *vif_dev,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_dev, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}
/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_VIF_DEL, v, dev,
				       vifi, mrt->id);
	spin_lock(&mrt_lock);
	RCU_INIT_POINTER(v->dev, NULL);

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_dec(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	netdev_put(dev, &v->dev_tracker);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			kfree_skb(skb);
		}
	}

	ip6mr_cache_free(c);
}
/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
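/*
 * Illustrative example: with ttls[2] = 1 and ttls[5] = 3 (all other
 * slots 255), minvif becomes 2 and maxvif 6, and ip6_mr_forward() below
 * sends out mif 5 only while the packet's hop limit exceeds 3.
 */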
static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, dev, vifi, mrt->id);
	return 0;
}
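/*
 * Hedged userspace sketch (not part of this file): a daemon such as
 * pim6sd typically reaches mif6_add() like
 *
 *	struct mif6ctl mc = {
 *		.mif6c_mifi = 0,
 *		.mif6c_pifi = if_nametoindex("eth0"),	// hypothetical ifname
 *	};
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *
 * where fd is the SOCK_RAW/IPPROTO_ICMPV6 socket that did MRT6_INIT.
 */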
static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			rcu_read_lock();
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
			rcu_read_unlock();
		}
	}
}
/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under rcu_read_lock()
 */

static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		if (assert == MRT6MSG_WRMIFWHOLE)
			msg->im6_mif = mifi;
		else
			msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
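/*
 * Hedged userspace sketch (not part of this file): the daemon consumes
 * these upcalls by reading its mroute socket; the payload starts with
 * struct mrt6msg, so a MRT6MSG_NOCACHE report can be handled roughly as
 *
 *	n = recv(fd, buf, sizeof(buf), 0);
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *	if (n >= sizeof(*m) && m->im6_msgtype == MRT6MSG_NOCACHE)
 *		... resolve (m->im6_src, m->im6_dst), then MRT6_ADD_MFC ...
 */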
/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}
static unsigned int ip6mr_seq_read(const struct net *net)
{
	return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}
/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	rtnl_lock();
	ip6mr_rules_exit(net);
	rtnl_unlock();
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_notifier_exit(net);
}

static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		ip6mr_rules_exit(net);
	rtnl_unlock();
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
	.exit_batch = ip6mr_net_exit_batch,
};

static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
	 .msgtype = RTM_GETROUTE,
	 .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
};
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
	if (!err)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void __init ip6_mr_cleanup(void)
{
	rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		timer_delete(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
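/*
 * Hedged userspace sketch (not part of this file): installing the (S,G)
 * entry handled by ip6mr_mfc_add() might look like
 *
 *	struct mf6cctl mf = { .mf6cc_parent = 0 };	// incoming mif
 *	inet_pton(AF_INET6, "2001:db8::1", &mf.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff0e::1", &mf.mf6cc_mcastgrp.sin6_addr);
 *	IF_SET(1, &mf.mf6cc_ifset);			// forward out mif 1
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mf, sizeof(mf));
 */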
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	if (flags & MRT6_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}
static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	spin_lock(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
	}
	spin_unlock(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct ipv6_devconf *devconf;
	struct mr_table *mrt;
	int err = -EACCES;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	devconf = net->ipv6.devconf_all;
	if (!devconf || !atomic_read(&devconf->mc_forwarding))
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			spin_lock(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			atomic_dec(&devconf->mc_forwarding);
			spin_unlock(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;
		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		bool do_wrmifwhole;
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;

		do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
			mrt->mroute_do_wrvifwhole = do_wrmifwhole;
		}
		rtnl_unlock();
		return ret;
	}
#endif

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
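/*
 * Hedged userspace sketch (not part of this file): the overall
 * setsockopt() lifecycle handled above is roughly
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	... MRT6_ADD_MIF / MRT6_ADD_MFC, read upcalls ...
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */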
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			  sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
	struct sioc_sg_req6 *sr;
	struct sioc_mif_req6 *vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		vr = (struct sioc_mif_req6 *)arg;
		if (vr->mifi >= mrt->maxvif)
			return -EINVAL;
		vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr->mifi];
		if (VIF_EXISTS(mrt, vr->mifi)) {
			vr->icount = READ_ONCE(vif->pkt_in);
			vr->ocount = READ_ONCE(vif->pkt_out);
			vr->ibytes = READ_ONCE(vif->bytes_in);
			vr->obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		sr = (struct sioc_sg_req6 *)arg;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
				     &sr->grp.sin6_addr);
		if (c) {
			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
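/*
 * Hedged userspace sketch (not part of this file): per-mif counters can
 * be read back through the ioctl above with
 *
 *	struct sioc_mif_req6 req = { .mifi = 0 };
 *	if (ioctl(fd, SIOCGETMIFCNT_IN6, &req) == 0)
 *		printf("pkts in %lu out %lu\n", req.icount, req.ocount);
 */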
#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = READ_ONCE(vif->pkt_in);
			vr.ocount = READ_ONCE(vif->pkt_out);
			vr.ibytes = READ_ONCE(vif->bytes_in);
			vr.obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, int vifi)
{
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *vif_dev;
	struct ipv6hdr *ipv6h;
	struct dst_entry *dst;
	struct flowi6 fl6;

	vif_dev = vif_dev_read(vif);
	if (!vif_dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
		DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
		DEV_STATS_INC(vif_dev, tx_packets);
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	skb->dev = vif_dev;
	WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
	WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, vif_dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
/* Called with rcu_read_lock() */
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
	for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
		if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
			break;
	}
	return ct;
}

/* Called under rcu_read_lock() */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, dev);

	vif = c->_c.mfc_parent;
	atomic_long_inc(&c->_c.mfc_un.res.pkt);
	atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
	WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
		atomic_long_inc(&c->_c.mfc_un.res.wrong_if);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
			if (mrt->mroute_do_wrvifwhole)
				ip6mr_cache_report(mrt, skb, true_vifi,
						   MRT6MSG_WRMIFWHOLE);
		}
		goto dont_forward;
	}

forward:
	WRITE_ONCE(mrt->vif_table[vif].pkt_in,
		   mrt->vif_table[vif].pkt_in + 1);
	WRITE_ONCE(mrt->vif_table[vif].bytes_in,
		   mrt->vif_table[vif].bytes_in + skb->len);

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;
	struct net_device *dev;

	/* skb->dev passed in is the master dev for vrfs.
	 * Get the proper interface that does have a vif associated with it.
	 */
	dev = skb->dev;
	if (netif_is_l3_master(skb->dev)) {
		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
		if (!dev) {
			kfree_skb(skb);
			return -ENODEV;
		}
	}

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);

			return err;
		}
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, dev, skb, cache);

	return 0;
}
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));

	rcu_read_lock();
	mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		rcu_read_unlock();
		return -ENOENT;
	}

	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			rcu_read_unlock();
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			rcu_read_unlock();
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
		rcu_read_unlock();

		return err;
	}

	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
	rcu_read_unlock();
	return err;
}
static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags)
{
	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
				 cmd, flags);
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}
static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
	[RTA_SRC]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[RTA_DST]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[RTA_TABLE]	= { .type = NLA_U32 },
};

static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
					const struct nlmsghdr *nlh,
					struct nlattr **tb,
					struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
			  extack);
	if (err)
		return err;

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid values in header for multicast route get request");
		return -EINVAL;
	}

	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
		return -EINVAL;
	}

	return 0;
}

static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct in6_addr src = {}, grp = {};
	struct nlattr *tb[RTA_MAX + 1];
	struct mfc6_cache *cache;
	struct mr_table *mrt;
	struct sk_buff *skb;
	u32 tableid;
	int err;

	err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		return err;

	if (tb[RTA_SRC])
		src = nla_get_in6_addr(tb[RTA_SRC]);
	if (tb[RTA_DST])
		grp = nla_get_in6_addr(tb[RTA_DST]);
	tableid = nla_get_u32_default(tb[RTA_TABLE], 0);

	mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
	if (!mrt) {
		NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
		return -ENOENT;
	}

	/* entries are added/deleted only under RTNL */
	rcu_read_lock();
	cache = ip6mr_cache_find(mrt, &src, &grp);
	rcu_read_unlock();
	if (!cache) {
		NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
		return -ENOENT;
	}

	skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
				nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
}
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct fib_dump_filter filter = {
		.rtnl_held = true,
	};
	int err;

	if (cb->strict_check) {
		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
					    &filter, cb);
		if (err < 0)
			return err;
	}

	if (filter.table_id) {
		struct mr_table *mrt;

		mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
		if (!mrt) {
			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
				return skb->len;

			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
			return -ENOENT;
		}
		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
				    &mfc_unres_lock, &filter);
		return skb->len ? : err;
	}

	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
}
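/*
 * Illustrative note (an assumption about userspace, not from this file):
 * the RTM_GETROUTE doit/dumpit pair registered in ip6mr_rtnl_msg_handlers
 * is the kernel side of what a tool such as iproute2's
 * "ip -6 mroute show" exercises.
 */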