1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * NET3 IP device support routines.
5 * Derived from the IP parts of dev.c 1.0.19
7 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8 * Mark Evans, <evansmp@uhura.aston.ac.uk>
11 * Alan Cox, <gw4pts@gw4pts.ampr.org>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
15 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
17 * Cyrus Durgin: updated for kmod
18 * Matthias Andree: in devinet_ioctl, compare label and
19 * address (4.4BSD alias style support),
20 * fall back to comparing just the label
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include "igmp_internal.h"
50 #include <linux/slab.h>
51 #include <linux/hash.h>
53 #include <linux/sysctl.h>
55 #include <linux/kmod.h>
56 #include <linux/netconf.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 #include <net/addrconf.h>
66 #define IPV6ONLY_FLAGS \
67 (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
68 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
69 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
71 static struct ipv4_devconf ipv4_devconf = {
73 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
78 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
79 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
83 static struct ipv4_devconf ipv4_devconf_dflt = {
85 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
87 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
88 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
89 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
90 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
91 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
92 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
96 #define IPV4_DEVCONF_DFLT(net, attr) \
97 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
99 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
100 [IFA_LOCAL] = { .type = NLA_U32 },
101 [IFA_ADDRESS] = { .type = NLA_U32 },
102 [IFA_BROADCAST] = { .type = NLA_U32 },
103 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
104 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
105 [IFA_FLAGS] = { .type = NLA_U32 },
106 [IFA_RT_PRIORITY] = { .type = NLA_U32 },
107 [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
108 [IFA_PROTO] = { .type = NLA_U8 },
111 #define IN4_ADDR_HSIZE_SHIFT 8
112 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
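/* Per-netns hash of all configured IPv4 addresses, keyed by ifa_local:
 * IN4_ADDR_HSIZE_SHIFT == 8 gives 1U << 8 == 256 buckets in
 * net->ipv4.inet_addr_lst[], maintained by inet_hash_insert() and
 * inet_hash_remove() and searched by inet_lookup_ifaddr_rcu() below.
 */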
114 static u32 inet_addr_hash(const struct net *net, __be32 addr)
116 u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
118 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
121 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
123 u32 hash = inet_addr_hash(net, ifa->ifa_local);
126 hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
129 static void inet_hash_remove(struct in_ifaddr *ifa)
132 hlist_del_init_rcu(&ifa->addr_lst);
136 * __ip_dev_find - find the first device with a given source address.
137 * @net: the net namespace
138 * @addr: the source address
139 * @devref: if true, take a reference on the found device
141 * If a caller uses devref=false, it should be protected by RCU or RTNL
143 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
145 struct net_device *result = NULL;
146 struct in_ifaddr *ifa;
149 ifa = inet_lookup_ifaddr_rcu(net, addr);
151 struct flowi4 fl4 = { .daddr = addr };
152 struct fib_result res = { 0 };
153 struct fib_table *local;
155 /* Fall back to the FIB local table so that communication
156 * over loopback subnets works.
158 local = fib_get_table(net, RT_TABLE_LOCAL);
160 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
161 res.type == RTN_LOCAL)
162 result = FIB_RES_DEV(res);
164 result = ifa->ifa_dev->dev;
166 if (result && devref)
171 EXPORT_SYMBOL(__ip_dev_find);
173 /* called under RCU lock */
174 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
176 u32 hash = inet_addr_hash(net, addr);
177 struct in_ifaddr *ifa;
179 hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
180 if (ifa->ifa_local == addr)
186 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
188 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
189 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
190 static void inet_del_ifa(struct in_device *in_dev,
191 struct in_ifaddr __rcu **ifap,
194 static int devinet_sysctl_register(struct in_device *idev);
195 static void devinet_sysctl_unregister(struct in_device *idev);
197 static int devinet_sysctl_register(struct in_device *idev)
201 static void devinet_sysctl_unregister(struct in_device *idev)
206 /* Locks all the inet devices. */
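/* Allocate a zeroed in_ifaddr bound to @in_dev; the allocation is charged
 * to the caller's memcg via GFP_KERNEL_ACCOUNT. The matching in_dev_put()
 * happens in inet_rcu_free_ifa() below (the corresponding in_dev_hold()
 * is not shown here).
 */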
208 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
210 struct in_ifaddr *ifa;
212 ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
217 ifa->ifa_dev = in_dev;
219 INIT_HLIST_NODE(&ifa->addr_lst);
224 static void inet_rcu_free_ifa(struct rcu_head *head)
226 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
228 in_dev_put(ifa->ifa_dev);
232 static void inet_free_ifa(struct in_ifaddr *ifa)
234 /* Our reference to ifa->ifa_dev must be freed ASAP
235 * to release the reference to the netdev the same way.
236 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
238 call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
241 static void in_dev_free_rcu(struct rcu_head *head)
243 struct in_device *idev = container_of(head, struct in_device, rcu_head);
245 kfree(rcu_dereference_protected(idev->mc_hash, 1));
249 void in_dev_finish_destroy(struct in_device *idev)
251 struct net_device *dev = idev->dev;
253 WARN_ON(idev->ifa_list);
254 WARN_ON(idev->mc_list);
255 #ifdef NET_REFCNT_DEBUG
256 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
258 netdev_put(dev, &idev->dev_tracker);
260 pr_err("Freeing alive in_device %p\n", idev);
262 call_rcu(&idev->rcu_head, in_dev_free_rcu);
264 EXPORT_SYMBOL(in_dev_finish_destroy);
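/* Create the per-device IPv4 state: copy the namespace's default devconf,
 * allocate ARP parameters, register sysctls (except for the blackhole
 * device) and initialize multicast state. dev->ip_ptr is assigned last,
 * since packets may be delivered as soon as it is visible.
 */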
266 static struct in_device *inetdev_init(struct net_device *dev)
268 struct in_device *in_dev;
273 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
276 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
277 sizeof(in_dev->cnf));
278 in_dev->cnf.sysctl = NULL;
280 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
281 if (!in_dev->arp_parms)
283 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
284 netif_disable_lro(dev);
285 /* Reference in_dev->dev */
286 netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
287 /* Account for reference dev->ip_ptr (below) */
288 refcount_set(&in_dev->refcnt, 1);
290 if (dev != blackhole_netdev) {
291 err = devinet_sysctl_register(in_dev);
294 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
299 ip_mc_init_dev(in_dev);
300 if (dev->flags & IFF_UP)
304 /* we can receive as soon as ip_ptr is set -- do this last */
305 rcu_assign_pointer(dev->ip_ptr, in_dev);
307 return in_dev ?: ERR_PTR(err);
314 static void inetdev_destroy(struct in_device *in_dev)
316 struct net_device *dev;
317 struct in_ifaddr *ifa;
325 ip_mc_destroy_dev(in_dev);
327 while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
328 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
332 RCU_INIT_POINTER(dev->ip_ptr, NULL);
334 devinet_sysctl_unregister(in_dev);
335 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
341 static int __init inet_blackhole_dev_init(void)
346 if (!inetdev_init(blackhole_netdev))
352 late_initcall(inet_blackhole_dev_init);
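/* Check whether @a (and @b, when non-zero) falls inside one of the
 * subnets configured on @in_dev; the address list is walked under RCU.
 */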
354 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
356 const struct in_ifaddr *ifa;
359 in_dev_for_each_ifa_rcu(ifa, in_dev) {
360 if (inet_ifa_match(a, ifa)) {
361 if (!b || inet_ifa_match(b, ifa)) {
371 static void __inet_del_ifa(struct in_device *in_dev,
372 struct in_ifaddr __rcu **ifap,
373 int destroy, struct nlmsghdr *nlh, u32 portid)
375 struct in_ifaddr *promote = NULL;
376 struct in_ifaddr *ifa, *ifa1;
377 struct in_ifaddr __rcu **last_prim;
378 struct in_ifaddr *prev_prom = NULL;
379 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
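/* Overall flow: strip (or collect for promotion) the secondaries that
 * share ifa1's subnet, unlink ifa1 itself, send RTM_DELADDR and the
 * NETDEV_DOWN notification, and finally promote one secondary to primary
 * if promote_secondaries is enabled.
 */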
383 ifa1 = rtnl_dereference(*ifap);
388 /* 1. Deleting primary ifaddr forces deletion of all secondaries
389 * unless alias promotion is set
392 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
393 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
395 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
396 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
397 ifa1->ifa_scope <= ifa->ifa_scope)
398 last_prim = &ifa->ifa_next;
400 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
401 ifa1->ifa_mask != ifa->ifa_mask ||
402 !inet_ifa_match(ifa1->ifa_address, ifa)) {
403 ifap1 = &ifa->ifa_next;
409 inet_hash_remove(ifa);
410 *ifap1 = ifa->ifa_next;
412 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
413 blocking_notifier_call_chain(&inetaddr_chain,
423 /* On promotion all secondaries from the subnet change the
424 * primary IP, so we must remove all their routes silently
425 * and later add them back with the new prefsrc. Do this
426 * while all addresses are on the device list.
428 for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
429 if (ifa1->ifa_mask == ifa->ifa_mask &&
430 inet_ifa_match(ifa1->ifa_address, ifa))
431 fib_del_ifaddr(ifa, ifa1);
437 *ifap = ifa1->ifa_next;
438 inet_hash_remove(ifa1);
440 /* 3. Announce address deletion */
442 /* Send message first, then call notifier.
443 At first sight, the FIB update triggered by the notifier
444 will refer to an already deleted ifaddr, which could confuse
445 netlink listeners. That is not the case: gated sees
446 the route deleted, and if it still thinks the ifaddr
447 is valid, it will try to restore the deleted routes... Grr.
448 So this order is correct.
450 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
451 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
454 struct in_ifaddr *next_sec;
456 next_sec = rtnl_dereference(promote->ifa_next);
458 struct in_ifaddr *last_sec;
460 rcu_assign_pointer(prev_prom->ifa_next, next_sec);
462 last_sec = rtnl_dereference(*last_prim);
463 rcu_assign_pointer(promote->ifa_next, last_sec);
464 rcu_assign_pointer(*last_prim, promote);
467 promote->ifa_flags &= ~IFA_F_SECONDARY;
468 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
469 blocking_notifier_call_chain(&inetaddr_chain,
471 for (ifa = next_sec; ifa;
472 ifa = rtnl_dereference(ifa->ifa_next)) {
473 if (ifa1->ifa_mask != ifa->ifa_mask ||
474 !inet_ifa_match(ifa1->ifa_address, ifa))
484 static void inet_del_ifa(struct in_device *in_dev,
485 struct in_ifaddr __rcu **ifap,
488 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
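/* Link a new address into in_dev->ifa_list. An identical existing local
 * address makes the insert fail, an address covered by an existing
 * primary of the same subnet becomes a secondary, registered validators
 * may veto the change, and on success the address is hashed,
 * RTM_NEWADDR is sent and the NETDEV_UP notifier chain runs.
 */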
491 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
492 u32 portid, struct netlink_ext_ack *extack)
494 struct in_ifaddr __rcu **last_primary, **ifap;
495 struct in_device *in_dev = ifa->ifa_dev;
496 struct net *net = dev_net(in_dev->dev);
497 struct in_validator_info ivi;
498 struct in_ifaddr *ifa1;
503 ifa->ifa_flags &= ~IFA_F_SECONDARY;
504 last_primary = &in_dev->ifa_list;
506 /* Don't set IPv6-only flags on IPv4 addresses */
507 ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
509 ifap = &in_dev->ifa_list;
510 ifa1 = rtnl_dereference(*ifap);
513 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
514 ifa->ifa_scope <= ifa1->ifa_scope)
515 last_primary = &ifa1->ifa_next;
516 if (ifa1->ifa_mask == ifa->ifa_mask &&
517 inet_ifa_match(ifa1->ifa_address, ifa)) {
518 if (ifa1->ifa_local == ifa->ifa_local) {
522 if (ifa1->ifa_scope != ifa->ifa_scope) {
523 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
527 ifa->ifa_flags |= IFA_F_SECONDARY;
530 ifap = &ifa1->ifa_next;
531 ifa1 = rtnl_dereference(*ifap);
534 /* Allow any devices that wish to register ifaddr validators to weigh
535 * in now, before changes are committed. The rtnl lock is serializing
536 * access here, so the state should not change between a validator call
537 * and a final notify on commit. This isn't invoked on promotion under
538 * the assumption that validators are checking the address itself, and
541 ivi.ivi_addr = ifa->ifa_address;
542 ivi.ivi_dev = ifa->ifa_dev;
544 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
546 ret = notifier_to_errno(ret);
552 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
555 rcu_assign_pointer(ifa->ifa_next, *ifap);
556 rcu_assign_pointer(*ifap, ifa);
558 inet_hash_insert(dev_net(in_dev->dev), ifa);
560 cancel_delayed_work(&net->ipv4.addr_chk_work);
561 queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
563 /* Send message first, then call notifier.
564 The notifier will trigger a FIB update, so that
565 netlink listeners will know about the new ifaddr */
566 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
567 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
572 static int inet_insert_ifa(struct in_ifaddr *ifa)
574 if (!ifa->ifa_local) {
579 return __inet_insert_ifa(ifa, NULL, 0, NULL);
582 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
584 struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
586 ipv4_devconf_setall(in_dev);
587 neigh_parms_data_state_setall(in_dev->arp_parms);
589 if (ipv4_is_loopback(ifa->ifa_local))
590 ifa->ifa_scope = RT_SCOPE_HOST;
591 return inet_insert_ifa(ifa);
594 /* Caller must hold RCU or RTNL:
595 * We don't take a reference on the found in_device
597 struct in_device *inetdev_by_index(struct net *net, int ifindex)
599 struct net_device *dev;
600 struct in_device *in_dev = NULL;
603 dev = dev_get_by_index_rcu(net, ifindex);
605 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
609 EXPORT_SYMBOL(inetdev_by_index);
611 /* Called only from RTNL semaphored context. No locks. */
613 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
616 struct in_ifaddr *ifa;
620 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
621 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
627 static int ip_mc_autojoin_config(struct net *net, bool join,
628 const struct in_ifaddr *ifa)
630 #if defined(CONFIG_IP_MULTICAST)
631 struct ip_mreqn mreq = {
632 .imr_multiaddr.s_addr = ifa->ifa_address,
633 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
635 struct sock *sk = net->ipv4.mc_autojoin_sk;
638 ASSERT_RTNL_NET(net);
642 ret = ip_mc_join_group(sk, &mreq);
644 ret = ip_mc_leave_group(sk, &mreq);
653 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
654 struct netlink_ext_ack *extack)
656 struct net *net = sock_net(skb->sk);
657 struct in_ifaddr __rcu **ifap;
658 struct nlattr *tb[IFA_MAX+1];
659 struct in_device *in_dev;
660 struct ifaddrmsg *ifm;
661 struct in_ifaddr *ifa;
664 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
665 ifa_ipv4_policy, extack);
669 ifm = nlmsg_data(nlh);
673 in_dev = inetdev_by_index(net, ifm->ifa_index);
675 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
680 for (ifap = &in_dev->ifa_list;
681 (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
682 ifap = &ifa->ifa_next) {
684 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
687 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
690 if (tb[IFA_ADDRESS] &&
691 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
692 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
695 if (ipv4_is_multicast(ifa->ifa_address))
696 ip_mc_autojoin_config(net, false, ifa);
698 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
702 NL_SET_ERR_MSG(extack, "ipv4: Address not found");
703 err = -EADDRNOTAVAIL;
705 rtnl_net_unlock(net);
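/* Delayed work run on system_power_efficient_wq: scan the address hash,
 * mark addresses IFA_F_DEPRECATED once their preferred lifetime expires,
 * delete them once their valid lifetime expires, and reschedule itself
 * for the next interesting timestamp.
 */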
710 static void check_lifetime(struct work_struct *work)
712 unsigned long now, next, next_sec, next_sched;
713 struct in_ifaddr *ifa;
714 struct hlist_node *n;
718 net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
720 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
722 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
723 struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
724 bool change_needed = false;
727 hlist_for_each_entry_rcu(ifa, head, addr_lst) {
728 unsigned long age, tstamp;
733 flags = READ_ONCE(ifa->ifa_flags);
734 if (flags & IFA_F_PERMANENT)
737 preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
738 valid_lft = READ_ONCE(ifa->ifa_valid_lft);
739 tstamp = READ_ONCE(ifa->ifa_tstamp);
740 /* We try to batch several events at once. */
741 age = (now - tstamp +
742 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
744 if (valid_lft != INFINITY_LIFE_TIME &&
746 change_needed = true;
747 } else if (preferred_lft ==
748 INFINITY_LIFE_TIME) {
750 } else if (age >= preferred_lft) {
751 if (time_before(tstamp + valid_lft * HZ, next))
752 next = tstamp + valid_lft * HZ;
754 if (!(flags & IFA_F_DEPRECATED))
755 change_needed = true;
756 } else if (time_before(tstamp + preferred_lft * HZ,
758 next = tstamp + preferred_lft * HZ;
766 hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
769 if (ifa->ifa_flags & IFA_F_PERMANENT)
772 /* We try to batch several events at once. */
773 age = (now - ifa->ifa_tstamp +
774 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
776 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
777 age >= ifa->ifa_valid_lft) {
778 struct in_ifaddr __rcu **ifap;
779 struct in_ifaddr *tmp;
781 ifap = &ifa->ifa_dev->ifa_list;
782 tmp = rtnl_net_dereference(net, *ifap);
785 inet_del_ifa(ifa->ifa_dev,
789 ifap = &tmp->ifa_next;
790 tmp = rtnl_net_dereference(net, *ifap);
792 } else if (ifa->ifa_preferred_lft !=
793 INFINITY_LIFE_TIME &&
794 age >= ifa->ifa_preferred_lft &&
795 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
796 ifa->ifa_flags |= IFA_F_DEPRECATED;
797 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
800 rtnl_net_unlock(net);
803 next_sec = round_jiffies_up(next);
806 /* If rounded timeout is accurate enough, accept it. */
807 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
808 next_sched = next_sec;
811 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
812 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
813 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
815 queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
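/* Record the userspace-supplied lifetimes (in seconds, clamped by
 * addrconf_timeout_fixup() so that a later multiplication by HZ cannot
 * overflow). An infinite valid lifetime is expressed as IFA_F_PERMANENT;
 * a finite preferred lifetime of zero marks the address deprecated
 * immediately.
 */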
819 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
822 unsigned long timeout;
825 flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
827 timeout = addrconf_timeout_fixup(valid_lft, HZ);
828 if (addrconf_finite_timeout(timeout))
829 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
831 flags |= IFA_F_PERMANENT;
833 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
834 if (addrconf_finite_timeout(timeout)) {
836 flags |= IFA_F_DEPRECATED;
837 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
839 WRITE_ONCE(ifa->ifa_flags, flags);
840 WRITE_ONCE(ifa->ifa_tstamp, jiffies);
841 if (!ifa->ifa_cstamp)
842 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
845 static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
846 struct netlink_ext_ack *extack,
847 __u32 *valid_lft, __u32 *prefered_lft)
849 struct ifaddrmsg *ifm = nlmsg_data(nlh);
852 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
853 ifa_ipv4_policy, extack);
857 if (ifm->ifa_prefixlen > 32) {
858 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
862 if (!tb[IFA_LOCAL]) {
863 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
867 if (tb[IFA_CACHEINFO]) {
868 struct ifa_cacheinfo *ci;
870 ci = nla_data(tb[IFA_CACHEINFO]);
871 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
872 NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
876 *valid_lft = ci->ifa_valid;
877 *prefered_lft = ci->ifa_prefered;
883 static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
885 struct netlink_ext_ack *extack)
887 struct ifaddrmsg *ifm = nlmsg_data(nlh);
888 struct in_device *in_dev;
889 struct net_device *dev;
890 struct in_ifaddr *ifa;
893 dev = __dev_get_by_index(net, ifm->ifa_index);
896 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
900 in_dev = __in_dev_get_rtnl_net(dev);
905 ifa = inet_alloc_ifa(in_dev);
908 * A potential in_dev allocation can be left alive; it stays
909 * assigned to its device and is destroyed with it.
913 ipv4_devconf_setall(in_dev);
914 neigh_parms_data_state_setall(in_dev->arp_parms);
916 if (!tb[IFA_ADDRESS])
917 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
919 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
920 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
921 ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
922 ifa->ifa_scope = ifm->ifa_scope;
923 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
924 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
926 if (tb[IFA_BROADCAST])
927 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
930 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
932 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
934 if (tb[IFA_RT_PRIORITY])
935 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
938 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
946 static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
948 struct in_device *in_dev = ifa->ifa_dev;
949 struct in_ifaddr *ifa1;
951 in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
952 if (ifa1->ifa_mask == ifa->ifa_mask &&
953 inet_ifa_match(ifa1->ifa_address, ifa) &&
954 ifa1->ifa_local == ifa->ifa_local)
961 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
962 struct netlink_ext_ack *extack)
964 __u32 prefered_lft = INFINITY_LIFE_TIME;
965 __u32 valid_lft = INFINITY_LIFE_TIME;
966 struct net *net = sock_net(skb->sk);
967 struct in_ifaddr *ifa_existing;
968 struct nlattr *tb[IFA_MAX + 1];
969 struct in_ifaddr *ifa;
972 ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
976 if (!nla_get_in_addr(tb[IFA_LOCAL]))
981 ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
987 ifa_existing = find_matching_ifa(net, ifa);
989 /* It would be best to check for !NLM_F_CREATE here but
990 * userspace already relies on not having to provide this.
992 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
993 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
994 ret = ip_mc_autojoin_config(net, true, ifa);
996 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
1002 ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
1004 u32 new_metric = ifa->ifa_rt_priority;
1005 u8 new_proto = ifa->ifa_proto;
1009 if (nlh->nlmsg_flags & NLM_F_EXCL ||
1010 !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
1011 NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1017 if (ifa->ifa_rt_priority != new_metric) {
1018 fib_modify_prefix_metric(ifa, new_metric);
1019 ifa->ifa_rt_priority = new_metric;
1022 ifa->ifa_proto = new_proto;
1024 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1025 cancel_delayed_work(&net->ipv4.addr_chk_work);
1026 queue_delayed_work(system_power_efficient_wq,
1027 &net->ipv4.addr_chk_work, 0);
1028 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1032 rtnl_net_unlock(net);
1038 * Determine a default network mask, based on the IP address.
1041 static int inet_abc_len(__be32 addr)
1043 int rc = -1; /* Something else, probably a multicast. */
1045 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1048 __u32 haddr = ntohl(addr);
1049 if (IN_CLASSA(haddr))
1051 else if (IN_CLASSB(haddr))
1053 else if (IN_CLASSC(haddr))
1055 else if (IN_CLASSE(haddr))
1063 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1065 struct sockaddr_in sin_orig;
1066 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1067 struct in_ifaddr __rcu **ifap = NULL;
1068 struct in_device *in_dev;
1069 struct in_ifaddr *ifa = NULL;
1070 struct net_device *dev;
1073 int tryaddrmatch = 0;
1075 ifr->ifr_name[IFNAMSIZ - 1] = 0;
1077 /* save original address for comparison */
1078 memcpy(&sin_orig, sin, sizeof(*sin));
1080 colon = strchr(ifr->ifr_name, ':');
1084 dev_load(net, ifr->ifr_name);
1087 case SIOCGIFADDR: /* Get interface address */
1088 case SIOCGIFBRDADDR: /* Get the broadcast address */
1089 case SIOCGIFDSTADDR: /* Get the destination address */
1090 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1091 /* Note that these ioctls will not sleep,
1092 so we do not impose a lock.
1093 One day we will be forced to put a shared lock here (I mean SMP)
1095 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1096 memset(sin, 0, sizeof(*sin));
1097 sin->sin_family = AF_INET;
1102 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1105 case SIOCSIFADDR: /* Set interface address (and family) */
1106 case SIOCSIFBRDADDR: /* Set the broadcast address */
1107 case SIOCSIFDSTADDR: /* Set the destination address */
1108 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1110 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1113 if (sin->sin_family != AF_INET)
1124 dev = __dev_get_by_name(net, ifr->ifr_name);
1131 in_dev = __in_dev_get_rtnl_net(dev);
1134 /* Matthias Andree */
1135 /* compare label and address (4.4BSD style) */
1136 /* note: we only do this for a limited set of ioctls
1137 and only if the original address family was AF_INET.
1138 This is checked above. */
1140 for (ifap = &in_dev->ifa_list;
1141 (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1142 ifap = &ifa->ifa_next) {
1143 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1144 sin_orig.sin_addr.s_addr ==
1150 /* we didn't get a match, maybe the application is
1151 4.3BSD-style and passed in junk so we fall back to
1152 comparing just the label */
1154 for (ifap = &in_dev->ifa_list;
1155 (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1156 ifap = &ifa->ifa_next)
1157 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1162 ret = -EADDRNOTAVAIL;
1163 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1167 case SIOCGIFADDR: /* Get interface address */
1169 sin->sin_addr.s_addr = ifa->ifa_local;
1172 case SIOCGIFBRDADDR: /* Get the broadcast address */
1174 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1177 case SIOCGIFDSTADDR: /* Get the destination address */
1179 sin->sin_addr.s_addr = ifa->ifa_address;
1182 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1184 sin->sin_addr.s_addr = ifa->ifa_mask;
1189 ret = -EADDRNOTAVAIL;
1193 if (!(ifr->ifr_flags & IFF_UP))
1194 inet_del_ifa(in_dev, ifap, 1);
1198 /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
1200 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1203 case SIOCSIFADDR: /* Set interface address (and family) */
1205 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1212 ifa = inet_alloc_ifa(in_dev);
1217 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1219 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1222 if (ifa->ifa_local == sin->sin_addr.s_addr)
1224 inet_del_ifa(in_dev, ifap, 0);
1225 ifa->ifa_broadcast = 0;
1229 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1231 if (!(dev->flags & IFF_POINTOPOINT)) {
1232 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1233 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1234 if ((dev->flags & IFF_BROADCAST) &&
1235 ifa->ifa_prefixlen < 31)
1236 ifa->ifa_broadcast = ifa->ifa_address |
1239 ifa->ifa_prefixlen = 32;
1240 ifa->ifa_mask = inet_make_mask(32);
1242 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1243 ret = inet_set_ifa(dev, ifa);
1246 case SIOCSIFBRDADDR: /* Set the broadcast address */
1248 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1249 inet_del_ifa(in_dev, ifap, 0);
1250 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1251 inet_insert_ifa(ifa);
1255 case SIOCSIFDSTADDR: /* Set the destination address */
1257 if (ifa->ifa_address == sin->sin_addr.s_addr)
1260 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1263 inet_del_ifa(in_dev, ifap, 0);
1264 ifa->ifa_address = sin->sin_addr.s_addr;
1265 inet_insert_ifa(ifa);
1268 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1271 * The mask we set must be legal.
1274 if (bad_mask(sin->sin_addr.s_addr, 0))
1277 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1278 __be32 old_mask = ifa->ifa_mask;
1279 inet_del_ifa(in_dev, ifap, 0);
1280 ifa->ifa_mask = sin->sin_addr.s_addr;
1281 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1283 /* See if the current broadcast address matches
1284 * the current netmask; if so, recalculate
1285 * the broadcast address. Otherwise it's a
1286 * funny address, so don't touch it since
1287 * the user seems to know what (s)he's doing...
1289 if ((dev->flags & IFF_BROADCAST) &&
1290 (ifa->ifa_prefixlen < 31) &&
1291 (ifa->ifa_broadcast ==
1292 (ifa->ifa_local|~old_mask))) {
1293 ifa->ifa_broadcast = (ifa->ifa_local |
1294 ~sin->sin_addr.s_addr);
1296 inet_insert_ifa(ifa);
1301 rtnl_net_unlock(net);
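/* SIOCGIFCONF helper: copy one struct ifreq per configured address on
 * @dev into the user buffer, labelled with the alias name, until either
 * the address list or the supplied buffer (@len bytes) runs out.
 */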
1306 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1308 struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
1309 const struct in_ifaddr *ifa;
1313 if (WARN_ON(size > sizeof(struct ifreq)))
1319 in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
1326 memset(&ifr, 0, sizeof(struct ifreq));
1327 strcpy(ifr.ifr_name, ifa->ifa_label);
1329 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1330 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1333 if (copy_to_user(buf + done, &ifr, size)) {
1344 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1347 const struct in_ifaddr *ifa;
1349 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1350 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1352 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1353 ifa->ifa_scope <= scope)
1354 return ifa->ifa_local;
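/* Pick a source address for @dev: prefer a primary address whose subnet
 * contains @dst (or any primary when @dst is zero) with scope <= @scope,
 * then fall back to the L3 master device and finally to any other device
 * in the same L3 domain. Callers such as arp_solicit(), for example, use
 * it to choose the address to answer from.
 */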
1360 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1362 const struct in_ifaddr *ifa;
1364 unsigned char localnet_scope = RT_SCOPE_HOST;
1365 struct in_device *in_dev;
1370 net = dev_net_rcu(dev);
1371 in_dev = __in_dev_get_rcu(dev);
1375 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1376 localnet_scope = RT_SCOPE_LINK;
1378 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1379 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1381 if (min(ifa->ifa_scope, localnet_scope) > scope)
1383 if (!dst || inet_ifa_match(dst, ifa)) {
1384 addr = ifa->ifa_local;
1388 addr = ifa->ifa_local;
1394 master_idx = l3mdev_master_ifindex_rcu(dev);
1396 /* For VRFs, the VRF device takes the place of the loopback device,
1397 * with addresses on it being preferred. Note in such cases the
1398 * loopback device will be among the devices that fail the master_idx
1399 * equality check in the loop below.
1402 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1403 (in_dev = __in_dev_get_rcu(dev))) {
1404 addr = in_dev_select_addr(in_dev, scope);
1409 /* Non-loopback addresses on the loopback device should be
1410 preferred in this case. It is important that lo is the first interface
1413 for_each_netdev_rcu(net, dev) {
1414 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1417 in_dev = __in_dev_get_rcu(dev);
1421 addr = in_dev_select_addr(in_dev, scope);
1429 EXPORT_SYMBOL(inet_select_addr);
1431 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1432 __be32 local, int scope)
1434 unsigned char localnet_scope = RT_SCOPE_HOST;
1435 const struct in_ifaddr *ifa;
1439 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1440 localnet_scope = RT_SCOPE_LINK;
1442 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1443 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1446 (local == ifa->ifa_local || !local) &&
1447 min_scope <= scope) {
1448 addr = ifa->ifa_local;
1453 same = (!local || inet_ifa_match(local, ifa)) &&
1454 (!dst || inet_ifa_match(dst, ifa));
1459 /* Is the selected addr in the dst subnet? */
1459 if (inet_ifa_match(addr, ifa))
1461 /* No; can we then use the new local src? */
1462 if (min_scope <= scope) {
1463 addr = ifa->ifa_local;
1466 /* search for large dst subnet for addr */
1472 return same ? addr : 0;
1476 * Confirm that local IP address exists using wildcards:
1477 * - net: netns to check, cannot be NULL
1478 * - in_dev: only on this interface, NULL=any interface
1479 * - dst: only in the same subnet as dst, 0=any dst
1480 * - local: address, 0=autoselect the local address
1481 * - scope: maximum allowed scope value for the local address
1483 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1484 __be32 dst, __be32 local, int scope)
1487 struct net_device *dev;
1490 return confirm_addr_indev(in_dev, dst, local, scope);
1493 for_each_netdev_rcu(net, dev) {
1494 in_dev = __in_dev_get_rcu(dev);
1496 addr = confirm_addr_indev(in_dev, dst, local, scope);
1505 EXPORT_SYMBOL(inet_confirm_addr);
1511 int register_inetaddr_notifier(struct notifier_block *nb)
1513 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1515 EXPORT_SYMBOL(register_inetaddr_notifier);
1517 int unregister_inetaddr_notifier(struct notifier_block *nb)
1519 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1521 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1523 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1525 return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1527 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1529 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1531 return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1534 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1536 /* Rename ifa_labels for a device name change. Make some effort to preserve
1537 * existing alias numbering and to create unique labels if possible.
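 * For example, renaming eth0 to eth1 turns the alias label "eth0:1"
 * into "eth1:1"; an old label without a ':' gets a fresh ":<n>" suffix,
 * and if the combined name would not fit in IFNAMSIZ the suffix
 * overwrites the tail of the new name instead of being appended.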
1539 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1541 struct in_ifaddr *ifa;
1544 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1545 char old[IFNAMSIZ], *dot;
1547 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1548 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1551 dot = strchr(old, ':');
1553 sprintf(old, ":%d", named);
1556 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1557 strcat(ifa->ifa_label, dot);
1559 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1561 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1565 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1566 struct in_device *in_dev)
1569 const struct in_ifaddr *ifa;
1571 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1572 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1573 ifa->ifa_local, dev,
1574 ifa->ifa_local, NULL,
1575 dev->dev_addr, NULL);
1579 /* Called only under RTNL semaphore */
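/* netdevice notifier: creates the in_device on NETDEV_REGISTER (and on
 * NETDEV_CHANGEMTU if IP had been disabled by a too-small MTU), assigns
 * 127.0.0.1/8 when the loopback device comes up, sends gratuitous ARPs
 * on address/notify events, re-registers sysctls on a rename, and tears
 * everything down on NETDEV_UNREGISTER.
 */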
1581 static int inetdev_event(struct notifier_block *this, unsigned long event,
1584 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1585 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1590 if (event == NETDEV_REGISTER) {
1591 in_dev = inetdev_init(dev);
1593 return notifier_from_errno(PTR_ERR(in_dev));
1594 if (dev->flags & IFF_LOOPBACK) {
1595 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1596 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1598 } else if (event == NETDEV_CHANGEMTU) {
1599 /* Re-enabling IP */
1600 if (inetdev_valid_mtu(dev->mtu))
1601 in_dev = inetdev_init(dev);
1607 case NETDEV_REGISTER:
1608 pr_debug("%s: bug\n", __func__);
1609 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1612 if (!inetdev_valid_mtu(dev->mtu))
1614 if (dev->flags & IFF_LOOPBACK) {
1615 struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
1619 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1620 ifa->ifa_prefixlen = 8;
1621 ifa->ifa_mask = inet_make_mask(8);
1622 ifa->ifa_scope = RT_SCOPE_HOST;
1623 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1624 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1625 INFINITY_LIFE_TIME);
1626 ipv4_devconf_setall(in_dev);
1627 neigh_parms_data_state_setall(in_dev->arp_parms);
1628 inet_insert_ifa(ifa);
1633 case NETDEV_CHANGEADDR:
1634 if (!IN_DEV_ARP_NOTIFY(in_dev))
1637 case NETDEV_NOTIFY_PEERS:
1638 /* Send gratuitous ARP to notify of link change */
1639 inetdev_send_gratuitous_arp(dev, in_dev);
1644 case NETDEV_PRE_TYPE_CHANGE:
1645 ip_mc_unmap(in_dev);
1647 case NETDEV_POST_TYPE_CHANGE:
1648 ip_mc_remap(in_dev);
1650 case NETDEV_CHANGEMTU:
1651 if (inetdev_valid_mtu(dev->mtu))
1653 /* disable IP when MTU is not enough */
1655 case NETDEV_UNREGISTER:
1656 inetdev_destroy(in_dev);
1658 case NETDEV_CHANGENAME:
1659 /* Do not notify about label change, this event is
1660 * not interesting to applications using netlink.
1662 inetdev_changename(dev, in_dev);
1664 devinet_sysctl_unregister(in_dev);
1665 devinet_sysctl_register(in_dev);
1672 static struct notifier_block ip_netdev_notifier = {
1673 .notifier_call = inetdev_event,
1676 static size_t inet_nlmsg_size(void)
1678 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1679 + nla_total_size(4) /* IFA_ADDRESS */
1680 + nla_total_size(4) /* IFA_LOCAL */
1681 + nla_total_size(4) /* IFA_BROADCAST */
1682 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1683 + nla_total_size(4) /* IFA_FLAGS */
1684 + nla_total_size(1) /* IFA_PROTO */
1685 + nla_total_size(4) /* IFA_RT_PRIORITY */
1686 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
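/* Timestamps in struct ifa_cacheinfo are expressed in hundredths of a
 * second since boot, hence the * 100UL / HZ conversion from jiffies
 * (offset by INITIAL_JIFFIES); e.g. with HZ == 1000, a delta of 250
 * jiffies becomes 25.
 */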
1689 static inline u32 cstamp_delta(unsigned long cstamp)
1691 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1694 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1695 unsigned long tstamp, u32 preferred, u32 valid)
1697 struct ifa_cacheinfo ci;
1699 ci.cstamp = cstamp_delta(cstamp);
1700 ci.tstamp = cstamp_delta(tstamp);
1701 ci.ifa_prefered = preferred;
1702 ci.ifa_valid = valid;
1704 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
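/* Fill one RTM_NEWADDR/RTM_DELADDR message: the ifaddrmsg header plus
 * the IFA_* attributes for a single address. For non-permanent addresses
 * the remaining preferred/valid lifetimes are computed relative to
 * ifa_tstamp before being put in IFA_CACHEINFO.
 */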
1707 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1708 struct inet_fill_args *args)
1710 struct ifaddrmsg *ifm;
1711 struct nlmsghdr *nlh;
1712 unsigned long tstamp;
1713 u32 preferred, valid;
1716 nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1721 ifm = nlmsg_data(nlh);
1722 ifm->ifa_family = AF_INET;
1723 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1725 flags = READ_ONCE(ifa->ifa_flags);
1726 /* Warning: ifm->ifa_flags is an __u8, it holds only 8 bits.
1727 * The 32bit value is given in IFA_FLAGS attribute.
1729 ifm->ifa_flags = (__u8)flags;
1731 ifm->ifa_scope = ifa->ifa_scope;
1732 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1734 if (args->netnsid >= 0 &&
1735 nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1736 goto nla_put_failure;
1738 tstamp = READ_ONCE(ifa->ifa_tstamp);
1739 if (!(flags & IFA_F_PERMANENT)) {
1740 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1741 valid = READ_ONCE(ifa->ifa_valid_lft);
1742 if (preferred != INFINITY_LIFE_TIME) {
1743 long tval = (jiffies - tstamp) / HZ;
1745 if (preferred > tval)
1749 if (valid != INFINITY_LIFE_TIME) {
1757 preferred = INFINITY_LIFE_TIME;
1758 valid = INFINITY_LIFE_TIME;
1760 if ((ifa->ifa_address &&
1761 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1763 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1764 (ifa->ifa_broadcast &&
1765 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1766 (ifa->ifa_label[0] &&
1767 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1769 nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1770 nla_put_u32(skb, IFA_FLAGS, flags) ||
1771 (ifa->ifa_rt_priority &&
1772 nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1773 put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1775 goto nla_put_failure;
1777 nlmsg_end(skb, nlh);
1781 nlmsg_cancel(skb, nlh);
1785 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1786 struct inet_fill_args *fillargs,
1787 struct net **tgt_net, struct sock *sk,
1788 struct netlink_callback *cb)
1790 struct netlink_ext_ack *extack = cb->extack;
1791 struct nlattr *tb[IFA_MAX+1];
1792 struct ifaddrmsg *ifm;
1795 ifm = nlmsg_payload(nlh, sizeof(*ifm));
1797 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1801 if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1802 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1806 fillargs->ifindex = ifm->ifa_index;
1807 if (fillargs->ifindex) {
1808 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1809 fillargs->flags |= NLM_F_DUMP_FILTERED;
1812 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1813 ifa_ipv4_policy, extack);
1817 for (i = 0; i <= IFA_MAX; ++i) {
1821 if (i == IFA_TARGET_NETNSID) {
1824 fillargs->netnsid = nla_get_s32(tb[i]);
1826 net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1828 fillargs->netnsid = -1;
1829 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1830 return PTR_ERR(net);
1834 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1842 static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb,
1843 struct netlink_callback *cb, int *s_ip_idx,
1844 struct inet_fill_args *fillargs)
1846 struct ip_mc_list *im;
1850 for (im = rcu_dereference(in_dev->mc_list);
1852 im = rcu_dereference(im->next_rcu)) {
1853 if (ip_idx < *s_ip_idx) {
1857 err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs);
1861 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1871 static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb,
1872 struct netlink_callback *cb, int *s_ip_idx,
1873 struct inet_fill_args *fillargs)
1875 struct in_ifaddr *ifa;
1879 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1880 if (ip_idx < *s_ip_idx) {
1884 err = inet_fill_ifaddr(skb, ifa, fillargs);
1888 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1899 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1900 struct netlink_callback *cb, int *s_ip_idx,
1901 struct inet_fill_args *fillargs)
1903 switch (fillargs->event) {
1905 return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs);
1906 case RTM_GETMULTICAST:
1907 return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx,
1914 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1916 static u32 inet_base_seq(const struct net *net)
1918 u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1919 READ_ONCE(net->dev_base_seq);
1921 /* Must not return 0 (see nl_dump_check_consistent()).
1922 * Choose a value far away from 0.
1929 static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
1932 const struct nlmsghdr *nlh = cb->nlh;
1933 struct inet_fill_args fillargs = {
1934 .portid = NETLINK_CB(cb->skb).portid,
1935 .seq = nlh->nlmsg_seq,
1937 .flags = NLM_F_MULTI,
1940 struct net *net = sock_net(skb->sk);
1941 struct net *tgt_net = net;
1943 unsigned long ifindex;
1945 } *ctx = (void *)cb->ctx;
1946 struct in_device *in_dev;
1947 struct net_device *dev;
1951 if (cb->strict_check) {
1952 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1957 if (fillargs.ifindex) {
1958 dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1963 in_dev = __in_dev_get_rcu(dev);
1966 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1972 cb->seq = inet_base_seq(tgt_net);
1974 for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1975 in_dev = __in_dev_get_rcu(dev);
1978 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1984 if (fillargs.netnsid >= 0)
1990 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1992 return inet_dump_addr(skb, cb, RTM_NEWADDR);
1995 static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
1997 return inet_dump_addr(skb, cb, RTM_GETMULTICAST);
2000 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
2003 struct inet_fill_args fillargs = {
2005 .seq = nlh ? nlh->nlmsg_seq : 0,
2010 struct sk_buff *skb;
2014 net = dev_net(ifa->ifa_dev->dev);
2015 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
2019 err = inet_fill_ifaddr(skb, ifa, &fillargs);
2021 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
2022 WARN_ON(err == -EMSGSIZE);
2026 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
2029 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
2032 static size_t inet_get_link_af_size(const struct net_device *dev,
2033 u32 ext_filter_mask)
2035 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2040 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
2043 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
2044 u32 ext_filter_mask)
2046 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2053 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
2057 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
2058 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
2063 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
2064 [IFLA_INET_CONF] = { .type = NLA_NESTED },
2067 static int inet_validate_link_af(const struct net_device *dev,
2068 const struct nlattr *nla,
2069 struct netlink_ext_ack *extack)
2071 struct nlattr *a, *tb[IFLA_INET_MAX+1];
2074 if (dev && !__in_dev_get_rtnl(dev))
2075 return -EAFNOSUPPORT;
2077 err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2078 inet_af_policy, extack);
2082 if (tb[IFLA_INET_CONF]) {
2083 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2084 int cfgid = nla_type(a);
2089 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2097 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2098 struct netlink_ext_ack *extack)
2100 struct in_device *in_dev = __in_dev_get_rtnl(dev);
2101 struct nlattr *a, *tb[IFLA_INET_MAX+1];
2105 return -EAFNOSUPPORT;
2107 if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2110 if (tb[IFLA_INET_CONF]) {
2111 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2112 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2118 static int inet_netconf_msgsize_devconf(int type)
2120 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2121 + nla_total_size(4); /* NETCONFA_IFINDEX */
2124 if (type == NETCONFA_ALL)
2127 if (all || type == NETCONFA_FORWARDING)
2128 size += nla_total_size(4);
2129 if (all || type == NETCONFA_RP_FILTER)
2130 size += nla_total_size(4);
2131 if (all || type == NETCONFA_MC_FORWARDING)
2132 size += nla_total_size(4);
2133 if (all || type == NETCONFA_BC_FORWARDING)
2134 size += nla_total_size(4);
2135 if (all || type == NETCONFA_PROXY_NEIGH)
2136 size += nla_total_size(4);
2137 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2138 size += nla_total_size(4);
2143 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2144 const struct ipv4_devconf *devconf,
2145 u32 portid, u32 seq, int event,
2146 unsigned int flags, int type)
2148 struct nlmsghdr *nlh;
2149 struct netconfmsg *ncm;
2152 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2157 if (type == NETCONFA_ALL)
2160 ncm = nlmsg_data(nlh);
2161 ncm->ncm_family = AF_INET;
2163 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2164 goto nla_put_failure;
2169 if ((all || type == NETCONFA_FORWARDING) &&
2170 nla_put_s32(skb, NETCONFA_FORWARDING,
2171 IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2172 goto nla_put_failure;
2173 if ((all || type == NETCONFA_RP_FILTER) &&
2174 nla_put_s32(skb, NETCONFA_RP_FILTER,
2175 IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2176 goto nla_put_failure;
2177 if ((all || type == NETCONFA_MC_FORWARDING) &&
2178 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2179 IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2180 goto nla_put_failure;
2181 if ((all || type == NETCONFA_BC_FORWARDING) &&
2182 nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2183 IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2184 goto nla_put_failure;
2185 if ((all || type == NETCONFA_PROXY_NEIGH) &&
2186 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2187 IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2188 goto nla_put_failure;
2189 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2190 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2191 IPV4_DEVCONF_RO(*devconf,
2192 IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2193 goto nla_put_failure;
2196 nlmsg_end(skb, nlh);
2200 nlmsg_cancel(skb, nlh);
2204 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2205 int ifindex, struct ipv4_devconf *devconf)
2207 struct sk_buff *skb;
2210 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2214 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2217 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218 WARN_ON(err == -EMSGSIZE);
2222 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2225 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2228 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2229 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
2230 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
2231 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
2232 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
2233 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
2236 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2237 const struct nlmsghdr *nlh,
2239 struct netlink_ext_ack *extack)
2243 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2244 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2248 if (!netlink_strict_get_check(skb))
2249 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2251 devconf_ipv4_policy, extack);
2253 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2255 devconf_ipv4_policy, extack);
2259 for (i = 0; i <= NETCONFA_MAX; i++) {
2264 case NETCONFA_IFINDEX:
2267 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2275 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2276 struct nlmsghdr *nlh,
2277 struct netlink_ext_ack *extack)
2279 struct net *net = sock_net(in_skb->sk);
2280 struct nlattr *tb[NETCONFA_MAX + 1];
2281 const struct ipv4_devconf *devconf;
2282 struct in_device *in_dev = NULL;
2283 struct net_device *dev = NULL;
2284 struct sk_buff *skb;
2288 err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2292 if (!tb[NETCONFA_IFINDEX])
2295 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2297 case NETCONFA_IFINDEX_ALL:
2298 devconf = net->ipv4.devconf_all;
2300 case NETCONFA_IFINDEX_DEFAULT:
2301 devconf = net->ipv4.devconf_dflt;
2305 dev = dev_get_by_index(net, ifindex);
2307 in_dev = in_dev_get(dev);
2310 devconf = &in_dev->cnf;
2315 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2319 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2320 NETLINK_CB(in_skb).portid,
2321 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2324 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2325 WARN_ON(err == -EMSGSIZE);
2329 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2337 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2338 struct netlink_callback *cb)
2340 const struct nlmsghdr *nlh = cb->nlh;
2341 struct net *net = sock_net(skb->sk);
2343 unsigned long ifindex;
2344 unsigned int all_default;
2345 } *ctx = (void *)cb->ctx;
2346 const struct in_device *in_dev;
2347 struct net_device *dev;
2350 if (cb->strict_check) {
2351 struct netlink_ext_ack *extack = cb->extack;
2352 struct netconfmsg *ncm;
2354 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2355 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2359 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2360 NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2366 for_each_netdev_dump(net, dev, ctx->ifindex) {
2367 in_dev = __in_dev_get_rcu(dev);
2370 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2372 NETLINK_CB(cb->skb).portid,
2374 RTM_NEWNETCONF, NLM_F_MULTI,
2379 if (ctx->all_default == 0) {
2380 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2381 net->ipv4.devconf_all,
2382 NETLINK_CB(cb->skb).portid,
2384 RTM_NEWNETCONF, NLM_F_MULTI,
2390 if (ctx->all_default == 1) {
2391 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2392 net->ipv4.devconf_dflt,
2393 NETLINK_CB(cb->skb).portid,
2395 RTM_NEWNETCONF, NLM_F_MULTI,
2406 #ifdef CONFIG_SYSCTL
2408 static void devinet_copy_dflt_conf(struct net *net, int i)
2410 struct net_device *dev;
2413 for_each_netdev_rcu(net, dev) {
2414 struct in_device *in_dev;
2416 in_dev = __in_dev_get_rcu(dev);
2417 if (in_dev && !test_bit(i, in_dev->cnf.state))
2418 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2423 /* called with RTNL locked */
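/* Propagate a change of the "all" forwarding setting: flip the
 * all-interfaces accept_redirects default, mirror forwarding into the
 * namespace default, emit netconf notifications for "all" and "default",
 * then update every device (disabling LRO where needed) and notify
 * per-device as well.
 */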
2424 static void inet_forward_change(struct net *net)
2426 struct net_device *dev;
2427 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2429 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2430 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2431 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2432 NETCONFA_FORWARDING,
2433 NETCONFA_IFINDEX_ALL,
2434 net->ipv4.devconf_all);
2435 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2436 NETCONFA_FORWARDING,
2437 NETCONFA_IFINDEX_DEFAULT,
2438 net->ipv4.devconf_dflt);
2440 for_each_netdev(net, dev) {
2441 struct in_device *in_dev;
2444 dev_disable_lro(dev);
2446 in_dev = __in_dev_get_rtnl_net(dev);
2448 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2449 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2450 NETCONFA_FORWARDING,
2451 dev->ifindex, &in_dev->cnf);
2456 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2458 if (cnf == net->ipv4.devconf_dflt)
2459 return NETCONFA_IFINDEX_DEFAULT;
2460 else if (cnf == net->ipv4.devconf_all)
2461 return NETCONFA_IFINDEX_ALL;
2463 struct in_device *idev
2464 = container_of(cnf, struct in_device, cnf);
2465 return idev->dev->ifindex;
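/* Generic proc handler for the per-device sysctls: on a write it records
 * in cnf->state that the entry was set explicitly (so it is no longer
 * overwritten by changes to the namespace default), flushes the route
 * cache where required and sends RTM_NEWNETCONF notifications for the
 * attributes that have netconf counterparts.
 */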
2469 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2470 void *buffer, size_t *lenp, loff_t *ppos)
2472 int old_value = *(int *)ctl->data;
2473 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2474 int new_value = *(int *)ctl->data;
2477 struct ipv4_devconf *cnf = ctl->extra1;
2478 struct net *net = ctl->extra2;
2479 int i = (int *)ctl->data - cnf->data;
2482 set_bit(i, cnf->state);
2484 if (cnf == net->ipv4.devconf_dflt)
2485 devinet_copy_dflt_conf(net, i);
2486 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2487 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2488 if ((new_value == 0) && (old_value != 0))
2489 rt_cache_flush(net);
2491 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2492 new_value != old_value)
2493 rt_cache_flush(net);
2495 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2496 new_value != old_value) {
2497 ifindex = devinet_conf_ifindex(net, cnf);
2498 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2502 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2503 new_value != old_value) {
2504 ifindex = devinet_conf_ifindex(net, cnf);
2505 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2506 NETCONFA_PROXY_NEIGH,
2509 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2510 new_value != old_value) {
2511 ifindex = devinet_conf_ifindex(net, cnf);
2512 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2513 NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2521 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2522 void *buffer, size_t *lenp, loff_t *ppos)
2524 int *valp = ctl->data;
2527 struct net *net = ctl->extra2;
2530 if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2533 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2535 if (write && *valp != val) {
2536 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2537 if (!rtnl_net_trylock(net)) {
2538 /* Restore the original values before restarting */
2541 return restart_syscall();
2543 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2544 inet_forward_change(net);
2546 struct ipv4_devconf *cnf = ctl->extra1;
2547 struct in_device *idev =
2548 container_of(cnf, struct in_device, cnf);
2550 dev_disable_lro(idev->dev);
2551 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2552 NETCONFA_FORWARDING,
2556 rtnl_net_unlock(net);
2557 rt_cache_flush(net);
2559 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2560 NETCONFA_FORWARDING,
2561 NETCONFA_IFINDEX_DEFAULT,
2562 net->ipv4.devconf_dflt);
2568 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2569 void *buffer, size_t *lenp, loff_t *ppos)
2571 int *valp = ctl->data;
2573 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2574 struct net *net = ctl->extra2;
2576 if (write && *valp != val)
2577 rt_cache_flush(net);
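/* The table below provides one ctl_table entry per IPV4_DEVCONF_* value;
 * __devinet_sysctl_register() clones it for each device (and for "all"
 * and "default") under net/ipv4/conf/<name>/.
 */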
2582 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2585 .data = ipv4_devconf.data + \
2586 IPV4_DEVCONF_ ## attr - 1, \
2587 .maxlen = sizeof(int), \
2589 .proc_handler = proc, \
2590 .extra1 = &ipv4_devconf, \
2593 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2594 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2596 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2597 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2599 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2600 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2602 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2603 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
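
/*
 * Duplicate the template table, point it at @p, register it under
 * net/ipv4/conf/<dev_name> and announce the full configuration over netlink.
 */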
static int __devinet_sysctl_register(struct net *net, char *dev_name,
				     int ifindex, struct ipv4_devconf *p)
{
	int i;
	struct devinet_sysctl_table *t;
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];

	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
	if (!t)
		goto out;

	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
		t->devinet_vars[i].extra1 = p;
		t->devinet_vars[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl = t;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
				    ifindex, p);
	return 0;

free:
	kfree(t);
out:
	return -ENOBUFS;
}
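
/* Tear down one net/ipv4/conf/<x> directory and send RTM_DELNETCONF. */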
static void __devinet_sysctl_unregister(struct net *net,
					struct ipv4_devconf *cnf, int ifindex)
{
	struct devinet_sysctl_table *t = cnf->sysctl;

	if (t) {
		cnf->sysctl = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
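
/*
 * Per-device registration pairs the devinet table with the neigh (ARP)
 * sysctls; device names that would collide with the "all"/"default"
 * directories are rejected by sysctl_dev_name_is_allowed().
 */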
static int devinet_sysctl_register(struct in_device *idev)
{
	int err;

	if (!sysctl_dev_name_is_allowed(idev->dev->name))
		return -EINVAL;

	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
	if (err)
		return err;
	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
					idev->dev->ifindex, &idev->cnf);
	if (err)
		neigh_sysctl_unregister(idev->arp_parms);
	return err;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
	struct net *net = dev_net(idev->dev);

	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
	neigh_sysctl_unregister(idev->arp_parms);
}
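
/*
 * Template for the global net.ipv4.ip_forward sysctl ("sysctl -w
 * net.ipv4.ip_forward=1"); devinet_init_net() re-points it at each netns'
 * "all" devconf.
 */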
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
};
#endif
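
/*
 * Per-netns init: allocate the address hash table, clone the "all"/"default"
 * devconf templates (optionally inheriting from init_net or from the creating
 * netns, see net_inherit_devconf()), and register the sysctl trees.
 */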
static __net_init int devinet_init_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *forw_hdr;
	struct ctl_table *tbl;
#endif
	struct ipv4_devconf *all, *dflt;
	int err;
	int i;

	err = -ENOMEM;
	net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->ipv4.inet_addr_lst)
		goto err_alloc_hash;

	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	if (!net_eq(net, &init_net)) {
		switch (net_inherit_devconf()) {
		case 3:
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 0:
		case 1:
			/* copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 2:
			/* use compiled values */
			break;
		}
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
					  ARRAY_SIZE(ctl_forward_entry));
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);

	INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	kfree(net->ipv4.inet_addr_lst);
err_alloc_hash:
	return err;
}
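
/* Per-netns teardown, mirroring devinet_init_net(). */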
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	const struct ctl_table *tbl;
#endif

	cancel_delayed_work_sync(&net->ipv4.addr_chk_work);

#ifdef CONFIG_SYSCTL
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
	kfree(net->ipv4.inet_addr_lst);
}
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
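
/* IFLA_AF_SPEC handlers for AF_INET used by rtnetlink link dumps/changes. */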
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
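
/* rtnetlink doit/dumpit handlers registered in one go at boot. */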
static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
	{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
	 .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
	{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
	 .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST,
	 .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED},
};
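
/*
 * Boot-time hookup: pernet ops, the netdevice notifier, the AF_INET
 * link-af ops and the rtnetlink handlers above.
 */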
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);
	register_netdevice_notifier(&ip_netdev_notifier);

	if (rtnl_af_register(&inet_af_ops))
		panic("Unable to register inet_af_ops\n");

	rtnl_register_many(devinet_rtnl_msg_handlers);
}