1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * NET3 IP device support routines.
5 * Derived from the IP parts of dev.c 1.0.19
7 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8 * Mark Evans, <evansmp@uhura.aston.ac.uk>
11 * Alan Cox, <gw4pts@gw4pts.ampr.org>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
15 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
17 * Cyrus Durgin: updated for kmod
18 * Matthias Andree: in devinet_ioctl, compare label and
19 * address (4.4BSD alias style support),
20 * fall back to comparing just the label
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
52 #include <linux/sysctl.h>
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
/* ifa_flags that are meaningful only for IPv6 addresses; they are masked
 * off of any IPv4 address before it is inserted (see __inet_insert_ifa()).
 */
#define IPV6ONLY_FLAGS \
        (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
         IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
         IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
78 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
82 static struct ipv4_devconf ipv4_devconf_dflt = {
84 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
91 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
/* Accessor for the per-netns default devconf entry @attr. */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 [IFA_LOCAL] = { .type = NLA_U32 },
100 [IFA_ADDRESS] = { .type = NLA_U32 },
101 [IFA_BROADCAST] = { .type = NLA_U32 },
102 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
104 [IFA_FLAGS] = { .type = NLA_U32 },
105 [IFA_RT_PRIORITY] = { .type = NLA_U32 },
106 [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
107 [IFA_PROTO] = { .type = NLA_U8 },
110 struct inet_fill_args {
/* Global hash of all IPv4 addresses, keyed by inet_addr_hash()
 * over (netns, local address); protected by RTNL for writers and
 * RCU for readers.
 */
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
126 u32 val = (__force u32) addr ^ net_hash_mix(net);
128 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
133 u32 hash = inet_addr_hash(net, ifa->ifa_local);
136 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
139 static void inet_hash_remove(struct in_ifaddr *ifa)
142 hlist_del_init_rcu(&ifa->hash);
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
151 * If a caller uses devref=false, it should be protected by RCU, or RTNL
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
155 struct net_device *result = NULL;
156 struct in_ifaddr *ifa;
159 ifa = inet_lookup_ifaddr_rcu(net, addr);
161 struct flowi4 fl4 = { .daddr = addr };
162 struct fib_result res = { 0 };
163 struct fib_table *local;
165 /* Fallback to FIB local table so that communication
166 * over loopback subnets work.
168 local = fib_get_table(net, RT_TABLE_LOCAL);
170 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 res.type == RTN_LOCAL)
172 result = FIB_RES_DEV(res);
174 result = ifa->ifa_dev->dev;
176 if (result && devref)
181 EXPORT_SYMBOL(__ip_dev_find);
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
186 u32 hash = inet_addr_hash(net, addr);
187 struct in_ifaddr *ifa;
189 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 if (ifa->ifa_local == addr &&
191 net_eq(dev_net(ifa->ifa_dev->dev), net))
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 struct in_ifaddr __rcu **ifap,
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
208 static int devinet_sysctl_register(struct in_device *idev)
212 static void devinet_sysctl_unregister(struct in_device *idev)
217 /* Locks all the inet devices. */
219 static struct in_ifaddr *inet_alloc_ifa(void)
221 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
224 static void inet_rcu_free_ifa(struct rcu_head *head)
226 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
229 in_dev_put(ifa->ifa_dev);
233 static void inet_free_ifa(struct in_ifaddr *ifa)
235 /* Our reference to ifa->ifa_dev must be freed ASAP
236 * to release the reference to the netdev the same way.
237 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
239 call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
242 static void in_dev_free_rcu(struct rcu_head *head)
244 struct in_device *idev = container_of(head, struct in_device, rcu_head);
246 kfree(rcu_dereference_protected(idev->mc_hash, 1));
250 void in_dev_finish_destroy(struct in_device *idev)
252 struct net_device *dev = idev->dev;
254 WARN_ON(idev->ifa_list);
255 WARN_ON(idev->mc_list);
256 #ifdef NET_REFCNT_DEBUG
257 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
259 netdev_put(dev, &idev->dev_tracker);
261 pr_err("Freeing alive in_device %p\n", idev);
263 call_rcu(&idev->rcu_head, in_dev_free_rcu);
265 EXPORT_SYMBOL(in_dev_finish_destroy);
267 static struct in_device *inetdev_init(struct net_device *dev)
269 struct in_device *in_dev;
274 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
277 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
278 sizeof(in_dev->cnf));
279 in_dev->cnf.sysctl = NULL;
281 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
282 if (!in_dev->arp_parms)
284 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
285 dev_disable_lro(dev);
286 /* Reference in_dev->dev */
287 netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
288 /* Account for reference dev->ip_ptr (below) */
289 refcount_set(&in_dev->refcnt, 1);
291 err = devinet_sysctl_register(in_dev);
294 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
299 ip_mc_init_dev(in_dev);
300 if (dev->flags & IFF_UP)
303 /* we can receive as soon as ip_ptr is set -- do this last */
304 rcu_assign_pointer(dev->ip_ptr, in_dev);
306 return in_dev ?: ERR_PTR(err);
313 static void inetdev_destroy(struct in_device *in_dev)
315 struct net_device *dev;
316 struct in_ifaddr *ifa;
324 ip_mc_destroy_dev(in_dev);
326 while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
327 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
331 RCU_INIT_POINTER(dev->ip_ptr, NULL);
333 devinet_sysctl_unregister(in_dev);
334 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
340 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
342 const struct in_ifaddr *ifa;
345 in_dev_for_each_ifa_rcu(ifa, in_dev) {
346 if (inet_ifa_match(a, ifa)) {
347 if (!b || inet_ifa_match(b, ifa)) {
357 static void __inet_del_ifa(struct in_device *in_dev,
358 struct in_ifaddr __rcu **ifap,
359 int destroy, struct nlmsghdr *nlh, u32 portid)
361 struct in_ifaddr *promote = NULL;
362 struct in_ifaddr *ifa, *ifa1;
363 struct in_ifaddr __rcu **last_prim;
364 struct in_ifaddr *prev_prom = NULL;
365 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
369 ifa1 = rtnl_dereference(*ifap);
374 /* 1. Deleting primary ifaddr forces deletion all secondaries
375 * unless alias promotion is set
378 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
379 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
381 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
382 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
383 ifa1->ifa_scope <= ifa->ifa_scope)
384 last_prim = &ifa->ifa_next;
386 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
387 ifa1->ifa_mask != ifa->ifa_mask ||
388 !inet_ifa_match(ifa1->ifa_address, ifa)) {
389 ifap1 = &ifa->ifa_next;
395 inet_hash_remove(ifa);
396 *ifap1 = ifa->ifa_next;
398 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
399 blocking_notifier_call_chain(&inetaddr_chain,
409 /* On promotion all secondaries from subnet are changing
410 * the primary IP, we must remove all their routes silently
411 * and later to add them back with new prefsrc. Do this
412 * while all addresses are on the device list.
414 for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
415 if (ifa1->ifa_mask == ifa->ifa_mask &&
416 inet_ifa_match(ifa1->ifa_address, ifa))
417 fib_del_ifaddr(ifa, ifa1);
423 *ifap = ifa1->ifa_next;
424 inet_hash_remove(ifa1);
426 /* 3. Announce address deletion */
428 /* Send message first, then call notifier.
429 At first sight, FIB update triggered by notifier
430 will refer to already deleted ifaddr, that could confuse
431 netlink listeners. It is not true: look, gated sees
432 that route deleted and if it still thinks that ifaddr
433 is valid, it will try to restore deleted routes... Grr.
434 So that, this order is correct.
436 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
437 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
440 struct in_ifaddr *next_sec;
442 next_sec = rtnl_dereference(promote->ifa_next);
444 struct in_ifaddr *last_sec;
446 rcu_assign_pointer(prev_prom->ifa_next, next_sec);
448 last_sec = rtnl_dereference(*last_prim);
449 rcu_assign_pointer(promote->ifa_next, last_sec);
450 rcu_assign_pointer(*last_prim, promote);
453 promote->ifa_flags &= ~IFA_F_SECONDARY;
454 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
455 blocking_notifier_call_chain(&inetaddr_chain,
457 for (ifa = next_sec; ifa;
458 ifa = rtnl_dereference(ifa->ifa_next)) {
459 if (ifa1->ifa_mask != ifa->ifa_mask ||
460 !inet_ifa_match(ifa1->ifa_address, ifa))
470 static void inet_del_ifa(struct in_device *in_dev,
471 struct in_ifaddr __rcu **ifap,
474 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Deferred worker that expires/deprecates addresses by lifetime. */
static void check_lifetime(struct work_struct *work);

static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
481 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
482 u32 portid, struct netlink_ext_ack *extack)
484 struct in_ifaddr __rcu **last_primary, **ifap;
485 struct in_device *in_dev = ifa->ifa_dev;
486 struct in_validator_info ivi;
487 struct in_ifaddr *ifa1;
492 if (!ifa->ifa_local) {
497 ifa->ifa_flags &= ~IFA_F_SECONDARY;
498 last_primary = &in_dev->ifa_list;
500 /* Don't set IPv6 only flags to IPv4 addresses */
501 ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
503 ifap = &in_dev->ifa_list;
504 ifa1 = rtnl_dereference(*ifap);
507 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
508 ifa->ifa_scope <= ifa1->ifa_scope)
509 last_primary = &ifa1->ifa_next;
510 if (ifa1->ifa_mask == ifa->ifa_mask &&
511 inet_ifa_match(ifa1->ifa_address, ifa)) {
512 if (ifa1->ifa_local == ifa->ifa_local) {
516 if (ifa1->ifa_scope != ifa->ifa_scope) {
517 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
521 ifa->ifa_flags |= IFA_F_SECONDARY;
524 ifap = &ifa1->ifa_next;
525 ifa1 = rtnl_dereference(*ifap);
528 /* Allow any devices that wish to register ifaddr validtors to weigh
529 * in now, before changes are committed. The rntl lock is serializing
530 * access here, so the state should not change between a validator call
531 * and a final notify on commit. This isn't invoked on promotion under
532 * the assumption that validators are checking the address itself, and
535 ivi.ivi_addr = ifa->ifa_address;
536 ivi.ivi_dev = ifa->ifa_dev;
538 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
540 ret = notifier_to_errno(ret);
546 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
549 rcu_assign_pointer(ifa->ifa_next, *ifap);
550 rcu_assign_pointer(*ifap, ifa);
552 inet_hash_insert(dev_net(in_dev->dev), ifa);
554 cancel_delayed_work(&check_lifetime_work);
555 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
557 /* Send message first, then call notifier.
558 Notifier will trigger FIB update, so that
559 listeners of netlink will know about new ifaddr */
560 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
561 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
566 static int inet_insert_ifa(struct in_ifaddr *ifa)
568 return __inet_insert_ifa(ifa, NULL, 0, NULL);
571 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
573 struct in_device *in_dev = __in_dev_get_rtnl(dev);
581 ipv4_devconf_setall(in_dev);
582 neigh_parms_data_state_setall(in_dev->arp_parms);
583 if (ifa->ifa_dev != in_dev) {
584 WARN_ON(ifa->ifa_dev);
586 ifa->ifa_dev = in_dev;
588 if (ipv4_is_loopback(ifa->ifa_local))
589 ifa->ifa_scope = RT_SCOPE_HOST;
590 return inet_insert_ifa(ifa);
593 /* Caller must hold RCU or RTNL :
594 * We dont take a reference on found in_device
596 struct in_device *inetdev_by_index(struct net *net, int ifindex)
598 struct net_device *dev;
599 struct in_device *in_dev = NULL;
602 dev = dev_get_by_index_rcu(net, ifindex);
604 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
608 EXPORT_SYMBOL(inetdev_by_index);
610 /* Called only from RTNL semaphored context. No locks. */
612 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
615 struct in_ifaddr *ifa;
619 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
620 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
626 static int ip_mc_autojoin_config(struct net *net, bool join,
627 const struct in_ifaddr *ifa)
629 #if defined(CONFIG_IP_MULTICAST)
630 struct ip_mreqn mreq = {
631 .imr_multiaddr.s_addr = ifa->ifa_address,
632 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
634 struct sock *sk = net->ipv4.mc_autojoin_sk;
641 ret = ip_mc_join_group(sk, &mreq);
643 ret = ip_mc_leave_group(sk, &mreq);
652 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
653 struct netlink_ext_ack *extack)
655 struct net *net = sock_net(skb->sk);
656 struct in_ifaddr __rcu **ifap;
657 struct nlattr *tb[IFA_MAX+1];
658 struct in_device *in_dev;
659 struct ifaddrmsg *ifm;
660 struct in_ifaddr *ifa;
665 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
666 ifa_ipv4_policy, extack);
670 ifm = nlmsg_data(nlh);
671 in_dev = inetdev_by_index(net, ifm->ifa_index);
673 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
678 for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
679 ifap = &ifa->ifa_next) {
681 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
684 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
687 if (tb[IFA_ADDRESS] &&
688 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
689 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
692 if (ipv4_is_multicast(ifa->ifa_address))
693 ip_mc_autojoin_config(net, false, ifa);
694 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
698 NL_SET_ERR_MSG(extack, "ipv4: Address not found");
699 err = -EADDRNOTAVAIL;
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME 0xFFFFFFFF
706 static void check_lifetime(struct work_struct *work)
708 unsigned long now, next, next_sec, next_sched;
709 struct in_ifaddr *ifa;
710 struct hlist_node *n;
714 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
716 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
717 bool change_needed = false;
720 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
721 unsigned long age, tstamp;
726 flags = READ_ONCE(ifa->ifa_flags);
727 if (flags & IFA_F_PERMANENT)
730 preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
731 valid_lft = READ_ONCE(ifa->ifa_valid_lft);
732 tstamp = READ_ONCE(ifa->ifa_tstamp);
733 /* We try to batch several events at once. */
734 age = (now - tstamp +
735 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
737 if (valid_lft != INFINITY_LIFE_TIME &&
739 change_needed = true;
740 } else if (preferred_lft ==
741 INFINITY_LIFE_TIME) {
743 } else if (age >= preferred_lft) {
744 if (time_before(tstamp + valid_lft * HZ, next))
745 next = tstamp + valid_lft * HZ;
747 if (!(flags & IFA_F_DEPRECATED))
748 change_needed = true;
749 } else if (time_before(tstamp + preferred_lft * HZ,
751 next = tstamp + preferred_lft * HZ;
758 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
761 if (ifa->ifa_flags & IFA_F_PERMANENT)
764 /* We try to batch several events at once. */
765 age = (now - ifa->ifa_tstamp +
766 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
768 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
769 age >= ifa->ifa_valid_lft) {
770 struct in_ifaddr __rcu **ifap;
771 struct in_ifaddr *tmp;
773 ifap = &ifa->ifa_dev->ifa_list;
774 tmp = rtnl_dereference(*ifap);
777 inet_del_ifa(ifa->ifa_dev,
781 ifap = &tmp->ifa_next;
782 tmp = rtnl_dereference(*ifap);
784 } else if (ifa->ifa_preferred_lft !=
785 INFINITY_LIFE_TIME &&
786 age >= ifa->ifa_preferred_lft &&
787 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
788 ifa->ifa_flags |= IFA_F_DEPRECATED;
789 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
795 next_sec = round_jiffies_up(next);
798 /* If rounded timeout is accurate enough, accept it. */
799 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
800 next_sched = next_sec;
803 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
804 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
805 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
807 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
811 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
814 unsigned long timeout;
817 flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
819 timeout = addrconf_timeout_fixup(valid_lft, HZ);
820 if (addrconf_finite_timeout(timeout))
821 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
823 flags |= IFA_F_PERMANENT;
825 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
826 if (addrconf_finite_timeout(timeout)) {
828 flags |= IFA_F_DEPRECATED;
829 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
831 WRITE_ONCE(ifa->ifa_flags, flags);
832 WRITE_ONCE(ifa->ifa_tstamp, jiffies);
833 if (!ifa->ifa_cstamp)
834 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
837 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
838 __u32 *pvalid_lft, __u32 *pprefered_lft,
839 struct netlink_ext_ack *extack)
841 struct nlattr *tb[IFA_MAX+1];
842 struct in_ifaddr *ifa;
843 struct ifaddrmsg *ifm;
844 struct net_device *dev;
845 struct in_device *in_dev;
848 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
849 ifa_ipv4_policy, extack);
853 ifm = nlmsg_data(nlh);
856 if (ifm->ifa_prefixlen > 32) {
857 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
861 if (!tb[IFA_LOCAL]) {
862 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
866 dev = __dev_get_by_index(net, ifm->ifa_index);
869 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
873 in_dev = __in_dev_get_rtnl(dev);
878 ifa = inet_alloc_ifa();
881 * A potential indev allocation can be left alive, it stays
882 * assigned to its device and is destroy with it.
886 ipv4_devconf_setall(in_dev);
887 neigh_parms_data_state_setall(in_dev->arp_parms);
890 if (!tb[IFA_ADDRESS])
891 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
893 INIT_HLIST_NODE(&ifa->hash);
894 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
895 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
896 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
898 ifa->ifa_scope = ifm->ifa_scope;
899 ifa->ifa_dev = in_dev;
901 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
902 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
904 if (tb[IFA_BROADCAST])
905 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
908 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
910 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
912 if (tb[IFA_RT_PRIORITY])
913 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
916 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
918 if (tb[IFA_CACHEINFO]) {
919 struct ifa_cacheinfo *ci;
921 ci = nla_data(tb[IFA_CACHEINFO]);
922 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
923 NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
927 *pvalid_lft = ci->ifa_valid;
928 *pprefered_lft = ci->ifa_prefered;
939 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
941 struct in_device *in_dev = ifa->ifa_dev;
942 struct in_ifaddr *ifa1;
947 in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
948 if (ifa1->ifa_mask == ifa->ifa_mask &&
949 inet_ifa_match(ifa1->ifa_address, ifa) &&
950 ifa1->ifa_local == ifa->ifa_local)
956 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
957 struct netlink_ext_ack *extack)
959 struct net *net = sock_net(skb->sk);
960 struct in_ifaddr *ifa;
961 struct in_ifaddr *ifa_existing;
962 __u32 valid_lft = INFINITY_LIFE_TIME;
963 __u32 prefered_lft = INFINITY_LIFE_TIME;
967 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
971 ifa_existing = find_matching_ifa(ifa);
973 /* It would be best to check for !NLM_F_CREATE here but
974 * userspace already relies on not having to provide this.
976 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
977 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
978 int ret = ip_mc_autojoin_config(net, true, ifa);
981 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
986 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
989 u32 new_metric = ifa->ifa_rt_priority;
990 u8 new_proto = ifa->ifa_proto;
994 if (nlh->nlmsg_flags & NLM_F_EXCL ||
995 !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
996 NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1001 if (ifa->ifa_rt_priority != new_metric) {
1002 fib_modify_prefix_metric(ifa, new_metric);
1003 ifa->ifa_rt_priority = new_metric;
1006 ifa->ifa_proto = new_proto;
1008 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1009 cancel_delayed_work(&check_lifetime_work);
1010 queue_delayed_work(system_power_efficient_wq,
1011 &check_lifetime_work, 0);
1012 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1018 * Determine a default network mask, based on the IP address.
1021 static int inet_abc_len(__be32 addr)
1023 int rc = -1; /* Something else, probably a multicast. */
1025 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1028 __u32 haddr = ntohl(addr);
1029 if (IN_CLASSA(haddr))
1031 else if (IN_CLASSB(haddr))
1033 else if (IN_CLASSC(haddr))
1035 else if (IN_CLASSE(haddr))
1043 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1045 struct sockaddr_in sin_orig;
1046 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1047 struct in_ifaddr __rcu **ifap = NULL;
1048 struct in_device *in_dev;
1049 struct in_ifaddr *ifa = NULL;
1050 struct net_device *dev;
1053 int tryaddrmatch = 0;
1055 ifr->ifr_name[IFNAMSIZ - 1] = 0;
1057 /* save original address for comparison */
1058 memcpy(&sin_orig, sin, sizeof(*sin));
1060 colon = strchr(ifr->ifr_name, ':');
1064 dev_load(net, ifr->ifr_name);
1067 case SIOCGIFADDR: /* Get interface address */
1068 case SIOCGIFBRDADDR: /* Get the broadcast address */
1069 case SIOCGIFDSTADDR: /* Get the destination address */
1070 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1071 /* Note that these ioctls will not sleep,
1072 so that we do not impose a lock.
1073 One day we will be forced to put shlock here (I mean SMP)
1075 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1076 memset(sin, 0, sizeof(*sin));
1077 sin->sin_family = AF_INET;
1082 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1085 case SIOCSIFADDR: /* Set interface address (and family) */
1086 case SIOCSIFBRDADDR: /* Set the broadcast address */
1087 case SIOCSIFDSTADDR: /* Set the destination address */
1088 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1090 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1093 if (sin->sin_family != AF_INET)
1104 dev = __dev_get_by_name(net, ifr->ifr_name);
1111 in_dev = __in_dev_get_rtnl(dev);
1114 /* Matthias Andree */
1115 /* compare label and address (4.4BSD style) */
1116 /* note: we only do this for a limited set of ioctls
1117 and only if the original address family was AF_INET.
1118 This is checked above. */
1120 for (ifap = &in_dev->ifa_list;
1121 (ifa = rtnl_dereference(*ifap)) != NULL;
1122 ifap = &ifa->ifa_next) {
1123 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1124 sin_orig.sin_addr.s_addr ==
1130 /* we didn't get a match, maybe the application is
1131 4.3BSD-style and passed in junk so we fall back to
1132 comparing just the label */
1134 for (ifap = &in_dev->ifa_list;
1135 (ifa = rtnl_dereference(*ifap)) != NULL;
1136 ifap = &ifa->ifa_next)
1137 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1142 ret = -EADDRNOTAVAIL;
1143 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1147 case SIOCGIFADDR: /* Get interface address */
1149 sin->sin_addr.s_addr = ifa->ifa_local;
1152 case SIOCGIFBRDADDR: /* Get the broadcast address */
1154 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1157 case SIOCGIFDSTADDR: /* Get the destination address */
1159 sin->sin_addr.s_addr = ifa->ifa_address;
1162 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1164 sin->sin_addr.s_addr = ifa->ifa_mask;
1169 ret = -EADDRNOTAVAIL;
1173 if (!(ifr->ifr_flags & IFF_UP))
1174 inet_del_ifa(in_dev, ifap, 1);
1177 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1180 case SIOCSIFADDR: /* Set interface address (and family) */
1182 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1187 ifa = inet_alloc_ifa();
1190 INIT_HLIST_NODE(&ifa->hash);
1192 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1194 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1197 if (ifa->ifa_local == sin->sin_addr.s_addr)
1199 inet_del_ifa(in_dev, ifap, 0);
1200 ifa->ifa_broadcast = 0;
1204 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1206 if (!(dev->flags & IFF_POINTOPOINT)) {
1207 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1208 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1209 if ((dev->flags & IFF_BROADCAST) &&
1210 ifa->ifa_prefixlen < 31)
1211 ifa->ifa_broadcast = ifa->ifa_address |
1214 ifa->ifa_prefixlen = 32;
1215 ifa->ifa_mask = inet_make_mask(32);
1217 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1218 ret = inet_set_ifa(dev, ifa);
1221 case SIOCSIFBRDADDR: /* Set the broadcast address */
1223 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1224 inet_del_ifa(in_dev, ifap, 0);
1225 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1226 inet_insert_ifa(ifa);
1230 case SIOCSIFDSTADDR: /* Set the destination address */
1232 if (ifa->ifa_address == sin->sin_addr.s_addr)
1235 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1238 inet_del_ifa(in_dev, ifap, 0);
1239 ifa->ifa_address = sin->sin_addr.s_addr;
1240 inet_insert_ifa(ifa);
1243 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1246 * The mask we set must be legal.
1249 if (bad_mask(sin->sin_addr.s_addr, 0))
1252 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1253 __be32 old_mask = ifa->ifa_mask;
1254 inet_del_ifa(in_dev, ifap, 0);
1255 ifa->ifa_mask = sin->sin_addr.s_addr;
1256 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1258 /* See if current broadcast address matches
1259 * with current netmask, then recalculate
1260 * the broadcast address. Otherwise it's a
1261 * funny address, so don't touch it since
1262 * the user seems to know what (s)he's doing...
1264 if ((dev->flags & IFF_BROADCAST) &&
1265 (ifa->ifa_prefixlen < 31) &&
1266 (ifa->ifa_broadcast ==
1267 (ifa->ifa_local|~old_mask))) {
1268 ifa->ifa_broadcast = (ifa->ifa_local |
1269 ~sin->sin_addr.s_addr);
1271 inet_insert_ifa(ifa);
1281 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1283 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1284 const struct in_ifaddr *ifa;
1288 if (WARN_ON(size > sizeof(struct ifreq)))
1294 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1301 memset(&ifr, 0, sizeof(struct ifreq));
1302 strcpy(ifr.ifr_name, ifa->ifa_label);
1304 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1305 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1308 if (copy_to_user(buf + done, &ifr, size)) {
1319 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1322 const struct in_ifaddr *ifa;
1324 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1325 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1327 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1328 ifa->ifa_scope <= scope)
1329 return ifa->ifa_local;
1335 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1337 const struct in_ifaddr *ifa;
1339 unsigned char localnet_scope = RT_SCOPE_HOST;
1340 struct in_device *in_dev;
1341 struct net *net = dev_net(dev);
1345 in_dev = __in_dev_get_rcu(dev);
1349 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1350 localnet_scope = RT_SCOPE_LINK;
1352 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1353 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1355 if (min(ifa->ifa_scope, localnet_scope) > scope)
1357 if (!dst || inet_ifa_match(dst, ifa)) {
1358 addr = ifa->ifa_local;
1362 addr = ifa->ifa_local;
1368 master_idx = l3mdev_master_ifindex_rcu(dev);
1370 /* For VRFs, the VRF device takes the place of the loopback device,
1371 * with addresses on it being preferred. Note in such cases the
1372 * loopback device will be among the devices that fail the master_idx
1373 * equality check in the loop below.
1376 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1377 (in_dev = __in_dev_get_rcu(dev))) {
1378 addr = in_dev_select_addr(in_dev, scope);
1383 /* Not loopback addresses on loopback should be preferred
1384 in this case. It is important that lo is the first interface
1387 for_each_netdev_rcu(net, dev) {
1388 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1391 in_dev = __in_dev_get_rcu(dev);
1395 addr = in_dev_select_addr(in_dev, scope);
1403 EXPORT_SYMBOL(inet_select_addr);
1405 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1406 __be32 local, int scope)
1408 unsigned char localnet_scope = RT_SCOPE_HOST;
1409 const struct in_ifaddr *ifa;
1413 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1414 localnet_scope = RT_SCOPE_LINK;
1416 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1417 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1420 (local == ifa->ifa_local || !local) &&
1421 min_scope <= scope) {
1422 addr = ifa->ifa_local;
1427 same = (!local || inet_ifa_match(local, ifa)) &&
1428 (!dst || inet_ifa_match(dst, ifa));
1432 /* Is the selected addr into dst subnet? */
1433 if (inet_ifa_match(addr, ifa))
1435 /* No, then can we use new local src? */
1436 if (min_scope <= scope) {
1437 addr = ifa->ifa_local;
1440 /* search for large dst subnet for addr */
1446 return same ? addr : 0;
/*
1450 * Confirm that local IP address exists using wildcards:
1451 * - net: netns to check, cannot be NULL
1452 * - in_dev: only on this interface, NULL=any interface
1453 * - dst: only in the same subnet as dst, 0=any dst
1454 * - local: address, 0=autoselect the local address
1455 * - scope: maximum allowed scope value for the local address
 */
1457 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1458 __be32 dst, __be32 local, int scope)
1461 struct net_device *dev;
/* Fast path: caller pinned a specific in_device, check only that one */
1464 return confirm_addr_indev(in_dev, dst, local, scope);
/* Otherwise walk every netdev in the namespace under RCU.
 * NOTE(review): the rcu_read_lock/unlock lines are elided in this chunk.
 */
1467 for_each_netdev_rcu(net, dev) {
1468 in_dev = __in_dev_get_rcu(dev);
1470 addr = confirm_addr_indev(in_dev, dst, local, scope);
1479 EXPORT_SYMBOL(inet_confirm_addr);
/* Subscribe to IPv4 address add/delete events (blocking notifier chain). */
1485 int register_inetaddr_notifier(struct notifier_block *nb)
1487 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1489 EXPORT_SYMBOL(register_inetaddr_notifier);
/* Unsubscribe from IPv4 address add/delete events. */
1491 int unregister_inetaddr_notifier(struct notifier_block *nb)
1493 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1495 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/* Subscribe to the validator chain, which can veto address additions. */
1497 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1499 return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1501 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
/* Unsubscribe from the address-validator notifier chain. */
1503 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1505 return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1508 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1510 /* Rename ifa_labels for a device name change. Make some effort to preserve
1511 * existing alias numbering and to create unique labels if possible.
 */
1513 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1515 struct in_ifaddr *ifa;
/* Walk all addresses under RTNL and rebuild each label from the new name */
1518 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1519 char old[IFNAMSIZ], *dot;
1521 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1522 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/* An alias label has the form "<name>:<num>"; keep the ":<num>" suffix */
1525 dot = strchr(old, ':');
/* NOTE(review): the branch deciding when this sprintf runs (no existing
 * ':' suffix, so a fresh alias number is synthesized) is elided here.
 */
1527 sprintf(old, ":%d", named);
/* Append suffix if it fits, else overwrite the tail to keep IFNAMSIZ */
1530 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1531 strcat(ifa->ifa_label, dot);
1533 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
/* Re-announce the (relabeled) address over rtnetlink */
1535 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* Broadcast a gratuitous ARP (request with sender == target == our
 * address) for every address on the device, under RTNL.
 */
1539 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1540 struct in_device *in_dev)
1543 const struct in_ifaddr *ifa;
1545 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1546 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1547 ifa->ifa_local, dev,
1548 ifa->ifa_local, NULL,
1549 dev->dev_addr, NULL);
1553 /* Called only under RTNL semaphore */
/* Netdevice notifier: keeps the per-device in_device in sync with the
 * netdev lifecycle (create on REGISTER, destroy on UNREGISTER, rename,
 * MTU changes, ARP refresh on address/link changes).
 * NOTE(review): the switch braces, break statements and several goto
 * targets are elided in this chunk; comments reflect visible lines only.
 */
1555 static int inetdev_event(struct notifier_block *this, unsigned long event,
1558 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1559 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* No in_device yet: create one on REGISTER (or on CHANGEMTU when the
 * MTU becomes valid again, i.e. IP is being re-enabled).
 */
1564 if (event == NETDEV_REGISTER) {
1565 in_dev = inetdev_init(dev);
1567 return notifier_from_errno(PTR_ERR(in_dev));
1568 if (dev->flags & IFF_LOOPBACK) {
/* Loopback traffic never needs IPsec transformation/policy checks */
1569 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1570 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1572 } else if (event == NETDEV_CHANGEMTU) {
1573 /* Re-enabling IP */
1574 if (inetdev_valid_mtu(dev->mtu))
1575 in_dev = inetdev_init(dev);
/* From here on an in_device already existed when the event arrived */
1581 case NETDEV_REGISTER:
/* REGISTER with a pre-existing in_dev should be impossible */
1582 pr_debug("%s: bug\n", __func__);
1583 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1586 if (!inetdev_valid_mtu(dev->mtu))
/* Bringing up loopback: auto-assign 127.0.0.1/8, host scope, forever */
1588 if (dev->flags & IFF_LOOPBACK) {
1589 struct in_ifaddr *ifa = inet_alloc_ifa();
1592 INIT_HLIST_NODE(&ifa->hash);
1594 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1595 ifa->ifa_prefixlen = 8;
1596 ifa->ifa_mask = inet_make_mask(8);
1597 in_dev_hold(in_dev);
1598 ifa->ifa_dev = in_dev;
1599 ifa->ifa_scope = RT_SCOPE_HOST;
1600 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1601 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1602 INFINITY_LIFE_TIME);
1603 ipv4_devconf_setall(in_dev);
1604 neigh_parms_data_state_setall(in_dev->arp_parms);
1605 inet_insert_ifa(ifa);
1610 case NETDEV_CHANGEADDR:
/* Only announce a MAC change if arp_notify is enabled */
1611 if (!IN_DEV_ARP_NOTIFY(in_dev))
1614 case NETDEV_NOTIFY_PEERS:
1615 /* Send gratuitous ARP to notify of link change */
1616 inetdev_send_gratuitous_arp(dev, in_dev);
1621 case NETDEV_PRE_TYPE_CHANGE:
/* Drop/remap multicast state around a link-type change */
1622 ip_mc_unmap(in_dev);
1624 case NETDEV_POST_TYPE_CHANGE:
1625 ip_mc_remap(in_dev);
1627 case NETDEV_CHANGEMTU:
1628 if (inetdev_valid_mtu(dev->mtu))
1630 /* disable IP when MTU is not enough */
1632 case NETDEV_UNREGISTER:
1633 inetdev_destroy(in_dev);
1635 case NETDEV_CHANGENAME:
1636 /* Do not notify about label change, this event is
1637 * not interesting to applications using netlink.
 */
1639 inetdev_changename(dev, in_dev);
/* Re-register sysctl entries under the device's new name */
1641 devinet_sysctl_unregister(in_dev);
1642 devinet_sysctl_register(in_dev);
/* Notifier block hooking inetdev_event into netdevice events. */
1649 static struct notifier_block ip_netdev_notifier = {
1650 .notifier_call = inetdev_event,
/* Worst-case netlink message size for one RTM_NEWADDR/RTM_DELADDR,
 * covering the header plus every attribute inet_fill_ifaddr may emit.
 */
1653 static size_t inet_nlmsg_size(void)
1655 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1656 + nla_total_size(4) /* IFA_ADDRESS */
1657 + nla_total_size(4) /* IFA_LOCAL */
1658 + nla_total_size(4) /* IFA_BROADCAST */
1659 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1660 + nla_total_size(4) /* IFA_FLAGS */
1661 + nla_total_size(1) /* IFA_PROTO */
1662 + nla_total_size(4) /* IFA_RT_PRIORITY */
1663 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
/* Convert a jiffies timestamp to hundredths of a second since boot,
 * normalizing away the INITIAL_JIFFIES offset.
 */
1666 static inline u32 cstamp_delta(unsigned long cstamp)
1668 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/* Emit the IFA_CACHEINFO attribute: creation/update stamps (converted
 * via cstamp_delta) plus preferred/valid lifetimes in seconds.
 */
1671 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1672 unsigned long tstamp, u32 preferred, u32 valid)
1674 struct ifa_cacheinfo ci;
1676 ci.cstamp = cstamp_delta(cstamp);
1677 ci.tstamp = cstamp_delta(tstamp);
1678 ci.ifa_prefered = preferred;
1679 ci.ifa_valid = valid;
1681 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Serialize one in_ifaddr into an rtnetlink message (header + optional
 * attributes).  Returns 0 on success or -EMSGSIZE if skb runs out of room,
 * in which case the partial message is cancelled.
 * NOTE(review): uses READ_ONCE on fields that can change under RCU readers.
 */
1684 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1685 struct inet_fill_args *args)
1687 struct ifaddrmsg *ifm;
1688 struct nlmsghdr *nlh;
1689 unsigned long tstamp;
1690 u32 preferred, valid;
1693 nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1698 ifm = nlmsg_data(nlh);
1699 ifm->ifa_family = AF_INET;
1700 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1702 flags = READ_ONCE(ifa->ifa_flags);
1703 /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1704 * The 32bit value is given in IFA_FLAGS attribute.
 */
1706 ifm->ifa_flags = (__u8)flags;
1708 ifm->ifa_scope = ifa->ifa_scope;
1709 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
/* Tag cross-namespace dumps with the target namespace id */
1711 if (args->netnsid >= 0 &&
1712 nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1713 goto nla_put_failure;
/* Non-permanent addresses report remaining lifetimes; permanent ones
 * report INFINITY for both.
 */
1715 tstamp = READ_ONCE(ifa->ifa_tstamp);
1716 if (!(flags & IFA_F_PERMANENT)) {
1717 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1718 valid = READ_ONCE(ifa->ifa_valid_lft);
1719 if (preferred != INFINITY_LIFE_TIME) {
1720 long tval = (jiffies - tstamp) / HZ;
/* Subtract elapsed time; NOTE(review): clamping branch elided here */
1722 if (preferred > tval)
1726 if (valid != INFINITY_LIFE_TIME) {
1734 preferred = INFINITY_LIFE_TIME;
1735 valid = INFINITY_LIFE_TIME;
/* Optional attributes: only emitted when non-zero / non-empty */
1737 if ((ifa->ifa_address &&
1738 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1740 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1741 (ifa->ifa_broadcast &&
1742 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1743 (ifa->ifa_label[0] &&
1744 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1746 nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1747 nla_put_u32(skb, IFA_FLAGS, flags) ||
1748 (ifa->ifa_rt_priority &&
1749 nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1750 put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1752 goto nla_put_failure;
1754 nlmsg_end(skb, nlh);
/* Failure path: undo the partially-built message */
1758 nlmsg_cancel(skb, nlh);
/* Strict validation of an RTM_GETADDR dump request: header size, zeroed
 * header fields, and attributes (only IFA_TARGET_NETNSID is accepted).
 * Fills *fillargs (ifindex, netnsid, filter flags) and may switch
 * *tgt_net to the requested target namespace.
 */
1762 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1763 struct inet_fill_args *fillargs,
1764 struct net **tgt_net, struct sock *sk,
1765 struct netlink_callback *cb)
1767 struct netlink_ext_ack *extack = cb->extack;
1768 struct nlattr *tb[IFA_MAX+1];
1769 struct ifaddrmsg *ifm;
1772 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1773 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
/* Strict dumps require prefixlen/flags/scope to be zero in the header */
1777 ifm = nlmsg_data(nlh);
1778 if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1779 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1783 fillargs->ifindex = ifm->ifa_index;
1784 if (fillargs->ifindex) {
/* Mark the reply as filtered when restricted to one interface */
1785 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1786 fillargs->flags |= NLM_F_DUMP_FILTERED;
1789 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1790 ifa_ipv4_policy, extack);
1794 for (i = 0; i <= IFA_MAX; ++i) {
1798 if (i == IFA_TARGET_NETNSID) {
1801 fillargs->netnsid = nla_get_s32(tb[i]);
/* Requires capability over the target namespace */
1803 net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1805 fillargs->netnsid = -1;
1806 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1807 return PTR_ERR(net);
/* Any other present attribute is rejected in strict mode */
1811 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
/* Dump all addresses of one in_device into skb, resuming at *s_ip_idx
 * (the per-device cursor for interrupted dumps).
 * NOTE(review): the index bookkeeping/return lines are elided here.
 */
1819 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1820 struct netlink_callback *cb, int *s_ip_idx,
1821 struct inet_fill_args *fillargs)
1823 struct in_ifaddr *ifa;
1827 in_dev_for_each_ifa_rcu(ifa, in_dev) {
/* Skip entries already delivered in a previous dump round */
1828 if (ip_idx < *s_ip_idx) {
1832 err = inet_fill_ifaddr(skb, ifa, fillargs);
/* Stamp the message so userspace can detect mid-dump changes */
1836 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1847 /* Combine dev_addr_genid and dev_base_seq to detect changes.
 */
1849 static u32 inet_base_seq(const struct net *net)
1851 u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1852 READ_ONCE(net->dev_base_seq);
1854 /* Must not return 0 (see nl_dump_check_consistent()).
1855 * Chose a value far away from 0.
 */
/* RTM_GETADDR dump handler: emits RTM_NEWADDR multipart messages for
 * either one interface (fillargs.ifindex set) or all interfaces in the
 * target namespace, with resumable cursors kept in cb->ctx.
 */
1862 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1864 const struct nlmsghdr *nlh = cb->nlh;
1865 struct inet_fill_args fillargs = {
1866 .portid = NETLINK_CB(cb->skb).portid,
1867 .seq = nlh->nlmsg_seq,
1868 .event = RTM_NEWADDR,
1869 .flags = NLM_F_MULTI,
1872 struct net *net = sock_net(skb->sk);
1873 struct net *tgt_net = net;
/* Dump cursor persisted across callback invocations in cb->ctx */
1875 unsigned long ifindex;
1877 } *ctx = (void *)cb->ctx;
1878 struct in_device *in_dev;
1879 struct net_device *dev;
1883 if (cb->strict_check) {
1884 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
/* Single-interface dump path */
1889 if (fillargs.ifindex) {
1890 dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1895 in_dev = __in_dev_get_rcu(dev);
1898 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
/* Full dump: record the generation so consistency can be checked */
1904 cb->seq = inet_base_seq(tgt_net);
1906 for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1907 in_dev = __in_dev_get_rcu(dev);
1910 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
/* Drop the reference taken when switching to a target namespace */
1916 if (fillargs.netnsid >= 0)
/* Broadcast an address event (RTM_NEWADDR/RTM_DELADDR) to the
 * RTNLGRP_IPV4_IFADDR multicast group; on failure report the error to
 * interested listeners via rtnl_set_sk_err.
 */
1922 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1925 struct inet_fill_args fillargs = {
/* Echo the request's sequence number when triggered by a request */
1927 .seq = nlh ? nlh->nlmsg_seq : 0,
1932 struct sk_buff *skb;
1936 net = dev_net(ifa->ifa_dev->dev);
1937 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1941 err = inet_fill_ifaddr(skb, ifa, &fillargs);
1943 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1944 WARN_ON(err == -EMSGSIZE);
1948 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1952 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/* Size needed for the AF_INET portion of an RTM_*LINK message:
 * one IFLA_INET_CONF blob of all devconf values (when in_dev exists).
 */
1955 static size_t inet_get_link_af_size(const struct net_device *dev,
1956 u32 ext_filter_mask)
1958 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1963 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/* Fill IFLA_INET_CONF with a snapshot of every per-device devconf value. */
1966 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1967 u32 ext_filter_mask)
1969 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1976 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
/* READ_ONCE: values may be updated concurrently via sysctl/netlink */
1980 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1981 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
/* Netlink policy for AF_INET link attributes: a nested devconf array. */
1986 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1987 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/* Validate IFLA_AF_SPEC/AF_INET payload before inet_set_link_af applies
 * it: device must have an in_device, and every nested devconf id must be
 * within [1, IPV4_DEVCONF_MAX].
 */
1990 static int inet_validate_link_af(const struct net_device *dev,
1991 const struct nlattr *nla,
1992 struct netlink_ext_ack *extack)
1994 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1997 if (dev && !__in_dev_get_rtnl(dev))
1998 return -EAFNOSUPPORT;
2000 err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2001 inet_af_policy, extack);
2005 if (tb[IFLA_INET_CONF]) {
2006 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2007 int cfgid = nla_type(a);
/* devconf ids are 1-based; 0 and out-of-range ids are invalid */
2012 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/* Apply validated IFLA_INET_CONF values to the device's devconf.
 * Parsing is repeated without a policy since validation already ran.
 */
2020 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2021 struct netlink_ext_ack *extack)
2023 struct in_device *in_dev = __in_dev_get_rtnl(dev);
2024 struct nlattr *a, *tb[IFLA_INET_MAX+1];
2028 return -EAFNOSUPPORT;
2030 if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2033 if (tb[IFLA_INET_CONF]) {
2034 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2035 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/* Message size needed for an RTM_NEWNETCONF carrying either one
 * attribute (specific type) or all of them (NETCONFA_ALL).
 */
2041 static int inet_netconf_msgsize_devconf(int type)
2043 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2044 + nla_total_size(4); /* NETCONFA_IFINDEX */
2047 if (type == NETCONFA_ALL)
/* Each netconf attribute is a 4-byte value */
2050 if (all || type == NETCONFA_FORWARDING)
2051 size += nla_total_size(4);
2052 if (all || type == NETCONFA_RP_FILTER)
2053 size += nla_total_size(4);
2054 if (all || type == NETCONFA_MC_FORWARDING)
2055 size += nla_total_size(4);
2056 if (all || type == NETCONFA_BC_FORWARDING)
2057 size += nla_total_size(4);
2058 if (all || type == NETCONFA_PROXY_NEIGH)
2059 size += nla_total_size(4);
2060 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2061 size += nla_total_size(4);
/* Build an RTM_NEWNETCONF message for one devconf set, emitting either
 * the single requested attribute or (NETCONFA_ALL) every supported one.
 * Returns 0 on success, negative on overflow (message cancelled).
 */
2066 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2067 const struct ipv4_devconf *devconf,
2068 u32 portid, u32 seq, int event,
2069 unsigned int flags, int type)
2071 struct nlmsghdr *nlh;
2072 struct netconfmsg *ncm;
2075 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2080 if (type == NETCONFA_ALL)
2083 ncm = nlmsg_data(nlh);
2084 ncm->ncm_family = AF_INET;
2086 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2087 goto nla_put_failure;
/* NOTE(review): the "devconf == NULL means header-only" early-out is
 * elided in this chunk; the attribute emission below assumes devconf.
 */
2092 if ((all || type == NETCONFA_FORWARDING) &&
2093 nla_put_s32(skb, NETCONFA_FORWARDING,
2094 IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2095 goto nla_put_failure;
2096 if ((all || type == NETCONFA_RP_FILTER) &&
2097 nla_put_s32(skb, NETCONFA_RP_FILTER,
2098 IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2099 goto nla_put_failure;
2100 if ((all || type == NETCONFA_MC_FORWARDING) &&
2101 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2102 IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2103 goto nla_put_failure;
2104 if ((all || type == NETCONFA_BC_FORWARDING) &&
2105 nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2106 IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2107 goto nla_put_failure;
2108 if ((all || type == NETCONFA_PROXY_NEIGH) &&
2109 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2110 IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2111 goto nla_put_failure;
2112 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2113 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2114 IPV4_DEVCONF_RO(*devconf,
2115 IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2116 goto nla_put_failure;
2119 nlmsg_end(skb, nlh);
2123 nlmsg_cancel(skb, nlh);
/* Broadcast a netconf change for one attribute type to the
 * RTNLGRP_IPV4_NETCONF multicast group.
 */
2127 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2128 int ifindex, struct ipv4_devconf *devconf)
2130 struct sk_buff *skb;
2133 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2137 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2140 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2141 WARN_ON(err == -EMSGSIZE);
2145 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2149 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/* Netlink attribute policy for RTM_*NETCONF requests. */
2152 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2153 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
2154 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
2155 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
2156 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
2157 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
/* Validate an RTM_GETNETCONF request.  Non-strict sockets get lenient
 * parsing; strict ones additionally reject any attribute other than
 * NETCONFA_IFINDEX.
 */
2160 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2161 const struct nlmsghdr *nlh,
2163 struct netlink_ext_ack *extack)
2167 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2168 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2172 if (!netlink_strict_get_check(skb))
2173 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2175 devconf_ipv4_policy, extack);
2177 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2179 devconf_ipv4_policy, extack);
2183 for (i = 0; i <= NETCONFA_MAX; i++) {
2188 case NETCONFA_IFINDEX:
2191 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
/* RTM_GETNETCONF doit handler: look up the devconf selected by
 * NETCONFA_IFINDEX (ALL, DEFAULT, or a specific device) and unicast the
 * full netconf state back to the requester.
 */
2199 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2200 struct nlmsghdr *nlh,
2201 struct netlink_ext_ack *extack)
2203 struct net *net = sock_net(in_skb->sk);
2204 struct nlattr *tb[NETCONFA_MAX + 1];
2205 const struct ipv4_devconf *devconf;
2206 struct in_device *in_dev = NULL;
2207 struct net_device *dev = NULL;
2208 struct sk_buff *skb;
2212 err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2216 if (!tb[NETCONFA_IFINDEX])
2219 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2221 case NETCONFA_IFINDEX_ALL:
2222 devconf = net->ipv4.devconf_all;
2224 case NETCONFA_IFINDEX_DEFAULT:
2225 devconf = net->ipv4.devconf_dflt;
/* Specific interface: take refs on dev and in_dev for the duration */
2229 dev = dev_get_by_index(net, ifindex);
2231 in_dev = in_dev_get(dev);
2234 devconf = &in_dev->cnf;
/* Reply carries all attributes, so size for NETCONFA_ALL */
2239 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2243 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2244 NETLINK_CB(in_skb).portid,
2245 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2248 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2249 WARN_ON(err == -EMSGSIZE);
2253 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* RTM_GETNETCONF dump handler: one message per device, then one each
 * for the "all" and "default" pseudo-devconfs.  Resumable via the
 * ifindex/all_default cursors stored in cb->ctx.
 */
2261 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2262 struct netlink_callback *cb)
2264 const struct nlmsghdr *nlh = cb->nlh;
2265 struct net *net = sock_net(skb->sk);
2267 unsigned long ifindex;
2268 unsigned int all_default;
2269 } *ctx = (void *)cb->ctx;
2270 const struct in_device *in_dev;
2271 struct net_device *dev;
/* Strict mode: netconf dump requests carry no attributes at all */
2274 if (cb->strict_check) {
2275 struct netlink_ext_ack *extack = cb->extack;
2276 struct netconfmsg *ncm;
2278 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2279 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2283 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2284 NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2290 for_each_netdev_dump(net, dev, ctx->ifindex) {
2291 in_dev = __in_dev_get_rcu(dev);
2294 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2296 NETLINK_CB(cb->skb).portid,
2298 RTM_NEWNETCONF, NLM_F_MULTI,
/* all_default advances 0 -> 1 -> 2 as the two pseudo-entries are sent */
2303 if (ctx->all_default == 0) {
2304 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2305 net->ipv4.devconf_all,
2306 NETLINK_CB(cb->skb).portid,
2308 RTM_NEWNETCONF, NLM_F_MULTI,
2314 if (ctx->all_default == 1) {
2315 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2316 net->ipv4.devconf_dflt,
2317 NETLINK_CB(cb->skb).portid,
2319 RTM_NEWNETCONF, NLM_F_MULTI,
2330 #ifdef CONFIG_SYSCTL
/* Propagate a changed "default" devconf value (index i) to every device
 * that has not locally overridden it (override tracked via cnf.state).
 */
2332 static void devinet_copy_dflt_conf(struct net *net, int i)
2334 struct net_device *dev;
2337 for_each_netdev_rcu(net, dev) {
2338 struct in_device *in_dev;
2340 in_dev = __in_dev_get_rcu(dev);
/* test_bit: skip devices where this entry was set explicitly */
2341 if (in_dev && !test_bit(i, in_dev->cnf.state))
2342 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2347 /* called with RTNL locked */
/* Apply a change of the "all" forwarding sysctl: update default/all
 * state, push the new value to every device, disable LRO where needed,
 * and emit netconf notifications for each change.
 */
2348 static void inet_forward_change(struct net *net)
2350 struct net_device *dev;
2351 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
/* Routers must not accept redirects; hosts should */
2353 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2354 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2355 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2356 NETCONFA_FORWARDING,
2357 NETCONFA_IFINDEX_ALL,
2358 net->ipv4.devconf_all);
2359 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2360 NETCONFA_FORWARDING,
2361 NETCONFA_IFINDEX_DEFAULT,
2362 net->ipv4.devconf_dflt);
2364 for_each_netdev(net, dev) {
2365 struct in_device *in_dev;
/* LRO-merged frames must not be forwarded; NOTE(review): the "if (on)"
 * guard around this call is elided in this chunk.
 */
2368 dev_disable_lro(dev);
2370 in_dev = __in_dev_get_rtnl(dev);
2372 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2373 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2374 NETCONFA_FORWARDING,
2375 dev->ifindex, &in_dev->cnf);
/* Map a devconf pointer back to the ifindex used in netconf messages:
 * the DEFAULT/ALL sentinels, or the owning device's real ifindex.
 */
2380 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2382 if (cnf == net->ipv4.devconf_dflt)
2383 return NETCONFA_IFINDEX_DEFAULT;
2384 else if (cnf == net->ipv4.devconf_all)
2385 return NETCONFA_IFINDEX_ALL;
/* Per-device conf is embedded in in_device; recover it to get ifindex */
2387 struct in_device *idev
2388 = container_of(cnf, struct in_device, cnf);
2389 return idev->dev->ifindex;
/* Generic proc handler for devconf sysctls: write via proc_dointvec,
 * then on a successful write mark the entry as explicitly set,
 * propagate "default" changes, flush the route cache for entries that
 * affect routing decisions, and send netconf notifications for the
 * attribute types exposed over netlink.
 */
2393 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2394 void *buffer, size_t *lenp, loff_t *ppos)
2396 int old_value = *(int *)ctl->data;
2397 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2398 int new_value = *(int *)ctl->data;
2401 struct ipv4_devconf *cnf = ctl->extra1;
2402 struct net *net = ctl->extra2;
/* Index of this entry within the devconf data array */
2403 int i = (int *)ctl->data - cnf->data;
/* Record the explicit override so default propagation skips it */
2406 set_bit(i, cnf->state);
2408 if (cnf == net->ipv4.devconf_dflt)
2409 devinet_copy_dflt_conf(net, i);
/* Flush cached routes when values affecting route validity flip off */
2410 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2411 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2412 if ((new_value == 0) && (old_value != 0))
2413 rt_cache_flush(net);
2415 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2416 new_value != old_value)
2417 rt_cache_flush(net);
2419 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2420 new_value != old_value) {
2421 ifindex = devinet_conf_ifindex(net, cnf);
2422 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2426 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2427 new_value != old_value) {
2428 ifindex = devinet_conf_ifindex(net, cnf);
2429 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2430 NETCONFA_PROXY_NEIGH,
2433 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2434 new_value != old_value) {
2435 ifindex = devinet_conf_ifindex(net, cnf);
2436 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2437 NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
/* proc handler for the forwarding sysctls.  Writes need CAP_NET_ADMIN
 * in the owning namespace.  Because the full update must run under
 * RTNL, rtnl_trylock + restart_syscall is used to avoid deadlocking
 * against another task holding RTNL while writing this sysctl.
 */
2445 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2446 void *buffer, size_t *lenp, loff_t *ppos)
2448 int *valp = ctl->data;
2451 struct net *net = ctl->extra2;
2454 if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2457 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
/* Only act when the stored value actually changed */
2459 if (write && *valp != val) {
2460 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2461 if (!rtnl_trylock()) {
2462 /* Restore the original values before restarting */
2465 return restart_syscall();
2467 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
/* "all" change fans out to every device */
2468 inet_forward_change(net);
2470 struct ipv4_devconf *cnf = ctl->extra1;
2471 struct in_device *idev =
2472 container_of(cnf, struct in_device, cnf);
/* Per-device change: NOTE(review): the "if (*valp)" guard before
 * dev_disable_lro is elided in this chunk.
 */
2474 dev_disable_lro(idev->dev);
2475 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2476 NETCONFA_FORWARDING,
2481 rt_cache_flush(net);
/* Default-conf change: notify only, no per-device fan-out */
2483 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2484 NETCONFA_FORWARDING,
2485 NETCONFA_IFINDEX_DEFAULT,
2486 net->ipv4.devconf_dflt);
/* proc handler for devconf entries whose change invalidates cached
 * routes: write the int, then flush the route cache if it changed.
 */
2492 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2493 void *buffer, size_t *lenp, loff_t *ppos)
2495 int *valp = ctl->data;
2497 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2498 struct net *net = ctl->extra2;
2500 if (write && *valp != val)
2501 rt_cache_flush(net);
/* Helper macros building ctl_table entries for devconf sysctls.  Each
 * entry's .data points into the template ipv4_devconf; it is rebased to
 * the real per-net/per-device conf in __devinet_sysctl_register().
 */
2506 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2509 .data = ipv4_devconf.data + \
2510 IPV4_DEVCONF_ ## attr - 1, \
2511 .maxlen = sizeof(int), \
2513 .proc_handler = proc, \
2514 .extra1 = &ipv4_devconf, \
/* Read-write entry using the generic devconf handler */
2517 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2518 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
/* Read-only entry (mode 0444), e.g. mc_forwarding */
2520 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2521 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
/* Entry with a custom proc handler */
2523 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2524 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
/* Entry whose change must flush the route cache */
2526 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2527 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/* Template sysctl table for net/ipv4/conf/<dev>/*; kmemdup'ed and
 * rebased per conf instance in __devinet_sysctl_register().
 */
2529 static struct devinet_sysctl_table {
2530 struct ctl_table_header *sysctl_header;
2531 struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
2532 } devinet_sysctl = {
2534 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2535 devinet_sysctl_forward),
2536 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2537 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2539 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2540 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2541 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2542 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2543 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2544 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2545 "accept_source_route"),
2546 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2547 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2548 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2549 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2550 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2551 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2552 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2553 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2554 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2555 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2556 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2557 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2558 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2559 "arp_evict_nocarrier"),
2560 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2561 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2562 "force_igmp_version"),
2563 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2564 "igmpv2_unsolicited_report_interval"),
2565 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2566 "igmpv3_unsolicited_report_interval"),
2567 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2568 "ignore_routes_with_linkdown"),
2569 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2570 "drop_gratuitous_arp"),
2572 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2573 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2574 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2575 "promote_secondaries"),
2576 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2578 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2579 "drop_unicast_in_l2_multicast"),
/* Duplicate the template sysctl table, rebase every entry's data/extra
 * pointers onto the given conf and netns, and register it under
 * net/ipv4/conf/<dev_name>.  Announces the full conf over netconf.
 */
2583 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2584 int ifindex, struct ipv4_devconf *p)
2587 struct devinet_sysctl_table *t;
2588 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2590 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
/* Rebase template pointers (which target ipv4_devconf) onto p */
2594 for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2595 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2596 t->devinet_vars[i].extra1 = p;
2597 t->devinet_vars[i].extra2 = net;
2600 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2602 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2603 if (!t->sysctl_header)
2608 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
/* Tear down the sysctl table registered for one conf and announce the
 * removal over netconf (RTM_DELNETCONF).
 */
2618 static void __devinet_sysctl_unregister(struct net *net,
2619 struct ipv4_devconf *cnf, int ifindex)
2621 struct devinet_sysctl_table *t = cnf->sysctl;
2625 unregister_net_sysctl_table(t->sysctl_header);
2629 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
/* Register both the neighbour (ARP) and devconf sysctls for a device;
 * rolls back the neighbour registration if the devconf one fails.
 */
2632 static int devinet_sysctl_register(struct in_device *idev)
/* Refuse device names that would produce unsafe sysctl paths */
2636 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2639 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2642 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2643 idev->dev->ifindex, &idev->cnf);
2645 neigh_sysctl_unregister(idev->arp_parms);
/* Unregister the devconf and neighbour sysctls for a device. */
2649 static void devinet_sysctl_unregister(struct in_device *idev)
2651 struct net *net = dev_net(idev->dev);
2653 __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2654 neigh_sysctl_unregister(idev->arp_parms);
/* Template for the legacy net/ipv4/ip_forward sysctl; data/extra fields
 * are rebased per namespace in devinet_init_net().
 */
2657 static struct ctl_table ctl_forward_entry[] = {
2659 .procname = "ip_forward",
2660 .data = &ipv4_devconf.data[
2661 IPV4_DEVCONF_FORWARDING - 1],
2662 .maxlen = sizeof(int),
2664 .proc_handler = devinet_sysctl_forward,
2665 .extra1 = &ipv4_devconf,
2666 .extra2 = &init_net,
/* Per-netns init: allocate the "all" and "default" devconf sets (seeded
 * from compiled defaults, init_net, or the creating netns depending on
 * net_inherit_devconf()), then register their sysctl trees and the
 * legacy ip_forward entry.  Errors unwind via the labels at the bottom.
 */
2671 static __net_init int devinet_init_net(struct net *net)
2674 struct ipv4_devconf *all, *dflt;
2675 #ifdef CONFIG_SYSCTL
2676 struct ctl_table *tbl;
2677 struct ctl_table_header *forw_hdr;
2681 all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2685 dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2687 goto err_alloc_dflt;
2689 #ifdef CONFIG_SYSCTL
2690 tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Point the ip_forward entry at this namespace's "all" conf */
2694 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2695 tbl[0].extra1 = all;
2696 tbl[0].extra2 = net;
/* Child namespaces may inherit devconf per sysctl policy */
2699 if (!net_eq(net, &init_net)) {
2700 switch (net_inherit_devconf()) {
2702 /* copy from the current netns */
2703 memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2704 sizeof(ipv4_devconf));
2706 current->nsproxy->net_ns->ipv4.devconf_dflt,
2707 sizeof(ipv4_devconf_dflt));
2711 /* copy from init_net */
2712 memcpy(all, init_net.ipv4.devconf_all,
2713 sizeof(ipv4_devconf));
2714 memcpy(dflt, init_net.ipv4.devconf_dflt,
2715 sizeof(ipv4_devconf_dflt));
2718 /* use compiled values */
2723 #ifdef CONFIG_SYSCTL
2724 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2728 err = __devinet_sysctl_register(net, "default",
2729 NETCONFA_IFINDEX_DEFAULT, dflt);
2734 forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2735 ARRAY_SIZE(ctl_forward_entry));
2738 net->ipv4.forw_hdr = forw_hdr;
2741 net->ipv4.devconf_all = all;
2742 net->ipv4.devconf_dflt = dflt;
/* Error unwind: undo registrations in reverse order */
2745 #ifdef CONFIG_SYSCTL
2747 __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2749 __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
/* Per-netns teardown: unregister sysctl trees (reverse of init) and
 * free the namespace's devconf sets.
 */
2761 static __net_exit void devinet_exit_net(struct net *net)
2763 #ifdef CONFIG_SYSCTL
2764 const struct ctl_table *tbl;
2766 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2767 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2768 __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2769 NETCONFA_IFINDEX_DEFAULT);
2770 __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2771 NETCONFA_IFINDEX_ALL);
2774 kfree(net->ipv4.devconf_dflt);
2775 kfree(net->ipv4.devconf_all);
/* Pernet operations wiring devinet init/exit into netns lifecycle. */
2778 static __net_initdata struct pernet_operations devinet_ops = {
2779 .init = devinet_init_net,
2780 .exit = devinet_exit_net,
/* rtnetlink per-address-family ops for AF_INET link attributes. */
2783 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2785 .fill_link_af = inet_fill_link_af,
2786 .get_link_af_size = inet_get_link_af_size,
2787 .validate_link_af = inet_validate_link_af,
2788 .set_link_af = inet_set_link_af,
2791 void __init devinet_init(void)
2795 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2796 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2798 register_pernet_subsys(&devinet_ops);
2799 register_netdevice_notifier(&ip_netdev_notifier);
2801 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2803 rtnl_af_register(&inet_af_ops);
2805 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2806 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2807 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2808 RTNL_FLAG_DUMP_UNLOCKED);
2809 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2810 inet_netconf_dump_devconf,
2811 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);