treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 152
[linux-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/*
 * File-scope ipv4_devconf instance with a few entries preset.
 * The .data array is indexed by (IPV4_DEVCONF_<name> - 1); every entry
 * not listed here starts out as zero.  The two IGMP report intervals
 * are expressed in milliseconds.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
75
/*
 * Second file-scope ipv4_devconf instance.  It carries the same presets
 * as ipv4_devconf and additionally enables ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device - the hookup is
 * not visible in this part of the file, confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
87
/*
 * Read devconf attribute @attr from the default configuration of
 * namespace @net (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that any pointer-valued expression (for
 * example "base + 1" or a conditional expression) expands correctly;
 * without the parentheses "->" would bind to only the last operand.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
/*
 * Netlink attribute policy for IFA_* attributes.  It is handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * IFA_LABEL is limited to IFNAMSIZ - 1 characters; IFA_CACHEINFO only
 * specifies a length (that of struct ifa_cacheinfo).
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]             = { .type = NLA_U32 },
	[IFA_ADDRESS]           = { .type = NLA_U32 },
	[IFA_BROADCAST]         = { .type = NLA_U32 },
	[IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]             = { .type = NLA_U32 },
	[IFA_RT_PRIORITY]       = { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
};
101
/*
 * Parameter bundle.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; judging by the field names it carries the arguments for filling
 * an address netlink message (port id, sequence number, event type,
 * message flags, target netns id, interface index) - confirm against
 * the users further down.
 */
struct inet_fill_args {
	u32 portid;
	u32 seq;
	int event;
	unsigned int flags;
	int netnsid;
	int ifindex;
};
110
/*
 * Hash table of all configured IPv4 addresses (struct in_ifaddr, linked
 * through ->hash).  The table has 1 << IN4_ADDR_HSIZE_SHIFT buckets and
 * is indexed by inet_addr_hash().  Entries are added and removed under
 * RTNL (inet_hash_insert() / inet_hash_remove()) and looked up under
 * RCU (inet_lookup_ifaddr_rcu()).
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
115
116 static u32 inet_addr_hash(const struct net *net, __be32 addr)
117 {
118         u32 val = (__force u32) addr ^ net_hash_mix(net);
119
120         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
121 }
122
123 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
124 {
125         u32 hash = inet_addr_hash(net, ifa->ifa_local);
126
127         ASSERT_RTNL();
128         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
129 }
130
/*
 * Remove @ifa from the global address hash.  Must be called with RTNL
 * held.  The _init_rcu variant is used for the unlink.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
136
137 /**
138  * __ip_dev_find - find the first device with a given source address.
139  * @net: the net namespace
140  * @addr: the source address
141  * @devref: if true, take a reference on the found device
142  *
143  * If a caller uses devref=false, it should be protected by RCU, or RTNL
144  */
145 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
146 {
147         struct net_device *result = NULL;
148         struct in_ifaddr *ifa;
149
150         rcu_read_lock();
151         ifa = inet_lookup_ifaddr_rcu(net, addr);
152         if (!ifa) {
153                 struct flowi4 fl4 = { .daddr = addr };
154                 struct fib_result res = { 0 };
155                 struct fib_table *local;
156
157                 /* Fallback to FIB local table so that communication
158                  * over loopback subnets work.
159                  */
160                 local = fib_get_table(net, RT_TABLE_LOCAL);
161                 if (local &&
162                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163                     res.type == RTN_LOCAL)
164                         result = FIB_RES_DEV(res);
165         } else {
166                 result = ifa->ifa_dev->dev;
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 /* called under RCU lock */
176 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
177 {
178         u32 hash = inet_addr_hash(net, addr);
179         struct in_ifaddr *ifa;
180
181         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
182                 if (ifa->ifa_local == addr &&
183                     net_eq(dev_net(ifa->ifa_dev->dev), net))
184                         return ifa;
185
186         return NULL;
187 }
188
/* Forward declaration; rtmsg_ifa() is defined later in this file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is added or removed (see __inet_insert_ifa()
 * and __inet_del_ifa()).
 * inetaddr_validator_chain is called with a struct in_validator_info
 * before a new address is committed; an error result aborts the insert.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration trivially succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
/* Without CONFIG_SYSCTL there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
207
208 /* Locks all the inet devices. */
209
210 static struct in_ifaddr *inet_alloc_ifa(void)
211 {
212         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
213 }
214
215 static void inet_rcu_free_ifa(struct rcu_head *head)
216 {
217         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
218         if (ifa->ifa_dev)
219                 in_dev_put(ifa->ifa_dev);
220         kfree(ifa);
221 }
222
/*
 * Free @ifa via call_rcu(): the actual release (inet_rcu_free_ifa())
 * is deferred to an RCU callback rather than done immediately.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
227
228 void in_dev_finish_destroy(struct in_device *idev)
229 {
230         struct net_device *dev = idev->dev;
231
232         WARN_ON(idev->ifa_list);
233         WARN_ON(idev->mc_list);
234         kfree(rcu_dereference_protected(idev->mc_hash, 1));
235 #ifdef NET_REFCNT_DEBUG
236         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
237 #endif
238         dev_put(dev);
239         if (!idev->dead)
240                 pr_err("Freeing alive in_device %p\n", idev);
241         else
242                 kfree(idev);
243 }
244 EXPORT_SYMBOL(in_dev_finish_destroy);
245
246 static struct in_device *inetdev_init(struct net_device *dev)
247 {
248         struct in_device *in_dev;
249         int err = -ENOMEM;
250
251         ASSERT_RTNL();
252
253         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
254         if (!in_dev)
255                 goto out;
256         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
257                         sizeof(in_dev->cnf));
258         in_dev->cnf.sysctl = NULL;
259         in_dev->dev = dev;
260         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
261         if (!in_dev->arp_parms)
262                 goto out_kfree;
263         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
264                 dev_disable_lro(dev);
265         /* Reference in_dev->dev */
266         dev_hold(dev);
267         /* Account for reference dev->ip_ptr (below) */
268         refcount_set(&in_dev->refcnt, 1);
269
270         err = devinet_sysctl_register(in_dev);
271         if (err) {
272                 in_dev->dead = 1;
273                 in_dev_put(in_dev);
274                 in_dev = NULL;
275                 goto out;
276         }
277         ip_mc_init_dev(in_dev);
278         if (dev->flags & IFF_UP)
279                 ip_mc_up(in_dev);
280
281         /* we can receive as soon as ip_ptr is set -- do this last */
282         rcu_assign_pointer(dev->ip_ptr, in_dev);
283 out:
284         return in_dev ?: ERR_PTR(err);
285 out_kfree:
286         kfree(in_dev);
287         in_dev = NULL;
288         goto out;
289 }
290
291 static void in_dev_rcu_put(struct rcu_head *head)
292 {
293         struct in_device *idev = container_of(head, struct in_device, rcu_head);
294         in_dev_put(idev);
295 }
296
297 static void inetdev_destroy(struct in_device *in_dev)
298 {
299         struct in_ifaddr *ifa;
300         struct net_device *dev;
301
302         ASSERT_RTNL();
303
304         dev = in_dev->dev;
305
306         in_dev->dead = 1;
307
308         ip_mc_destroy_dev(in_dev);
309
310         while ((ifa = in_dev->ifa_list) != NULL) {
311                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
312                 inet_free_ifa(ifa);
313         }
314
315         RCU_INIT_POINTER(dev->ip_ptr, NULL);
316
317         devinet_sysctl_unregister(in_dev);
318         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
319         arp_ifdown(dev);
320
321         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
322 }
323
324 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
325 {
326         rcu_read_lock();
327         for_primary_ifa(in_dev) {
328                 if (inet_ifa_match(a, ifa)) {
329                         if (!b || inet_ifa_match(b, ifa)) {
330                                 rcu_read_unlock();
331                                 return 1;
332                         }
333                 }
334         } endfor_ifa(in_dev);
335         rcu_read_unlock();
336         return 0;
337 }
338
/*
 * __inet_del_ifa - unlink one address from its device and announce it.
 * @in_dev:  device the address is configured on
 * @ifap:    the link (->ifa_list or a predecessor's ->ifa_next) that
 *           currently points at the address to remove
 * @destroy: if non-zero, the removed address is freed with inet_free_ifa()
 * @nlh:     netlink request header handed through to rtmsg_ifa(); the
 *           inet_del_ifa() wrapper passes NULL
 * @portid:  netlink port id handed through to rtmsg_ifa()
 *
 * When a primary address is removed, its secondaries (same mask, same
 * subnet) are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES()
 * is set, the first of them is promoted to primary.  If the device is
 * already marked dead, the secondary handling is skipped entirely.
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	/* Secondary chosen for promotion, if any. */
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* Entry after which a promoted address is re-inserted. */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* Last entry skipped before 'promote' was found, i.e. its list
	 * predecessor; stays NULL when 'promote' directly follows ifa1.
	 */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope value is not
			 * below that of the address being deleted.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip anything that is not a secondary of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unhash, unlink, announce and free it. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Saved before 'promote' is possibly moved in the list. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* Move 'promote' to just after the last primary. */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
442
/*
 * Delete the address *@ifap from @in_dev without a netlink request
 * context: calls __inet_del_ifa() with a NULL nlmsghdr and port id 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
448
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work that runs check_lifetime().  It is queued with zero delay
 * by __inet_insert_ifa() and re-queued by check_lifetime() itself.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
452
453 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
454                              u32 portid, struct netlink_ext_ack *extack)
455 {
456         struct in_device *in_dev = ifa->ifa_dev;
457         struct in_ifaddr *ifa1, **ifap, **last_primary;
458         struct in_validator_info ivi;
459         int ret;
460
461         ASSERT_RTNL();
462
463         if (!ifa->ifa_local) {
464                 inet_free_ifa(ifa);
465                 return 0;
466         }
467
468         ifa->ifa_flags &= ~IFA_F_SECONDARY;
469         last_primary = &in_dev->ifa_list;
470
471         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
472              ifap = &ifa1->ifa_next) {
473                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
474                     ifa->ifa_scope <= ifa1->ifa_scope)
475                         last_primary = &ifa1->ifa_next;
476                 if (ifa1->ifa_mask == ifa->ifa_mask &&
477                     inet_ifa_match(ifa1->ifa_address, ifa)) {
478                         if (ifa1->ifa_local == ifa->ifa_local) {
479                                 inet_free_ifa(ifa);
480                                 return -EEXIST;
481                         }
482                         if (ifa1->ifa_scope != ifa->ifa_scope) {
483                                 inet_free_ifa(ifa);
484                                 return -EINVAL;
485                         }
486                         ifa->ifa_flags |= IFA_F_SECONDARY;
487                 }
488         }
489
490         /* Allow any devices that wish to register ifaddr validtors to weigh
491          * in now, before changes are committed.  The rntl lock is serializing
492          * access here, so the state should not change between a validator call
493          * and a final notify on commit.  This isn't invoked on promotion under
494          * the assumption that validators are checking the address itself, and
495          * not the flags.
496          */
497         ivi.ivi_addr = ifa->ifa_address;
498         ivi.ivi_dev = ifa->ifa_dev;
499         ivi.extack = extack;
500         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
501                                            NETDEV_UP, &ivi);
502         ret = notifier_to_errno(ret);
503         if (ret) {
504                 inet_free_ifa(ifa);
505                 return ret;
506         }
507
508         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
509                 prandom_seed((__force u32) ifa->ifa_local);
510                 ifap = last_primary;
511         }
512
513         ifa->ifa_next = *ifap;
514         *ifap = ifa;
515
516         inet_hash_insert(dev_net(in_dev->dev), ifa);
517
518         cancel_delayed_work(&check_lifetime_work);
519         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
520
521         /* Send message first, then call notifier.
522            Notifier will trigger FIB update, so that
523            listeners of netlink will know about new ifaddr */
524         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
525         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
526
527         return 0;
528 }
529
/*
 * Insert @ifa without a netlink request context: calls
 * __inet_insert_ifa() with no nlmsghdr, port id 0 and no extack.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
534
535 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
536 {
537         struct in_device *in_dev = __in_dev_get_rtnl(dev);
538
539         ASSERT_RTNL();
540
541         if (!in_dev) {
542                 inet_free_ifa(ifa);
543                 return -ENOBUFS;
544         }
545         ipv4_devconf_setall(in_dev);
546         neigh_parms_data_state_setall(in_dev->arp_parms);
547         if (ifa->ifa_dev != in_dev) {
548                 WARN_ON(ifa->ifa_dev);
549                 in_dev_hold(in_dev);
550                 ifa->ifa_dev = in_dev;
551         }
552         if (ipv4_is_loopback(ifa->ifa_local))
553                 ifa->ifa_scope = RT_SCOPE_HOST;
554         return inet_insert_ifa(ifa);
555 }
556
557 /* Caller must hold RCU or RTNL :
558  * We dont take a reference on found in_device
559  */
560 struct in_device *inetdev_by_index(struct net *net, int ifindex)
561 {
562         struct net_device *dev;
563         struct in_device *in_dev = NULL;
564
565         rcu_read_lock();
566         dev = dev_get_by_index_rcu(net, ifindex);
567         if (dev)
568                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
569         rcu_read_unlock();
570         return in_dev;
571 }
572 EXPORT_SYMBOL(inetdev_by_index);
573
574 /* Called only from RTNL semaphored context. No locks. */
575
576 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
577                                     __be32 mask)
578 {
579         ASSERT_RTNL();
580
581         for_primary_ifa(in_dev) {
582                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
583                         return ifa;
584         } endfor_ifa(in_dev);
585         return NULL;
586 }
587
588 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
589 {
590         struct ip_mreqn mreq = {
591                 .imr_multiaddr.s_addr = ifa->ifa_address,
592                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
593         };
594         int ret;
595
596         ASSERT_RTNL();
597
598         lock_sock(sk);
599         if (join)
600                 ret = ip_mc_join_group(sk, &mreq);
601         else
602                 ret = ip_mc_leave_group(sk, &mreq);
603         release_sock(sk);
604
605         return ret;
606 }
607
608 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
609                             struct netlink_ext_ack *extack)
610 {
611         struct net *net = sock_net(skb->sk);
612         struct nlattr *tb[IFA_MAX+1];
613         struct in_device *in_dev;
614         struct ifaddrmsg *ifm;
615         struct in_ifaddr *ifa, **ifap;
616         int err = -EINVAL;
617
618         ASSERT_RTNL();
619
620         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
621                                      ifa_ipv4_policy, extack);
622         if (err < 0)
623                 goto errout;
624
625         ifm = nlmsg_data(nlh);
626         in_dev = inetdev_by_index(net, ifm->ifa_index);
627         if (!in_dev) {
628                 err = -ENODEV;
629                 goto errout;
630         }
631
632         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
633              ifap = &ifa->ifa_next) {
634                 if (tb[IFA_LOCAL] &&
635                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
636                         continue;
637
638                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
639                         continue;
640
641                 if (tb[IFA_ADDRESS] &&
642                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
643                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
644                         continue;
645
646                 if (ipv4_is_multicast(ifa->ifa_address))
647                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
648                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
649                 return 0;
650         }
651
652         err = -EADDRNOTAVAIL;
653 errout:
654         return err;
655 }
656
/*
 * Lifetime value meaning "never expires": check_lifetime() does not
 * expire or deprecate on a lifetime equal to this, and inet_rtm_newaddr()
 * uses it as the default for both lifetimes.
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
658
/*
 * Delayed-work handler that expires and deprecates non-permanent
 * addresses.
 *
 * Each hash bucket is handled in two passes:
 *  1. under rcu_read_lock() only, decide whether anything in the bucket
 *     needs changing and track in 'next' the earliest upcoming deadline;
 *  2. only if a change is needed, take RTNL and walk the bucket again,
 *     deleting addresses whose valid lifetime has elapsed and flagging
 *     (and announcing) addresses whose preferred lifetime has elapsed.
 *
 * Finally the work re-queues itself for the computed deadline.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Default: run again one ADDR_CHECK_FREQUENCY from now. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			/* jiffies since ifa_tstamp plus fuzz, divided by HZ;
			 * compared directly against the stored lifetimes.
			 */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* Valid lifetime elapsed: must be deleted. */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* Preferred lifetime elapsed: next deadline
				 * for this address is its valid lifetime.
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* Still preferred: deadline is the end of the
				 * preferred lifetime.
				 */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply changes under RTNL. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* Find the link pointing at ifa on its
				 * device list; inet_del_ifa() needs it.
				 */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				/* Mark deprecated and announce the change. */
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
756
757 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
758                              __u32 prefered_lft)
759 {
760         unsigned long timeout;
761
762         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
763
764         timeout = addrconf_timeout_fixup(valid_lft, HZ);
765         if (addrconf_finite_timeout(timeout))
766                 ifa->ifa_valid_lft = timeout;
767         else
768                 ifa->ifa_flags |= IFA_F_PERMANENT;
769
770         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
771         if (addrconf_finite_timeout(timeout)) {
772                 if (timeout == 0)
773                         ifa->ifa_flags |= IFA_F_DEPRECATED;
774                 ifa->ifa_preferred_lft = timeout;
775         }
776         ifa->ifa_tstamp = jiffies;
777         if (!ifa->ifa_cstamp)
778                 ifa->ifa_cstamp = ifa->ifa_tstamp;
779 }
780
781 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
782                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
783                                        struct netlink_ext_ack *extack)
784 {
785         struct nlattr *tb[IFA_MAX+1];
786         struct in_ifaddr *ifa;
787         struct ifaddrmsg *ifm;
788         struct net_device *dev;
789         struct in_device *in_dev;
790         int err;
791
792         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
793                                      ifa_ipv4_policy, extack);
794         if (err < 0)
795                 goto errout;
796
797         ifm = nlmsg_data(nlh);
798         err = -EINVAL;
799         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
800                 goto errout;
801
802         dev = __dev_get_by_index(net, ifm->ifa_index);
803         err = -ENODEV;
804         if (!dev)
805                 goto errout;
806
807         in_dev = __in_dev_get_rtnl(dev);
808         err = -ENOBUFS;
809         if (!in_dev)
810                 goto errout;
811
812         ifa = inet_alloc_ifa();
813         if (!ifa)
814                 /*
815                  * A potential indev allocation can be left alive, it stays
816                  * assigned to its device and is destroy with it.
817                  */
818                 goto errout;
819
820         ipv4_devconf_setall(in_dev);
821         neigh_parms_data_state_setall(in_dev->arp_parms);
822         in_dev_hold(in_dev);
823
824         if (!tb[IFA_ADDRESS])
825                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
826
827         INIT_HLIST_NODE(&ifa->hash);
828         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
829         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
830         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
831                                          ifm->ifa_flags;
832         ifa->ifa_scope = ifm->ifa_scope;
833         ifa->ifa_dev = in_dev;
834
835         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
836         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
837
838         if (tb[IFA_BROADCAST])
839                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
840
841         if (tb[IFA_LABEL])
842                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
843         else
844                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
845
846         if (tb[IFA_RT_PRIORITY])
847                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
848
849         if (tb[IFA_CACHEINFO]) {
850                 struct ifa_cacheinfo *ci;
851
852                 ci = nla_data(tb[IFA_CACHEINFO]);
853                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
854                         err = -EINVAL;
855                         goto errout_free;
856                 }
857                 *pvalid_lft = ci->ifa_valid;
858                 *pprefered_lft = ci->ifa_prefered;
859         }
860
861         return ifa;
862
863 errout_free:
864         inet_free_ifa(ifa);
865 errout:
866         return ERR_PTR(err);
867 }
868
869 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
870 {
871         struct in_device *in_dev = ifa->ifa_dev;
872         struct in_ifaddr *ifa1, **ifap;
873
874         if (!ifa->ifa_local)
875                 return NULL;
876
877         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
878              ifap = &ifa1->ifa_next) {
879                 if (ifa1->ifa_mask == ifa->ifa_mask &&
880                     inet_ifa_match(ifa1->ifa_address, ifa) &&
881                     ifa1->ifa_local == ifa->ifa_local)
882                         return ifa1;
883         }
884         return NULL;
885 }
886
887 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
888                             struct netlink_ext_ack *extack)
889 {
890         struct net *net = sock_net(skb->sk);
891         struct in_ifaddr *ifa;
892         struct in_ifaddr *ifa_existing;
893         __u32 valid_lft = INFINITY_LIFE_TIME;
894         __u32 prefered_lft = INFINITY_LIFE_TIME;
895
896         ASSERT_RTNL();
897
898         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
899         if (IS_ERR(ifa))
900                 return PTR_ERR(ifa);
901
902         ifa_existing = find_matching_ifa(ifa);
903         if (!ifa_existing) {
904                 /* It would be best to check for !NLM_F_CREATE here but
905                  * userspace already relies on not having to provide this.
906                  */
907                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
908                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
909                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
910                                                true, ifa);
911
912                         if (ret < 0) {
913                                 inet_free_ifa(ifa);
914                                 return ret;
915                         }
916                 }
917                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
918                                          extack);
919         } else {
920                 u32 new_metric = ifa->ifa_rt_priority;
921
922                 inet_free_ifa(ifa);
923
924                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
925                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
926                         return -EEXIST;
927                 ifa = ifa_existing;
928
929                 if (ifa->ifa_rt_priority != new_metric) {
930                         fib_modify_prefix_metric(ifa, new_metric);
931                         ifa->ifa_rt_priority = new_metric;
932                 }
933
934                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
935                 cancel_delayed_work(&check_lifetime_work);
936                 queue_delayed_work(system_power_efficient_wq,
937                                 &check_lifetime_work, 0);
938                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
939         }
940         return 0;
941 }
942
943 /*
944  *      Determine a default network mask, based on the IP address.
945  */
946
947 static int inet_abc_len(__be32 addr)
948 {
949         int rc = -1;    /* Something else, probably a multicast. */
950
951         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
952                 rc = 0;
953         else {
954                 __u32 haddr = ntohl(addr);
955                 if (IN_CLASSA(haddr))
956                         rc = 8;
957                 else if (IN_CLASSB(haddr))
958                         rc = 16;
959                 else if (IN_CLASSC(haddr))
960                         rc = 24;
961                 else if (IN_CLASSE(haddr))
962                         rc = 32;
963         }
964
965         return rc;
966 }
967
968
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 *
 * Handled commands are SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 * SIOCGIFNETMASK, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
 * SIOCSIFNETMASK and SIOCSIFFLAGS; any other command returns -EINVAL.
 * SIOCSIFFLAGS and the four address-setting commands need CAP_NET_ADMIN in
 * the user namespace of @net (-EPERM otherwise); the address-setting
 * commands also need sin_family == AF_INET (-EINVAL otherwise).
 *
 * ifr->ifr_name may contain a ':'.  The part before the colon is used to
 * look up the device, the complete string is compared against ifa_label.
 * For the get commands the answer is written into ifr->ifr_addr.
 *
 * Returns 0 on success or a negative errno, e.g. -ENODEV when the device
 * is not found and -EADDRNOTAVAIL when a command that needs an existing
 * address finds none.
 */
969 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
970 {
971         struct sockaddr_in sin_orig;
972         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
973         struct in_device *in_dev;
974         struct in_ifaddr **ifap = NULL;
975         struct in_ifaddr *ifa = NULL;
976         struct net_device *dev;
977         char *colon;
978         int ret = -EFAULT;
979         int tryaddrmatch = 0;
980
            /* Force NUL termination of the supplied name. */
981         ifr->ifr_name[IFNAMSIZ - 1] = 0;
982
983         /* save original address for comparison */
984         memcpy(&sin_orig, sin, sizeof(*sin));
985
            /* Temporarily cut the name at the first ':' so that only the
             * leading part is used for dev_load() and the device lookup.
             */
986         colon = strchr(ifr->ifr_name, ':');
987         if (colon)
988                 *colon = 0;
989
990         dev_load(net, ifr->ifr_name);
991
            /* First pass: validate the command and the caller's privileges
             * before taking the RTNL lock.
             */
992         switch (cmd) {
993         case SIOCGIFADDR:       /* Get interface address */
994         case SIOCGIFBRDADDR:    /* Get the broadcast address */
995         case SIOCGIFDSTADDR:    /* Get the destination address */
996         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
997                 /* Note that these ioctls will not sleep,
998                    so that we do not impose a lock.
999                    One day we will be forced to put shlock here (I mean SMP)
1000                  */
1001                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
                     /* The reply is built in place; the request address
                      * was saved in sin_orig above.
                      */
1002                 memset(sin, 0, sizeof(*sin));
1003                 sin->sin_family = AF_INET;
1004                 break;
1005
1006         case SIOCSIFFLAGS:
1007                 ret = -EPERM;
1008                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009                         goto out;
1010                 break;
1011         case SIOCSIFADDR:       /* Set interface address (and family) */
1012         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1013         case SIOCSIFDSTADDR:    /* Set the destination address */
1014         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1015                 ret = -EPERM;
1016                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1017                         goto out;
1018                 ret = -EINVAL;
1019                 if (sin->sin_family != AF_INET)
1020                         goto out;
1021                 break;
1022         default:
1023                 ret = -EINVAL;
1024                 goto out;
1025         }
1026
1027         rtnl_lock();
1028
1029         ret = -ENODEV;
1030         dev = __dev_get_by_name(net, ifr->ifr_name);
1031         if (!dev)
1032                 goto done;
1033
             /* Put the ':' back so the label comparisons below see the
              * complete name.
              */
1034         if (colon)
1035                 *colon = ':';
1036
1037         in_dev = __in_dev_get_rtnl(dev);
1038         if (in_dev) {
1039                 if (tryaddrmatch) {
1040                         /* Matthias Andree */
1041                         /* compare label and address (4.4BSD style) */
1042                         /* note: we only do this for a limited set of ioctls
1043                            and only if the original address family was AF_INET.
1044                            This is checked above. */
                             /* ifa is NULL when the loop ends without a match,
                              * which triggers the label-only search below.
                              */
1045                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1046                              ifap = &ifa->ifa_next) {
1047                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1048                                     sin_orig.sin_addr.s_addr ==
1049                                                         ifa->ifa_local) {
1050                                         break; /* found */
1051                                 }
1052                         }
1053                 }
1054                 /* we didn't get a match, maybe the application is
1055                    4.3BSD-style and passed in junk so we fall back to
1056                    comparing just the label */
1057                 if (!ifa) {
1058                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1059                              ifap = &ifa->ifa_next)
1060                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1061                                         break;
1062                 }
1063         }
1064
             /* Only SIOCSIFADDR and SIOCSIFFLAGS may continue without an
              * existing address.
              */
1065         ret = -EADDRNOTAVAIL;
1066         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1067                 goto done;
1068
1069         switch (cmd) {
1070         case SIOCGIFADDR:       /* Get interface address */
1071                 ret = 0;
1072                 sin->sin_addr.s_addr = ifa->ifa_local;
1073                 break;
1074
1075         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1076                 ret = 0;
1077                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1078                 break;
1079
1080         case SIOCGIFDSTADDR:    /* Get the destination address */
1081                 ret = 0;
1082                 sin->sin_addr.s_addr = ifa->ifa_address;
1083                 break;
1084
1085         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1086                 ret = 0;
1087                 sin->sin_addr.s_addr = ifa->ifa_mask;
1088                 break;
1089
1090         case SIOCSIFFLAGS:
                     /* With a ':' in the name the request targets a single
                      * address: clearing IFF_UP deletes it, anything else
                      * succeeds without changing it.  Without a ':' the
                      * device flags themselves are changed.
                      */
1091                 if (colon) {
1092                         ret = -EADDRNOTAVAIL;
1093                         if (!ifa)
1094                                 break;
1095                         ret = 0;
1096                         if (!(ifr->ifr_flags & IFF_UP))
1097                                 inet_del_ifa(in_dev, ifap, 1);
1098                         break;
1099                 }
1100                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1101                 break;
1102
1103         case SIOCSIFADDR:       /* Set interface address (and family) */
1104                 ret = -EINVAL;
1105                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1106                         break;
1107
1108                 if (!ifa) {
                             /* No matching address yet: build a new entry. */
1109                         ret = -ENOBUFS;
1110                         ifa = inet_alloc_ifa();
1111                         if (!ifa)
1112                                 break;
1113                         INIT_HLIST_NODE(&ifa->hash);
1114                         if (colon)
1115                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1116                         else
1117                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1118                 } else {
                             /* Same address already set: nothing to do.
                              * Otherwise the entry is taken off the list,
                              * modified below and handed to inet_set_ifa().
                              * NOTE(review): the last argument 0 presumably
                              * means "do not free", since ifa is reused
                              * afterwards; confirm against inet_del_ifa().
                              */
1119                         ret = 0;
1120                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1121                                 break;
1122                         inet_del_ifa(in_dev, ifap, 0);
1123                         ifa->ifa_broadcast = 0;
1124                         ifa->ifa_scope = 0;
1125                 }
1126
1127                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1128
                     /* Non point-to-point devices get the class-based default
                      * prefix length (and a derived broadcast address when the
                      * device has IFF_BROADCAST and the prefix is shorter than
                      * /31); point-to-point devices get a /32.
                      */
1129                 if (!(dev->flags & IFF_POINTOPOINT)) {
1130                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1131                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1132                         if ((dev->flags & IFF_BROADCAST) &&
1133                             ifa->ifa_prefixlen < 31)
1134                                 ifa->ifa_broadcast = ifa->ifa_address |
1135                                                      ~ifa->ifa_mask;
1136                 } else {
1137                         ifa->ifa_prefixlen = 32;
1138                         ifa->ifa_mask = inet_make_mask(32);
1139                 }
1140                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1141                 ret = inet_set_ifa(dev, ifa);
1142                 break;
1143
1144         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1145                 ret = 0;
                     /* Only touch the entry when the value really changes:
                      * remove it, update it, insert it again.
                      */
1146                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1147                         inet_del_ifa(in_dev, ifap, 0);
1148                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1149                         inet_insert_ifa(ifa);
1150                 }
1151                 break;
1152
1153         case SIOCSIFDSTADDR:    /* Set the destination address */
1154                 ret = 0;
1155                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1156                         break;
1157                 ret = -EINVAL;
1158                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1159                         break;
1160                 ret = 0;
1161                 inet_del_ifa(in_dev, ifap, 0);
1162                 ifa->ifa_address = sin->sin_addr.s_addr;
1163                 inet_insert_ifa(ifa);
1164                 break;
1165
1166         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1167
1168                 /*
1169                  *      The mask we set must be legal.
1170                  */
1171                 ret = -EINVAL;
1172                 if (bad_mask(sin->sin_addr.s_addr, 0))
1173                         break;
1174                 ret = 0;
1175                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                             /* Remember the old mask; it is needed below to
                              * tell whether the broadcast address was the
                              * default one for that mask.
                              */
1176                         __be32 old_mask = ifa->ifa_mask;
1177                         inet_del_ifa(in_dev, ifap, 0);
1178                         ifa->ifa_mask = sin->sin_addr.s_addr;
1179                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1180
1181                         /* See if current broadcast address matches
1182                          * with current netmask, then recalculate
1183                          * the broadcast address. Otherwise it's a
1184                          * funny address, so don't touch it since
1185                          * the user seems to know what (s)he's doing...
1186                          */
1187                         if ((dev->flags & IFF_BROADCAST) &&
1188                             (ifa->ifa_prefixlen < 31) &&
1189                             (ifa->ifa_broadcast ==
1190                              (ifa->ifa_local|~old_mask))) {
1191                                 ifa->ifa_broadcast = (ifa->ifa_local |
1192                                                       ~sin->sin_addr.s_addr);
1193                         }
1194                         inet_insert_ifa(ifa);
1195                 }
1196                 break;
1197         }
1198 done:
1199         rtnl_unlock();
1200 out:
1201         return ret;
1202 }
1203
1204 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1205 {
1206         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1207         struct in_ifaddr *ifa;
1208         struct ifreq ifr;
1209         int done = 0;
1210
1211         if (WARN_ON(size > sizeof(struct ifreq)))
1212                 goto out;
1213
1214         if (!in_dev)
1215                 goto out;
1216
1217         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1218                 if (!buf) {
1219                         done += size;
1220                         continue;
1221                 }
1222                 if (len < size)
1223                         break;
1224                 memset(&ifr, 0, sizeof(struct ifreq));
1225                 strcpy(ifr.ifr_name, ifa->ifa_label);
1226
1227                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1228                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1229                                                                 ifa->ifa_local;
1230
1231                 if (copy_to_user(buf + done, &ifr, size)) {
1232                         done = -EFAULT;
1233                         break;
1234                 }
1235                 len  -= size;
1236                 done += size;
1237         }
1238 out:
1239         return done;
1240 }
1241
1242 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1243                                  int scope)
1244 {
1245         for_primary_ifa(in_dev) {
1246                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1247                     ifa->ifa_scope <= scope)
1248                         return ifa->ifa_local;
1249         } endfor_ifa(in_dev);
1250
1251         return 0;
1252 }
1253
1254 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1255 {
1256         __be32 addr = 0;
1257         struct in_device *in_dev;
1258         struct net *net = dev_net(dev);
1259         int master_idx;
1260
1261         rcu_read_lock();
1262         in_dev = __in_dev_get_rcu(dev);
1263         if (!in_dev)
1264                 goto no_in_dev;
1265
1266         for_primary_ifa(in_dev) {
1267                 if (ifa->ifa_scope > scope)
1268                         continue;
1269                 if (!dst || inet_ifa_match(dst, ifa)) {
1270                         addr = ifa->ifa_local;
1271                         break;
1272                 }
1273                 if (!addr)
1274                         addr = ifa->ifa_local;
1275         } endfor_ifa(in_dev);
1276
1277         if (addr)
1278                 goto out_unlock;
1279 no_in_dev:
1280         master_idx = l3mdev_master_ifindex_rcu(dev);
1281
1282         /* For VRFs, the VRF device takes the place of the loopback device,
1283          * with addresses on it being preferred.  Note in such cases the
1284          * loopback device will be among the devices that fail the master_idx
1285          * equality check in the loop below.
1286          */
1287         if (master_idx &&
1288             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1289             (in_dev = __in_dev_get_rcu(dev))) {
1290                 addr = in_dev_select_addr(in_dev, scope);
1291                 if (addr)
1292                         goto out_unlock;
1293         }
1294
1295         /* Not loopback addresses on loopback should be preferred
1296            in this case. It is important that lo is the first interface
1297            in dev_base list.
1298          */
1299         for_each_netdev_rcu(net, dev) {
1300                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1301                         continue;
1302
1303                 in_dev = __in_dev_get_rcu(dev);
1304                 if (!in_dev)
1305                         continue;
1306
1307                 addr = in_dev_select_addr(in_dev, scope);
1308                 if (addr)
1309                         goto out_unlock;
1310         }
1311 out_unlock:
1312         rcu_read_unlock();
1313         return addr;
1314 }
1315 EXPORT_SYMBOL(inet_select_addr);
1316
/*
 * Search the addresses of @in_dev for a usable local address.
 *
 * Two things are tracked while walking the list:
 *   addr - the candidate: the first entry whose ifa_local equals @local
 *          (any entry when @local is 0) and whose scope is <= @scope;
 *   same - set once an entry is found for which inet_ifa_match() accepts
 *          @local (when non-zero) and @dst (when non-zero).
 *
 * Returns the candidate address if a matching subnet was also found,
 * otherwise 0.
 */
1317 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1318                               __be32 local, int scope)
1319 {
1320         int same = 0;
1321         __be32 addr = 0;
1322
1323         for_ifa(in_dev) {
                     /* No candidate yet: take this entry if it satisfies the
                      * @local and @scope constraints.  If a subnet match was
                      * already seen, the search is complete.
                      */
1324                 if (!addr &&
1325                     (local == ifa->ifa_local || !local) &&
1326                     ifa->ifa_scope <= scope) {
1327                         addr = ifa->ifa_local;
1328                         if (same)
1329                                 break;
1330                 }
1331                 if (!same) {
1332                         same = (!local || inet_ifa_match(local, ifa)) &&
1333                                 (!dst || inet_ifa_match(dst, ifa));
1334                         if (same && addr) {
                                     /* Done unless we are autoselecting
                                      * (@local == 0) for a specific @dst.
                                      */
1335                                 if (local || !dst)
1336                                         break;
1337                                 /* Is the selected addr into dst subnet? */
1338                                 if (inet_ifa_match(addr, ifa))
1339                                         break;
1340                                 /* No, then can we use new local src? */
1341                                 if (ifa->ifa_scope <= scope) {
1342                                         addr = ifa->ifa_local;
1343                                         break;
1344                                 }
1345                                 /* search for large dst subnet for addr */
1346                                 same = 0;
1347                         }
1348                 }
1349         } endfor_ifa(in_dev);
1350
1351         return same ? addr : 0;
1352 }
1353
1354 /*
1355  * Confirm that local IP address exists using wildcards:
1356  * - net: netns to check, cannot be NULL
1357  * - in_dev: only on this interface, NULL=any interface
1358  * - dst: only in the same subnet as dst, 0=any dst
1359  * - local: address, 0=autoselect the local address
1360  * - scope: maximum allowed scope value for the local address
1361  */
1362 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1363                          __be32 dst, __be32 local, int scope)
1364 {
1365         __be32 addr = 0;
1366         struct net_device *dev;
1367
1368         if (in_dev)
1369                 return confirm_addr_indev(in_dev, dst, local, scope);
1370
1371         rcu_read_lock();
1372         for_each_netdev_rcu(net, dev) {
1373                 in_dev = __in_dev_get_rcu(dev);
1374                 if (in_dev) {
1375                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1376                         if (addr)
1377                                 break;
1378                 }
1379         }
1380         rcu_read_unlock();
1381
1382         return addr;
1383 }
1384 EXPORT_SYMBOL(inet_confirm_addr);
1385
1386 /*
1387  *      Device notifier
1388  */
1389
1390 int register_inetaddr_notifier(struct notifier_block *nb)
1391 {
1392         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1393 }
1394 EXPORT_SYMBOL(register_inetaddr_notifier);
1395
1396 int unregister_inetaddr_notifier(struct notifier_block *nb)
1397 {
1398         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1399 }
1400 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1401
1402 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1403 {
1404         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1405 }
1406 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1407
1408 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1409 {
1410         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1411             nb);
1412 }
1413 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1414
1415 /* Rename ifa_labels for a device name change. Make some effort to preserve
1416  * existing alias numbering and to create unique labels if possible.
1417 */
1418 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1419 {
1420         struct in_ifaddr *ifa;
1421         int named = 0;
1422
1423         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1424                 char old[IFNAMSIZ], *dot;
1425
1426                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1427                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1428                 if (named++ == 0)
1429                         goto skip;
1430                 dot = strchr(old, ':');
1431                 if (!dot) {
1432                         sprintf(old, ":%d", named);
1433                         dot = old;
1434                 }
1435                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1436                         strcat(ifa->ifa_label, dot);
1437                 else
1438                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1439 skip:
1440                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1441         }
1442 }
1443
1444 static bool inetdev_valid_mtu(unsigned int mtu)
1445 {
1446         return mtu >= IPV4_MIN_MTU;
1447 }
1448
1449 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1450                                         struct in_device *in_dev)
1451
1452 {
1453         struct in_ifaddr *ifa;
1454
1455         for (ifa = in_dev->ifa_list; ifa;
1456              ifa = ifa->ifa_next) {
1457                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1458                          ifa->ifa_local, dev,
1459                          ifa->ifa_local, NULL,
1460                          dev->dev_addr, NULL);
1461         }
1462 }
1463
1464 /* Called only under RTNL semaphore */
1465
/*
 * inetdev_event - netdevice notifier callback maintaining IPv4 device state.
 *
 * When the device has no in_device, one is created on NETDEV_REGISTER (a
 * failure there is returned through notifier_from_errno()) or on
 * NETDEV_CHANGEMTU if the new MTU is valid; every other event is ignored.
 * When an in_device exists, the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except for the NETDEV_REGISTER failure case.
 */
1466 static int inetdev_event(struct notifier_block *this, unsigned long event,
1467                          void *ptr)
1468 {
1469         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1470         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1471
1472         ASSERT_RTNL();
1473
1474         if (!in_dev) {
1475                 if (event == NETDEV_REGISTER) {
1476                         in_dev = inetdev_init(dev);
1477                         if (IS_ERR(in_dev))
1478                                 return notifier_from_errno(PTR_ERR(in_dev));
                             /* Loopback devices start with NOXFRM and
                              * NOPOLICY enabled.
                              */
1479                         if (dev->flags & IFF_LOOPBACK) {
1480                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1481                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1482                         }
1483                 } else if (event == NETDEV_CHANGEMTU) {
1484                         /* Re-enabling IP */
1485                         if (inetdev_valid_mtu(dev->mtu))
1486                                 in_dev = inetdev_init(dev);
1487                 }
1488                 goto out;
1489         }
1490
1491         switch (event) {
1492         case NETDEV_REGISTER:
                     /* Unexpected: an in_device exists at register time.
                      * Log it and clear dev->ip_ptr.
                      */
1493                 pr_debug("%s: bug\n", __func__);
1494                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1495                 break;
1496         case NETDEV_UP:
1497                 if (!inetdev_valid_mtu(dev->mtu))
1498                         break;
                     /* A loopback device gets the INADDR_LOOPBACK address
                      * with an 8-bit prefix and host scope when it comes up.
                      * An allocation failure is silently ignored.
                      */
1499                 if (dev->flags & IFF_LOOPBACK) {
1500                         struct in_ifaddr *ifa = inet_alloc_ifa();
1501
1502                         if (ifa) {
1503                                 INIT_HLIST_NODE(&ifa->hash);
1504                                 ifa->ifa_local =
1505                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1506                                 ifa->ifa_prefixlen = 8;
1507                                 ifa->ifa_mask = inet_make_mask(8);
1508                                 in_dev_hold(in_dev);
1509                                 ifa->ifa_dev = in_dev;
1510                                 ifa->ifa_scope = RT_SCOPE_HOST;
1511                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1512                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1513                                                  INFINITY_LIFE_TIME);
1514                                 ipv4_devconf_setall(in_dev);
1515                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1516                                 inet_insert_ifa(ifa);
1517                         }
1518                 }
1519                 ip_mc_up(in_dev);
1520                 /* fall through */
1521         case NETDEV_CHANGEADDR:
                     /* NETDEV_UP and NETDEV_CHANGEADDR only send gratuitous
                      * ARP when IN_DEV_ARP_NOTIFY() is true for the device.
                      */
1522                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1523                         break;
1524                 /* fall through */
1525         case NETDEV_NOTIFY_PEERS:
1526                 /* Send gratuitous ARP to notify of link change */
1527                 inetdev_send_gratuitous_arp(dev, in_dev);
1528                 break;
1529         case NETDEV_DOWN:
1530                 ip_mc_down(in_dev);
1531                 break;
1532         case NETDEV_PRE_TYPE_CHANGE:
1533                 ip_mc_unmap(in_dev);
1534                 break;
1535         case NETDEV_POST_TYPE_CHANGE:
1536                 ip_mc_remap(in_dev);
1537                 break;
1538         case NETDEV_CHANGEMTU:
1539                 if (inetdev_valid_mtu(dev->mtu))
1540                         break;
1541                 /* disable IP when MTU is not enough */
1542                 /* fall through */
1543         case NETDEV_UNREGISTER:
1544                 inetdev_destroy(in_dev);
1545                 break;
1546         case NETDEV_CHANGENAME:
1547                 /* Do not notify about label change, this event is
1548                  * not interesting to applications using netlink.
1549                  */
1550                 inetdev_changename(dev, in_dev);
1551
                     /* Unregister and register the per-device sysctls again
                      * after the rename.
                      */
1552                 devinet_sysctl_unregister(in_dev);
1553                 devinet_sysctl_register(in_dev);
1554                 break;
1555         }
1556 out:
1557         return NOTIFY_DONE;
1558 }
1559
/* Notifier block whose callback is inetdev_event(). */
1560 static struct notifier_block ip_netdev_notifier = {
1561         .notifier_call = inetdev_event,
1562 };
1563
1564 static size_t inet_nlmsg_size(void)
1565 {
1566         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1567                + nla_total_size(4) /* IFA_ADDRESS */
1568                + nla_total_size(4) /* IFA_LOCAL */
1569                + nla_total_size(4) /* IFA_BROADCAST */
1570                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1571                + nla_total_size(4)  /* IFA_FLAGS */
1572                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1573                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1574 }
1575
1576 static inline u32 cstamp_delta(unsigned long cstamp)
1577 {
1578         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1579 }
1580
1581 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1582                          unsigned long tstamp, u32 preferred, u32 valid)
1583 {
1584         struct ifa_cacheinfo ci;
1585
1586         ci.cstamp = cstamp_delta(cstamp);
1587         ci.tstamp = cstamp_delta(tstamp);
1588         ci.ifa_prefered = preferred;
1589         ci.ifa_valid = valid;
1590
1591         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1592 }
1593
1594 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1595                             struct inet_fill_args *args)
1596 {
1597         struct ifaddrmsg *ifm;
1598         struct nlmsghdr  *nlh;
1599         u32 preferred, valid;
1600
1601         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1602                         args->flags);
1603         if (!nlh)
1604                 return -EMSGSIZE;
1605
1606         ifm = nlmsg_data(nlh);
1607         ifm->ifa_family = AF_INET;
1608         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1609         ifm->ifa_flags = ifa->ifa_flags;
1610         ifm->ifa_scope = ifa->ifa_scope;
1611         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1612
1613         if (args->netnsid >= 0 &&
1614             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1615                 goto nla_put_failure;
1616
1617         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1618                 preferred = ifa->ifa_preferred_lft;
1619                 valid = ifa->ifa_valid_lft;
1620                 if (preferred != INFINITY_LIFE_TIME) {
1621                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1622
1623                         if (preferred > tval)
1624                                 preferred -= tval;
1625                         else
1626                                 preferred = 0;
1627                         if (valid != INFINITY_LIFE_TIME) {
1628                                 if (valid > tval)
1629                                         valid -= tval;
1630                                 else
1631                                         valid = 0;
1632                         }
1633                 }
1634         } else {
1635                 preferred = INFINITY_LIFE_TIME;
1636                 valid = INFINITY_LIFE_TIME;
1637         }
1638         if ((ifa->ifa_address &&
1639              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1640             (ifa->ifa_local &&
1641              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1642             (ifa->ifa_broadcast &&
1643              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1644             (ifa->ifa_label[0] &&
1645              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1646             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1647             (ifa->ifa_rt_priority &&
1648              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1649             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1650                           preferred, valid))
1651                 goto nla_put_failure;
1652
1653         nlmsg_end(skb, nlh);
1654         return 0;
1655
1656 nla_put_failure:
1657         nlmsg_cancel(skb, nlh);
1658         return -EMSGSIZE;
1659 }
1660
1661 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1662                                       struct inet_fill_args *fillargs,
1663                                       struct net **tgt_net, struct sock *sk,
1664                                       struct netlink_callback *cb)
1665 {
1666         struct netlink_ext_ack *extack = cb->extack;
1667         struct nlattr *tb[IFA_MAX+1];
1668         struct ifaddrmsg *ifm;
1669         int err, i;
1670
1671         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1672                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1673                 return -EINVAL;
1674         }
1675
1676         ifm = nlmsg_data(nlh);
1677         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1678                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1679                 return -EINVAL;
1680         }
1681
1682         fillargs->ifindex = ifm->ifa_index;
1683         if (fillargs->ifindex) {
1684                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1685                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1686         }
1687
1688         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1689                                             ifa_ipv4_policy, extack);
1690         if (err < 0)
1691                 return err;
1692
1693         for (i = 0; i <= IFA_MAX; ++i) {
1694                 if (!tb[i])
1695                         continue;
1696
1697                 if (i == IFA_TARGET_NETNSID) {
1698                         struct net *net;
1699
1700                         fillargs->netnsid = nla_get_s32(tb[i]);
1701
1702                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1703                         if (IS_ERR(net)) {
1704                                 fillargs->netnsid = -1;
1705                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1706                                 return PTR_ERR(net);
1707                         }
1708                         *tgt_net = net;
1709                 } else {
1710                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1711                         return -EINVAL;
1712                 }
1713         }
1714
1715         return 0;
1716 }
1717
1718 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1719                             struct netlink_callback *cb, int s_ip_idx,
1720                             struct inet_fill_args *fillargs)
1721 {
1722         struct in_ifaddr *ifa;
1723         int ip_idx = 0;
1724         int err;
1725
1726         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1727                 if (ip_idx < s_ip_idx)
1728                         continue;
1729
1730                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1731                 if (err < 0)
1732                         goto done;
1733
1734                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1735         }
1736         err = 0;
1737
1738 done:
1739         cb->args[2] = ip_idx;
1740
1741         return err;
1742 }
1743
/*
 * Netlink dump callback producing RTM_NEWADDR messages for IPv4 addresses.
 *
 * Resume state is kept in cb->args[]:
 *   args[0] - bucket of dev_index_head[] to continue from
 *   args[1] - position of the device inside that bucket
 *   args[2] - position of the address inside that device's list
 *             (written by in_dev_dump_addr())
 *
 * With cb->strict_check set the request is validated first; it may select
 * another namespace and/or restrict the dump to a single interface.
 *
 * Returns skb->len when it is non-zero, otherwise the last error code
 * (0 if there was none).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct nlmsghdr *nlh = cb->nlh;
        struct inet_fill_args fillargs = {
                .portid = NETLINK_CB(cb->skb).portid,
                .seq = nlh->nlmsg_seq,
                .event = RTM_NEWADDR,
                .flags = NLM_F_MULTI,
                .netnsid = -1,
        };
        struct net *net = sock_net(skb->sk);
        struct net *tgt_net = net;
        int h, s_h;
        int idx, s_idx;
        int s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct hlist_head *head;
        int err = 0;

        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = cb->args[2];

        if (cb->strict_check) {
                err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
                                                 skb->sk, cb);
                if (err < 0)
                        goto put_tgt_net;

                err = 0;
                if (fillargs.ifindex) {
                        /* Filtered dump: only the requested device is visited. */
                        dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
                        if (!dev) {
                                err = -ENODEV;
                                goto put_tgt_net;
                        }

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev) {
                                err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
                                                       &fillargs);
                        }
                        goto put_tgt_net;
                }
        }

        /* s_idx only applies to the first bucket visited; it is reset per bucket. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &tgt_net->dev_index_head[h];
                rcu_read_lock();
                cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
                          tgt_net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        if (idx < s_idx)
                                goto cont;
                        /* Past the device we stopped at: start at its first address. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
                                               &fillargs);
                        if (err < 0) {
                                rcu_read_unlock();
                                goto done;
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Remember where to pick up on the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
put_tgt_net:
        /*
         * netnsid is only >= 0 when inet_valid_dump_ifaddr_req() switched
         * tgt_net to a namespace obtained from rtnl_get_net_ns_capable().
         */
        if (fillargs.netnsid >= 0)
                put_net(tgt_net);

        return skb->len ? : err;
}
1827
1828 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1829                       u32 portid)
1830 {
1831         struct inet_fill_args fillargs = {
1832                 .portid = portid,
1833                 .seq = nlh ? nlh->nlmsg_seq : 0,
1834                 .event = event,
1835                 .flags = 0,
1836                 .netnsid = -1,
1837         };
1838         struct sk_buff *skb;
1839         int err = -ENOBUFS;
1840         struct net *net;
1841
1842         net = dev_net(ifa->ifa_dev->dev);
1843         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1844         if (!skb)
1845                 goto errout;
1846
1847         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1848         if (err < 0) {
1849                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1850                 WARN_ON(err == -EMSGSIZE);
1851                 kfree_skb(skb);
1852                 goto errout;
1853         }
1854         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1855         return;
1856 errout:
1857         if (err < 0)
1858                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1859 }
1860
1861 static size_t inet_get_link_af_size(const struct net_device *dev,
1862                                     u32 ext_filter_mask)
1863 {
1864         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1865
1866         if (!in_dev)
1867                 return 0;
1868
1869         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1870 }
1871
1872 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1873                              u32 ext_filter_mask)
1874 {
1875         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1876         struct nlattr *nla;
1877         int i;
1878
1879         if (!in_dev)
1880                 return -ENODATA;
1881
1882         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1883         if (!nla)
1884                 return -EMSGSIZE;
1885
1886         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1887                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1888
1889         return 0;
1890 }
1891
/*
 * Policy used by inet_validate_link_af() when parsing the IPv4 link
 * attribute: IFLA_INET_CONF has to be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1895
1896 static int inet_validate_link_af(const struct net_device *dev,
1897                                  const struct nlattr *nla)
1898 {
1899         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1900         int err, rem;
1901
1902         if (dev && !__in_dev_get_rcu(dev))
1903                 return -EAFNOSUPPORT;
1904
1905         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1906                                           inet_af_policy, NULL);
1907         if (err < 0)
1908                 return err;
1909
1910         if (tb[IFLA_INET_CONF]) {
1911                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1912                         int cfgid = nla_type(a);
1913
1914                         if (nla_len(a) < 4)
1915                                 return -EINVAL;
1916
1917                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1918                                 return -EINVAL;
1919                 }
1920         }
1921
1922         return 0;
1923 }
1924
1925 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1926 {
1927         struct in_device *in_dev = __in_dev_get_rcu(dev);
1928         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1929         int rem;
1930
1931         if (!in_dev)
1932                 return -EAFNOSUPPORT;
1933
1934         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1935                 BUG();
1936
1937         if (tb[IFLA_INET_CONF]) {
1938                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1939                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1940         }
1941
1942         return 0;
1943 }
1944
1945 static int inet_netconf_msgsize_devconf(int type)
1946 {
1947         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1948                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1949         bool all = false;
1950
1951         if (type == NETCONFA_ALL)
1952                 all = true;
1953
1954         if (all || type == NETCONFA_FORWARDING)
1955                 size += nla_total_size(4);
1956         if (all || type == NETCONFA_RP_FILTER)
1957                 size += nla_total_size(4);
1958         if (all || type == NETCONFA_MC_FORWARDING)
1959                 size += nla_total_size(4);
1960         if (all || type == NETCONFA_BC_FORWARDING)
1961                 size += nla_total_size(4);
1962         if (all || type == NETCONFA_PROXY_NEIGH)
1963                 size += nla_total_size(4);
1964         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1965                 size += nla_total_size(4);
1966
1967         return size;
1968 }
1969
1970 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1971                                      struct ipv4_devconf *devconf, u32 portid,
1972                                      u32 seq, int event, unsigned int flags,
1973                                      int type)
1974 {
1975         struct nlmsghdr  *nlh;
1976         struct netconfmsg *ncm;
1977         bool all = false;
1978
1979         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1980                         flags);
1981         if (!nlh)
1982                 return -EMSGSIZE;
1983
1984         if (type == NETCONFA_ALL)
1985                 all = true;
1986
1987         ncm = nlmsg_data(nlh);
1988         ncm->ncm_family = AF_INET;
1989
1990         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1991                 goto nla_put_failure;
1992
1993         if (!devconf)
1994                 goto out;
1995
1996         if ((all || type == NETCONFA_FORWARDING) &&
1997             nla_put_s32(skb, NETCONFA_FORWARDING,
1998                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1999                 goto nla_put_failure;
2000         if ((all || type == NETCONFA_RP_FILTER) &&
2001             nla_put_s32(skb, NETCONFA_RP_FILTER,
2002                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2003                 goto nla_put_failure;
2004         if ((all || type == NETCONFA_MC_FORWARDING) &&
2005             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2006                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2007                 goto nla_put_failure;
2008         if ((all || type == NETCONFA_BC_FORWARDING) &&
2009             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2010                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2011                 goto nla_put_failure;
2012         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2013             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2014                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2015                 goto nla_put_failure;
2016         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2017             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2018                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2019                 goto nla_put_failure;
2020
2021 out:
2022         nlmsg_end(skb, nlh);
2023         return 0;
2024
2025 nla_put_failure:
2026         nlmsg_cancel(skb, nlh);
2027         return -EMSGSIZE;
2028 }
2029
2030 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2031                                  int ifindex, struct ipv4_devconf *devconf)
2032 {
2033         struct sk_buff *skb;
2034         int err = -ENOBUFS;
2035
2036         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2037         if (!skb)
2038                 goto errout;
2039
2040         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2041                                         event, 0, type);
2042         if (err < 0) {
2043                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2044                 WARN_ON(err == -EMSGSIZE);
2045                 kfree_skb(skb);
2046                 goto errout;
2047         }
2048         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2049         return;
2050 errout:
2051         if (err < 0)
2052                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2053 }
2054
/*
 * Attribute policy handed to the netlink parser by
 * inet_netconf_valid_get_req(); each listed attribute has its .len set to
 * sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
2062
2063 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2064                                       const struct nlmsghdr *nlh,
2065                                       struct nlattr **tb,
2066                                       struct netlink_ext_ack *extack)
2067 {
2068         int i, err;
2069
2070         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2071                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2072                 return -EINVAL;
2073         }
2074
2075         if (!netlink_strict_get_check(skb))
2076                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2077                                               tb, NETCONFA_MAX,
2078                                               devconf_ipv4_policy, extack);
2079
2080         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2081                                             tb, NETCONFA_MAX,
2082                                             devconf_ipv4_policy, extack);
2083         if (err)
2084                 return err;
2085
2086         for (i = 0; i <= NETCONFA_MAX; i++) {
2087                 if (!tb[i])
2088                         continue;
2089
2090                 switch (i) {
2091                 case NETCONFA_IFINDEX:
2092                         break;
2093                 default:
2094                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2095                         return -EINVAL;
2096                 }
2097         }
2098
2099         return 0;
2100 }
2101
2102 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2103                                     struct nlmsghdr *nlh,
2104                                     struct netlink_ext_ack *extack)
2105 {
2106         struct net *net = sock_net(in_skb->sk);
2107         struct nlattr *tb[NETCONFA_MAX+1];
2108         struct sk_buff *skb;
2109         struct ipv4_devconf *devconf;
2110         struct in_device *in_dev;
2111         struct net_device *dev;
2112         int ifindex;
2113         int err;
2114
2115         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2116         if (err)
2117                 goto errout;
2118
2119         err = -EINVAL;
2120         if (!tb[NETCONFA_IFINDEX])
2121                 goto errout;
2122
2123         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2124         switch (ifindex) {
2125         case NETCONFA_IFINDEX_ALL:
2126                 devconf = net->ipv4.devconf_all;
2127                 break;
2128         case NETCONFA_IFINDEX_DEFAULT:
2129                 devconf = net->ipv4.devconf_dflt;
2130                 break;
2131         default:
2132                 dev = __dev_get_by_index(net, ifindex);
2133                 if (!dev)
2134                         goto errout;
2135                 in_dev = __in_dev_get_rtnl(dev);
2136                 if (!in_dev)
2137                         goto errout;
2138                 devconf = &in_dev->cnf;
2139                 break;
2140         }
2141
2142         err = -ENOBUFS;
2143         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2144         if (!skb)
2145                 goto errout;
2146
2147         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2148                                         NETLINK_CB(in_skb).portid,
2149                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2150                                         NETCONFA_ALL);
2151         if (err < 0) {
2152                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2153                 WARN_ON(err == -EMSGSIZE);
2154                 kfree_skb(skb);
2155                 goto errout;
2156         }
2157         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2158 errout:
2159         return err;
2160 }
2161
/*
 * Netlink dump callback producing RTM_NEWNETCONF messages: one per device
 * that has an in_device, followed by one for the "all" settings and one
 * for the "default" settings.
 *
 * Resume state is kept in cb->args[]:
 *   args[0] - bucket of dev_index_head[] to continue from; the values
 *             NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 stand for the
 *             "all" and "default" entries respectively
 *   args[1] - position of the device inside the bucket
 *
 * With cb->strict_check set, the request must consist of a netconfmsg
 * header with nothing after it.
 *
 * Returns skb->len, or -EINVAL for a malformed strict request.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
                                     struct netlink_callback *cb)
{
        const struct nlmsghdr *nlh = cb->nlh;
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct hlist_head *head;

        if (cb->strict_check) {
                struct netlink_ext_ack *extack = cb->extack;
                struct netconfmsg *ncm;

                if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
                        NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
                        return -EINVAL;
                }

                if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
                        NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
                        return -EINVAL;
                }
        }

        s_h = cb->args[0];
        s_idx = idx = cb->args[1];

        /* s_idx only applies to the first bucket visited; it is reset per bucket. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        if (idx < s_idx)
                                goto cont;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
                                                      &in_dev->cnf,
                                                      NETLINK_CB(cb->skb).portid,
                                                      nlh->nlmsg_seq,
                                                      RTM_NEWNETCONF,
                                                      NLM_F_MULTI,
                                                      NETCONFA_ALL) < 0) {
                                rcu_read_unlock();
                                goto done;
                        }
                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
                        idx++;
                }
                rcu_read_unlock();
        }
        /* All devices done: emit the "all" entry, advancing h on success. */
        if (h == NETDEV_HASHENTRIES) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
                                              net->ipv4.devconf_all,
                                              NETLINK_CB(cb->skb).portid,
                                              nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              NETCONFA_ALL) < 0)
                        goto done;
                else
                        h++;
        }
        /* Then the "default" entry, again advancing h on success. */
        if (h == NETDEV_HASHENTRIES + 1) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
                                              net->ipv4.devconf_dflt,
                                              NETLINK_CB(cb->skb).portid,
                                              nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              NETCONFA_ALL) < 0)
                        goto done;
                else
                        h++;
        }
done:
        /* Remember where to pick up on the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;

        return skb->len;
}
2248
2249 #ifdef CONFIG_SYSCTL
2250
2251 static void devinet_copy_dflt_conf(struct net *net, int i)
2252 {
2253         struct net_device *dev;
2254
2255         rcu_read_lock();
2256         for_each_netdev_rcu(net, dev) {
2257                 struct in_device *in_dev;
2258
2259                 in_dev = __in_dev_get_rcu(dev);
2260                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2261                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2262         }
2263         rcu_read_unlock();
2264 }
2265
2266 /* called with RTNL locked */
2267 static void inet_forward_change(struct net *net)
2268 {
2269         struct net_device *dev;
2270         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2271
2272         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2273         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2274         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2275                                     NETCONFA_FORWARDING,
2276                                     NETCONFA_IFINDEX_ALL,
2277                                     net->ipv4.devconf_all);
2278         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2279                                     NETCONFA_FORWARDING,
2280                                     NETCONFA_IFINDEX_DEFAULT,
2281                                     net->ipv4.devconf_dflt);
2282
2283         for_each_netdev(net, dev) {
2284                 struct in_device *in_dev;
2285
2286                 if (on)
2287                         dev_disable_lro(dev);
2288
2289                 in_dev = __in_dev_get_rtnl(dev);
2290                 if (in_dev) {
2291                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2292                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2293                                                     NETCONFA_FORWARDING,
2294                                                     dev->ifindex, &in_dev->cnf);
2295                 }
2296         }
2297 }
2298
2299 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2300 {
2301         if (cnf == net->ipv4.devconf_dflt)
2302                 return NETCONFA_IFINDEX_DEFAULT;
2303         else if (cnf == net->ipv4.devconf_all)
2304                 return NETCONFA_IFINDEX_ALL;
2305         else {
2306                 struct in_device *idev
2307                         = container_of(cnf, struct in_device, cnf);
2308                 return idev->dev->ifindex;
2309         }
2310 }
2311
2312 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2313                              void __user *buffer,
2314                              size_t *lenp, loff_t *ppos)
2315 {
2316         int old_value = *(int *)ctl->data;
2317         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2318         int new_value = *(int *)ctl->data;
2319
2320         if (write) {
2321                 struct ipv4_devconf *cnf = ctl->extra1;
2322                 struct net *net = ctl->extra2;
2323                 int i = (int *)ctl->data - cnf->data;
2324                 int ifindex;
2325
2326                 set_bit(i, cnf->state);
2327
2328                 if (cnf == net->ipv4.devconf_dflt)
2329                         devinet_copy_dflt_conf(net, i);
2330                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2331                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2332                         if ((new_value == 0) && (old_value != 0))
2333                                 rt_cache_flush(net);
2334
2335                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2336                     new_value != old_value)
2337                         rt_cache_flush(net);
2338
2339                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2340                     new_value != old_value) {
2341                         ifindex = devinet_conf_ifindex(net, cnf);
2342                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2343                                                     NETCONFA_RP_FILTER,
2344                                                     ifindex, cnf);
2345                 }
2346                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2347                     new_value != old_value) {
2348                         ifindex = devinet_conf_ifindex(net, cnf);
2349                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2350                                                     NETCONFA_PROXY_NEIGH,
2351                                                     ifindex, cnf);
2352                 }
2353                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2354                     new_value != old_value) {
2355                         ifindex = devinet_conf_ifindex(net, cnf);
2356                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2357                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2358                                                     ifindex, cnf);
2359                 }
2360         }
2361
2362         return ret;
2363 }
2364
2365 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2366                                   void __user *buffer,
2367                                   size_t *lenp, loff_t *ppos)
2368 {
2369         int *valp = ctl->data;
2370         int val = *valp;
2371         loff_t pos = *ppos;
2372         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2373
2374         if (write && *valp != val) {
2375                 struct net *net = ctl->extra2;
2376
2377                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2378                         if (!rtnl_trylock()) {
2379                                 /* Restore the original values before restarting */
2380                                 *valp = val;
2381                                 *ppos = pos;
2382                                 return restart_syscall();
2383                         }
2384                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2385                                 inet_forward_change(net);
2386                         } else {
2387                                 struct ipv4_devconf *cnf = ctl->extra1;
2388                                 struct in_device *idev =
2389                                         container_of(cnf, struct in_device, cnf);
2390                                 if (*valp)
2391                                         dev_disable_lro(idev->dev);
2392                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2393                                                             NETCONFA_FORWARDING,
2394                                                             idev->dev->ifindex,
2395                                                             cnf);
2396                         }
2397                         rtnl_unlock();
2398                         rt_cache_flush(net);
2399                 } else
2400                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2401                                                     NETCONFA_FORWARDING,
2402                                                     NETCONFA_IFINDEX_DEFAULT,
2403                                                     net->ipv4.devconf_dflt);
2404         }
2405
2406         return ret;
2407 }
2408
2409 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2410                                 void __user *buffer,
2411                                 size_t *lenp, loff_t *ppos)
2412 {
2413         int *valp = ctl->data;
2414         int val = *valp;
2415         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2416         struct net *net = ctl->extra2;
2417
2418         if (write && *valp != val)
2419                 rt_cache_flush(net);
2420
2421         return ret;
2422 }
2423
/*
 * Initializer for one ctl_table entry named @name with mode @mval and
 * handler @proc.  .data points at slot IPV4_DEVCONF_<attr> - 1 of
 * ipv4_devconf.data and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2446
2447 static struct devinet_sysctl_table {
2448         struct ctl_table_header *sysctl_header;
2449         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2450 } devinet_sysctl = {
2451         .devinet_vars = {
2452                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2453                                              devinet_sysctl_forward),
2454                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2455                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2456
2457                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2458                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2459                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2460                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2461                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2462                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2463                                         "accept_source_route"),
2464                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2465                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2466                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2467                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2468                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2469                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2470                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2471                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2472                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2473                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2474                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2475                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2476                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2477                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2478                                         "force_igmp_version"),
2479                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2480                                         "igmpv2_unsolicited_report_interval"),
2481                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2482                                         "igmpv3_unsolicited_report_interval"),
2483                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2484                                         "ignore_routes_with_linkdown"),
2485                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2486                                         "drop_gratuitous_arp"),
2487
2488                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2489                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2490                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2491                                               "promote_secondaries"),
2492                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2493                                               "route_localnet"),
2494                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2495                                               "drop_unicast_in_l2_multicast"),
2496         },
2497 };
2498
2499 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2500                                      int ifindex, struct ipv4_devconf *p)
2501 {
2502         int i;
2503         struct devinet_sysctl_table *t;
2504         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2505
2506         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2507         if (!t)
2508                 goto out;
2509
2510         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2511                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2512                 t->devinet_vars[i].extra1 = p;
2513                 t->devinet_vars[i].extra2 = net;
2514         }
2515
2516         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2517
2518         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2519         if (!t->sysctl_header)
2520                 goto free;
2521
2522         p->sysctl = t;
2523
2524         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2525                                     ifindex, p);
2526         return 0;
2527
2528 free:
2529         kfree(t);
2530 out:
2531         return -ENOBUFS;
2532 }
2533
2534 static void __devinet_sysctl_unregister(struct net *net,
2535                                         struct ipv4_devconf *cnf, int ifindex)
2536 {
2537         struct devinet_sysctl_table *t = cnf->sysctl;
2538
2539         if (t) {
2540                 cnf->sysctl = NULL;
2541                 unregister_net_sysctl_table(t->sysctl_header);
2542                 kfree(t);
2543         }
2544
2545         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2546 }
2547
2548 static int devinet_sysctl_register(struct in_device *idev)
2549 {
2550         int err;
2551
2552         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2553                 return -EINVAL;
2554
2555         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2556         if (err)
2557                 return err;
2558         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2559                                         idev->dev->ifindex, &idev->cnf);
2560         if (err)
2561                 neigh_sysctl_unregister(idev->arp_parms);
2562         return err;
2563 }
2564
2565 static void devinet_sysctl_unregister(struct in_device *idev)
2566 {
2567         struct net *net = dev_net(idev->dev);
2568
2569         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2570         neigh_sysctl_unregister(idev->arp_parms);
2571 }
2572
2573 static struct ctl_table ctl_forward_entry[] = {
2574         {
2575                 .procname       = "ip_forward",
2576                 .data           = &ipv4_devconf.data[
2577                                         IPV4_DEVCONF_FORWARDING - 1],
2578                 .maxlen         = sizeof(int),
2579                 .mode           = 0644,
2580                 .proc_handler   = devinet_sysctl_forward,
2581                 .extra1         = &ipv4_devconf,
2582                 .extra2         = &init_net,
2583         },
2584         { },
2585 };
2586 #endif
2587
2588 static __net_init int devinet_init_net(struct net *net)
2589 {
2590         int err;
2591         struct ipv4_devconf *all, *dflt;
2592 #ifdef CONFIG_SYSCTL
2593         struct ctl_table *tbl;
2594         struct ctl_table_header *forw_hdr;
2595 #endif
2596
2597         err = -ENOMEM;
2598         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2599         if (!all)
2600                 goto err_alloc_all;
2601
2602         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2603         if (!dflt)
2604                 goto err_alloc_dflt;
2605
2606 #ifdef CONFIG_SYSCTL
2607         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2608         if (!tbl)
2609                 goto err_alloc_ctl;
2610
2611         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2612         tbl[0].extra1 = all;
2613         tbl[0].extra2 = net;
2614 #endif
2615
2616         if ((!IS_ENABLED(CONFIG_SYSCTL) ||
2617              sysctl_devconf_inherit_init_net != 2) &&
2618             !net_eq(net, &init_net)) {
2619                 memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
2620                 memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
2621         }
2622
2623 #ifdef CONFIG_SYSCTL
2624         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2625         if (err < 0)
2626                 goto err_reg_all;
2627
2628         err = __devinet_sysctl_register(net, "default",
2629                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2630         if (err < 0)
2631                 goto err_reg_dflt;
2632
2633         err = -ENOMEM;
2634         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2635         if (!forw_hdr)
2636                 goto err_reg_ctl;
2637         net->ipv4.forw_hdr = forw_hdr;
2638 #endif
2639
2640         net->ipv4.devconf_all = all;
2641         net->ipv4.devconf_dflt = dflt;
2642         return 0;
2643
2644 #ifdef CONFIG_SYSCTL
2645 err_reg_ctl:
2646         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2647 err_reg_dflt:
2648         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2649 err_reg_all:
2650         kfree(tbl);
2651 err_alloc_ctl:
2652 #endif
2653         kfree(dflt);
2654 err_alloc_dflt:
2655         kfree(all);
2656 err_alloc_all:
2657         return err;
2658 }
2659
2660 static __net_exit void devinet_exit_net(struct net *net)
2661 {
2662 #ifdef CONFIG_SYSCTL
2663         struct ctl_table *tbl;
2664
2665         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2666         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2667         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2668                                     NETCONFA_IFINDEX_DEFAULT);
2669         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2670                                     NETCONFA_IFINDEX_ALL);
2671         kfree(tbl);
2672 #endif
2673         kfree(net->ipv4.devconf_dflt);
2674         kfree(net->ipv4.devconf_all);
2675 }
2676
2677 static __net_initdata struct pernet_operations devinet_ops = {
2678         .init = devinet_init_net,
2679         .exit = devinet_exit_net,
2680 };
2681
2682 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2683         .family           = AF_INET,
2684         .fill_link_af     = inet_fill_link_af,
2685         .get_link_af_size = inet_get_link_af_size,
2686         .validate_link_af = inet_validate_link_af,
2687         .set_link_af      = inet_set_link_af,
2688 };
2689
2690 void __init devinet_init(void)
2691 {
2692         int i;
2693
2694         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2695                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2696
2697         register_pernet_subsys(&devinet_ops);
2698
2699         register_gifconf(PF_INET, inet_gifconf);
2700         register_netdevice_notifier(&ip_netdev_notifier);
2701
2702         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2703
2704         rtnl_af_register(&inet_af_ops);
2705
2706         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2707         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2708         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2709         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2710                       inet_netconf_dump_devconf, 0);
2711 }