Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6-block.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * Built-in IPv4 device configuration table.  Slots in .data are indexed
 * by IPV4_DEVCONF_<name> - 1; every slot not listed here starts at zero.
 * The two IGMP unsolicited-report intervals are expressed in milliseconds.
 * NOTE(review): presumably the template for the per-namespace "all"
 * settings -- its users are outside this part of the file; confirm.
 */
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
/*
 * Built-in "default" IPv4 device configuration table.  Same layout and
 * values as ipv4_devconf, except that ACCEPT_SOURCE_ROUTE is also enabled.
 * NOTE(review): presumably the initial content of the per-namespace
 * devconf_dflt that inetdev_init() copies into each new in_device --
 * the wiring is outside this part of the file; confirm.
 */
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer expression (not only a plain
 * identifier) can be passed without changing how the dereference parses.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
/*
 * Netlink attribute policy handed to nlmsg_parse() when decoding IPv4
 * address messages in this file.  Addresses, flags and route priority
 * are 32-bit unsigned values, the label is a string of at most
 * IFNAMSIZ - 1 bytes, the cache info entry is sized after
 * struct ifa_cacheinfo, and the target netns id is a signed 32-bit value.
 */
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
104 };
105
/*
 * Bundle of parameters for building an address netlink message.
 * NOTE(review): the consumer of this struct is outside this part of the
 * file; the per-field notes below are inferred from the names -- confirm.
 */
106 struct inet_fill_args {
107         u32 portid;     /* presumably the netlink port id of the receiver */
108         u32 seq;        /* presumably the netlink sequence number */
109         int event;      /* presumably the RTM_* message type */
110         unsigned int flags;     /* presumably the NLM_F_* header flags */
111         int netnsid;    /* presumably the target netns id (cf. IFA_TARGET_NETNSID) */
112         int ifindex;    /* presumably an interface index */
113 };
114
/*
 * Global hash table of configured IPv4 addresses.  It has
 * 1 << IN4_ADDR_HSIZE_SHIFT (256) buckets; the bucket index is produced
 * by inet_addr_hash() from the local address and the owning namespace.
 */
115 #define IN4_ADDR_HSIZE_SHIFT    8
116 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
117
118 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122         u32 val = (__force u32) addr ^ net_hash_mix(net);
123
124         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129         u32 hash = inet_addr_hash(net, ifa->ifa_local);
130
131         ASSERT_RTNL();
132         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137         ASSERT_RTNL();
138         hlist_del_init_rcu(&ifa->hash);
139 }
140
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151         struct net_device *result = NULL;
152         struct in_ifaddr *ifa;
153
154         rcu_read_lock();
155         ifa = inet_lookup_ifaddr_rcu(net, addr);
156         if (!ifa) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         } else {
170                 result = ifa->ifa_dev->dev;
171         }
172         if (result && devref)
173                 dev_hold(result);
174         rcu_read_unlock();
175         return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182         u32 hash = inet_addr_hash(net, addr);
183         struct in_ifaddr *ifa;
184
185         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186                 if (ifa->ifa_local == addr &&
187                     net_eq(dev_net(ifa->ifa_dev->dev), net))
188                         return ifa;
189
190         return NULL;
191 }
192
/* Forward declaration: sends an address netlink notification (defined later). */
193 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
194
/*
 * inetaddr_chain is called in this file with NETDEV_UP / NETDEV_DOWN and the
 * affected in_ifaddr; inetaddr_validator_chain is called with an
 * in_validator_info before a new address is committed.
 */
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
197 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
198                          int destroy);
/*
 * With CONFIG_SYSCTL the per-device sysctl hooks are defined later in the
 * file; without it they collapse to stubs (register always returns 0).
 */
199 #ifdef CONFIG_SYSCTL
200 static int devinet_sysctl_register(struct in_device *idev);
201 static void devinet_sysctl_unregister(struct in_device *idev);
202 #else
203 static int devinet_sysctl_register(struct in_device *idev)
204 {
205         return 0;
206 }
207 static void devinet_sysctl_unregister(struct in_device *idev)
208 {
209 }
210 #endif
211
212 /* Locks all the inet devices. */
213
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222         if (ifa->ifa_dev)
223                 in_dev_put(ifa->ifa_dev);
224         kfree(ifa);
225 }
226
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234         struct net_device *dev = idev->dev;
235
236         WARN_ON(idev->ifa_list);
237         WARN_ON(idev->mc_list);
238         kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242         dev_put(dev);
243         if (!idev->dead)
244                 pr_err("Freeing alive in_device %p\n", idev);
245         else
246                 kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252         struct in_device *in_dev;
253         int err = -ENOMEM;
254
255         ASSERT_RTNL();
256
257         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258         if (!in_dev)
259                 goto out;
260         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261                         sizeof(in_dev->cnf));
262         in_dev->cnf.sysctl = NULL;
263         in_dev->dev = dev;
264         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265         if (!in_dev->arp_parms)
266                 goto out_kfree;
267         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268                 dev_disable_lro(dev);
269         /* Reference in_dev->dev */
270         dev_hold(dev);
271         /* Account for reference dev->ip_ptr (below) */
272         refcount_set(&in_dev->refcnt, 1);
273
274         err = devinet_sysctl_register(in_dev);
275         if (err) {
276                 in_dev->dead = 1;
277                 in_dev_put(in_dev);
278                 in_dev = NULL;
279                 goto out;
280         }
281         ip_mc_init_dev(in_dev);
282         if (dev->flags & IFF_UP)
283                 ip_mc_up(in_dev);
284
285         /* we can receive as soon as ip_ptr is set -- do this last */
286         rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288         return in_dev ?: ERR_PTR(err);
289 out_kfree:
290         kfree(in_dev);
291         in_dev = NULL;
292         goto out;
293 }
294
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297         struct in_device *idev = container_of(head, struct in_device, rcu_head);
298         in_dev_put(idev);
299 }
300
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303         struct in_ifaddr *ifa;
304         struct net_device *dev;
305
306         ASSERT_RTNL();
307
308         dev = in_dev->dev;
309
310         in_dev->dead = 1;
311
312         ip_mc_destroy_dev(in_dev);
313
314         while ((ifa = in_dev->ifa_list) != NULL) {
315                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316                 inet_free_ifa(ifa);
317         }
318
319         RCU_INIT_POINTER(dev->ip_ptr, NULL);
320
321         devinet_sysctl_unregister(in_dev);
322         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323         arp_ifdown(dev);
324
325         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330         rcu_read_lock();
331         for_primary_ifa(in_dev) {
332                 if (inet_ifa_match(a, ifa)) {
333                         if (!b || inet_ifa_match(b, ifa)) {
334                                 rcu_read_unlock();
335                                 return 1;
336                         }
337                 }
338         } endfor_ifa(in_dev);
339         rcu_read_unlock();
340         return 0;
341 }
342
343 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
344                          int destroy, struct nlmsghdr *nlh, u32 portid)
345 {
346         struct in_ifaddr *promote = NULL;
347         struct in_ifaddr *ifa, *ifa1 = *ifap;
348         struct in_ifaddr *last_prim = in_dev->ifa_list;
349         struct in_ifaddr *prev_prom = NULL;
350         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
351
352         ASSERT_RTNL();
353
354         if (in_dev->dead)
355                 goto no_promotions;
356
357         /* 1. Deleting primary ifaddr forces deletion all secondaries
358          * unless alias promotion is set
359          **/
360
361         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
362                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
363
364                 while ((ifa = *ifap1) != NULL) {
365                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
366                             ifa1->ifa_scope <= ifa->ifa_scope)
367                                 last_prim = ifa;
368
369                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
370                             ifa1->ifa_mask != ifa->ifa_mask ||
371                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
372                                 ifap1 = &ifa->ifa_next;
373                                 prev_prom = ifa;
374                                 continue;
375                         }
376
377                         if (!do_promote) {
378                                 inet_hash_remove(ifa);
379                                 *ifap1 = ifa->ifa_next;
380
381                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
382                                 blocking_notifier_call_chain(&inetaddr_chain,
383                                                 NETDEV_DOWN, ifa);
384                                 inet_free_ifa(ifa);
385                         } else {
386                                 promote = ifa;
387                                 break;
388                         }
389                 }
390         }
391
392         /* On promotion all secondaries from subnet are changing
393          * the primary IP, we must remove all their routes silently
394          * and later to add them back with new prefsrc. Do this
395          * while all addresses are on the device list.
396          */
397         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
398                 if (ifa1->ifa_mask == ifa->ifa_mask &&
399                     inet_ifa_match(ifa1->ifa_address, ifa))
400                         fib_del_ifaddr(ifa, ifa1);
401         }
402
403 no_promotions:
404         /* 2. Unlink it */
405
406         *ifap = ifa1->ifa_next;
407         inet_hash_remove(ifa1);
408
409         /* 3. Announce address deletion */
410
411         /* Send message first, then call notifier.
412            At first sight, FIB update triggered by notifier
413            will refer to already deleted ifaddr, that could confuse
414            netlink listeners. It is not true: look, gated sees
415            that route deleted and if it still thinks that ifaddr
416            is valid, it will try to restore deleted routes... Grr.
417            So that, this order is correct.
418          */
419         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
420         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
421
422         if (promote) {
423                 struct in_ifaddr *next_sec = promote->ifa_next;
424
425                 if (prev_prom) {
426                         prev_prom->ifa_next = promote->ifa_next;
427                         promote->ifa_next = last_prim->ifa_next;
428                         last_prim->ifa_next = promote;
429                 }
430
431                 promote->ifa_flags &= ~IFA_F_SECONDARY;
432                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
433                 blocking_notifier_call_chain(&inetaddr_chain,
434                                 NETDEV_UP, promote);
435                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
436                         if (ifa1->ifa_mask != ifa->ifa_mask ||
437                             !inet_ifa_match(ifa1->ifa_address, ifa))
438                                         continue;
439                         fib_add_ifaddr(ifa);
440                 }
441
442         }
443         if (destroy)
444                 inet_free_ifa(ifa1);
445 }
446
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448                          int destroy)
449 {
450         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452
/*
 * Delayed work item running check_lifetime(); it is (re)queued whenever an
 * address is inserted and by check_lifetime() itself.
 */
453 static void check_lifetime(struct work_struct *work);
454
455 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458                              u32 portid, struct netlink_ext_ack *extack)
459 {
460         struct in_device *in_dev = ifa->ifa_dev;
461         struct in_ifaddr *ifa1, **ifap, **last_primary;
462         struct in_validator_info ivi;
463         int ret;
464
465         ASSERT_RTNL();
466
467         if (!ifa->ifa_local) {
468                 inet_free_ifa(ifa);
469                 return 0;
470         }
471
472         ifa->ifa_flags &= ~IFA_F_SECONDARY;
473         last_primary = &in_dev->ifa_list;
474
475         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476              ifap = &ifa1->ifa_next) {
477                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478                     ifa->ifa_scope <= ifa1->ifa_scope)
479                         last_primary = &ifa1->ifa_next;
480                 if (ifa1->ifa_mask == ifa->ifa_mask &&
481                     inet_ifa_match(ifa1->ifa_address, ifa)) {
482                         if (ifa1->ifa_local == ifa->ifa_local) {
483                                 inet_free_ifa(ifa);
484                                 return -EEXIST;
485                         }
486                         if (ifa1->ifa_scope != ifa->ifa_scope) {
487                                 inet_free_ifa(ifa);
488                                 return -EINVAL;
489                         }
490                         ifa->ifa_flags |= IFA_F_SECONDARY;
491                 }
492         }
493
494         /* Allow any devices that wish to register ifaddr validtors to weigh
495          * in now, before changes are committed.  The rntl lock is serializing
496          * access here, so the state should not change between a validator call
497          * and a final notify on commit.  This isn't invoked on promotion under
498          * the assumption that validators are checking the address itself, and
499          * not the flags.
500          */
501         ivi.ivi_addr = ifa->ifa_address;
502         ivi.ivi_dev = ifa->ifa_dev;
503         ivi.extack = extack;
504         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505                                            NETDEV_UP, &ivi);
506         ret = notifier_to_errno(ret);
507         if (ret) {
508                 inet_free_ifa(ifa);
509                 return ret;
510         }
511
512         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513                 prandom_seed((__force u32) ifa->ifa_local);
514                 ifap = last_primary;
515         }
516
517         ifa->ifa_next = *ifap;
518         *ifap = ifa;
519
520         inet_hash_insert(dev_net(in_dev->dev), ifa);
521
522         cancel_delayed_work(&check_lifetime_work);
523         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524
525         /* Send message first, then call notifier.
526            Notifier will trigger FIB update, so that
527            listeners of netlink will know about new ifaddr */
528         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530
531         return 0;
532 }
533
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536         return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541         struct in_device *in_dev = __in_dev_get_rtnl(dev);
542
543         ASSERT_RTNL();
544
545         if (!in_dev) {
546                 inet_free_ifa(ifa);
547                 return -ENOBUFS;
548         }
549         ipv4_devconf_setall(in_dev);
550         neigh_parms_data_state_setall(in_dev->arp_parms);
551         if (ifa->ifa_dev != in_dev) {
552                 WARN_ON(ifa->ifa_dev);
553                 in_dev_hold(in_dev);
554                 ifa->ifa_dev = in_dev;
555         }
556         if (ipv4_is_loopback(ifa->ifa_local))
557                 ifa->ifa_scope = RT_SCOPE_HOST;
558         return inet_insert_ifa(ifa);
559 }
560
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566         struct net_device *dev;
567         struct in_device *in_dev = NULL;
568
569         rcu_read_lock();
570         dev = dev_get_by_index_rcu(net, ifindex);
571         if (dev)
572                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573         rcu_read_unlock();
574         return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577
578 /* Called only from RTNL semaphored context. No locks. */
579
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581                                     __be32 mask)
582 {
583         ASSERT_RTNL();
584
585         for_primary_ifa(in_dev) {
586                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587                         return ifa;
588         } endfor_ifa(in_dev);
589         return NULL;
590 }
591
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594         struct ip_mreqn mreq = {
595                 .imr_multiaddr.s_addr = ifa->ifa_address,
596                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
597         };
598         int ret;
599
600         ASSERT_RTNL();
601
602         lock_sock(sk);
603         if (join)
604                 ret = ip_mc_join_group(sk, &mreq);
605         else
606                 ret = ip_mc_leave_group(sk, &mreq);
607         release_sock(sk);
608
609         return ret;
610 }
611
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613                             struct netlink_ext_ack *extack)
614 {
615         struct net *net = sock_net(skb->sk);
616         struct nlattr *tb[IFA_MAX+1];
617         struct in_device *in_dev;
618         struct ifaddrmsg *ifm;
619         struct in_ifaddr *ifa, **ifap;
620         int err = -EINVAL;
621
622         ASSERT_RTNL();
623
624         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625                           extack);
626         if (err < 0)
627                 goto errout;
628
629         ifm = nlmsg_data(nlh);
630         in_dev = inetdev_by_index(net, ifm->ifa_index);
631         if (!in_dev) {
632                 err = -ENODEV;
633                 goto errout;
634         }
635
636         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637              ifap = &ifa->ifa_next) {
638                 if (tb[IFA_LOCAL] &&
639                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640                         continue;
641
642                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643                         continue;
644
645                 if (tb[IFA_ADDRESS] &&
646                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648                         continue;
649
650                 if (ipv4_is_multicast(ifa->ifa_address))
651                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653                 return 0;
654         }
655
656         err = -EADDRNOTAVAIL;
657 errout:
658         return err;
659 }
660
/*
 * Lifetime value meaning "never expires": check_lifetime() skips the
 * valid/preferred expiry tests for lifetimes equal to this value.
 */
661 #define INFINITY_LIFE_TIME      0xFFFFFFFF
662
663 static void check_lifetime(struct work_struct *work)
664 {
665         unsigned long now, next, next_sec, next_sched;
666         struct in_ifaddr *ifa;
667         struct hlist_node *n;
668         int i;
669
670         now = jiffies;
671         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
672
673         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
674                 bool change_needed = false;
675
676                 rcu_read_lock();
677                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
678                         unsigned long age;
679
680                         if (ifa->ifa_flags & IFA_F_PERMANENT)
681                                 continue;
682
683                         /* We try to batch several events at once. */
684                         age = (now - ifa->ifa_tstamp +
685                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
686
687                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
688                             age >= ifa->ifa_valid_lft) {
689                                 change_needed = true;
690                         } else if (ifa->ifa_preferred_lft ==
691                                    INFINITY_LIFE_TIME) {
692                                 continue;
693                         } else if (age >= ifa->ifa_preferred_lft) {
694                                 if (time_before(ifa->ifa_tstamp +
695                                                 ifa->ifa_valid_lft * HZ, next))
696                                         next = ifa->ifa_tstamp +
697                                                ifa->ifa_valid_lft * HZ;
698
699                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
700                                         change_needed = true;
701                         } else if (time_before(ifa->ifa_tstamp +
702                                                ifa->ifa_preferred_lft * HZ,
703                                                next)) {
704                                 next = ifa->ifa_tstamp +
705                                        ifa->ifa_preferred_lft * HZ;
706                         }
707                 }
708                 rcu_read_unlock();
709                 if (!change_needed)
710                         continue;
711                 rtnl_lock();
712                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
713                         unsigned long age;
714
715                         if (ifa->ifa_flags & IFA_F_PERMANENT)
716                                 continue;
717
718                         /* We try to batch several events at once. */
719                         age = (now - ifa->ifa_tstamp +
720                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721
722                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723                             age >= ifa->ifa_valid_lft) {
724                                 struct in_ifaddr **ifap;
725
726                                 for (ifap = &ifa->ifa_dev->ifa_list;
727                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
728                                         if (*ifap == ifa) {
729                                                 inet_del_ifa(ifa->ifa_dev,
730                                                              ifap, 1);
731                                                 break;
732                                         }
733                                 }
734                         } else if (ifa->ifa_preferred_lft !=
735                                    INFINITY_LIFE_TIME &&
736                                    age >= ifa->ifa_preferred_lft &&
737                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
738                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
739                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
740                         }
741                 }
742                 rtnl_unlock();
743         }
744
745         next_sec = round_jiffies_up(next);
746         next_sched = next;
747
748         /* If rounded timeout is accurate enough, accept it. */
749         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
750                 next_sched = next_sec;
751
752         now = jiffies;
753         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
754         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
755                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
756
757         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
758                         next_sched - now);
759 }
760
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762                              __u32 prefered_lft)
763 {
764         unsigned long timeout;
765
766         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767
768         timeout = addrconf_timeout_fixup(valid_lft, HZ);
769         if (addrconf_finite_timeout(timeout))
770                 ifa->ifa_valid_lft = timeout;
771         else
772                 ifa->ifa_flags |= IFA_F_PERMANENT;
773
774         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775         if (addrconf_finite_timeout(timeout)) {
776                 if (timeout == 0)
777                         ifa->ifa_flags |= IFA_F_DEPRECATED;
778                 ifa->ifa_preferred_lft = timeout;
779         }
780         ifa->ifa_tstamp = jiffies;
781         if (!ifa->ifa_cstamp)
782                 ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
787                                        struct netlink_ext_ack *extack)
788 {
789         struct nlattr *tb[IFA_MAX+1];
790         struct in_ifaddr *ifa;
791         struct ifaddrmsg *ifm;
792         struct net_device *dev;
793         struct in_device *in_dev;
794         int err;
795
796         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797                           extack);
798         if (err < 0)
799                 goto errout;
800
801         ifm = nlmsg_data(nlh);
802         err = -EINVAL;
803         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804                 goto errout;
805
806         dev = __dev_get_by_index(net, ifm->ifa_index);
807         err = -ENODEV;
808         if (!dev)
809                 goto errout;
810
811         in_dev = __in_dev_get_rtnl(dev);
812         err = -ENOBUFS;
813         if (!in_dev)
814                 goto errout;
815
816         ifa = inet_alloc_ifa();
817         if (!ifa)
818                 /*
819                  * A potential indev allocation can be left alive, it stays
820                  * assigned to its device and is destroy with it.
821                  */
822                 goto errout;
823
824         ipv4_devconf_setall(in_dev);
825         neigh_parms_data_state_setall(in_dev->arp_parms);
826         in_dev_hold(in_dev);
827
828         if (!tb[IFA_ADDRESS])
829                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830
831         INIT_HLIST_NODE(&ifa->hash);
832         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835                                          ifm->ifa_flags;
836         ifa->ifa_scope = ifm->ifa_scope;
837         ifa->ifa_dev = in_dev;
838
839         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841
842         if (tb[IFA_BROADCAST])
843                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844
845         if (tb[IFA_LABEL])
846                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847         else
848                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849
850         if (tb[IFA_RT_PRIORITY])
851                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852
853         if (tb[IFA_CACHEINFO]) {
854                 struct ifa_cacheinfo *ci;
855
856                 ci = nla_data(tb[IFA_CACHEINFO]);
857                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858                         err = -EINVAL;
859                         goto errout_free;
860                 }
861                 *pvalid_lft = ci->ifa_valid;
862                 *pprefered_lft = ci->ifa_prefered;
863         }
864
865         return ifa;
866
867 errout_free:
868         inet_free_ifa(ifa);
869 errout:
870         return ERR_PTR(err);
871 }
872
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875         struct in_device *in_dev = ifa->ifa_dev;
876         struct in_ifaddr *ifa1, **ifap;
877
878         if (!ifa->ifa_local)
879                 return NULL;
880
881         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882              ifap = &ifa1->ifa_next) {
883                 if (ifa1->ifa_mask == ifa->ifa_mask &&
884                     inet_ifa_match(ifa1->ifa_address, ifa) &&
885                     ifa1->ifa_local == ifa->ifa_local)
886                         return ifa1;
887         }
888         return NULL;
889 }
890
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892                             struct netlink_ext_ack *extack)
893 {
894         struct net *net = sock_net(skb->sk);
895         struct in_ifaddr *ifa;
896         struct in_ifaddr *ifa_existing;
897         __u32 valid_lft = INFINITY_LIFE_TIME;
898         __u32 prefered_lft = INFINITY_LIFE_TIME;
899
900         ASSERT_RTNL();
901
902         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903         if (IS_ERR(ifa))
904                 return PTR_ERR(ifa);
905
906         ifa_existing = find_matching_ifa(ifa);
907         if (!ifa_existing) {
908                 /* It would be best to check for !NLM_F_CREATE here but
909                  * userspace already relies on not having to provide this.
910                  */
911                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914                                                true, ifa);
915
916                         if (ret < 0) {
917                                 inet_free_ifa(ifa);
918                                 return ret;
919                         }
920                 }
921                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922                                          extack);
923         } else {
924                 u32 new_metric = ifa->ifa_rt_priority;
925
926                 inet_free_ifa(ifa);
927
928                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
929                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
930                         return -EEXIST;
931                 ifa = ifa_existing;
932
933                 if (ifa->ifa_rt_priority != new_metric) {
934                         fib_modify_prefix_metric(ifa, new_metric);
935                         ifa->ifa_rt_priority = new_metric;
936                 }
937
938                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939                 cancel_delayed_work(&check_lifetime_work);
940                 queue_delayed_work(system_power_efficient_wq,
941                                 &check_lifetime_work, 0);
942                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943         }
944         return 0;
945 }
946
947 /*
948  *      Determine a default network mask, based on the IP address.
949  */
950
951 static int inet_abc_len(__be32 addr)
952 {
953         int rc = -1;    /* Something else, probably a multicast. */
954
955         if (ipv4_is_zeronet(addr))
956                 rc = 0;
957         else {
958                 __u32 haddr = ntohl(addr);
959
960                 if (IN_CLASSA(haddr))
961                         rc = 8;
962                 else if (IN_CLASSB(haddr))
963                         rc = 16;
964                 else if (IN_CLASSC(haddr))
965                         rc = 24;
966         }
967
968         return rc;
969 }
970
971
/*
 * devinet_ioctl - handle the per-interface IPv4 address ioctls.
 * @net: network namespace used for the device lookup and capability check
 * @cmd: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIFFLAGS or
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK}; any other value gives -EINVAL
 * @ifr: request; ifr_name names the device (optionally "dev:alias" to pick
 *       an address label) and ifr_addr carries the address in and/or out
 *
 * The "set" commands require CAP_NET_ADMIN in net->user_ns (-EPERM
 * otherwise); the address-setting ones additionally require an AF_INET
 * address (-EINVAL otherwise).  The device lookup and everything after it
 * runs with the RTNL lock held.
 *
 * Returns 0 on success or a negative errno: -ENODEV when the device does
 * not exist, -EADDRNOTAVAIL when no matching address entry is found for a
 * command that needs one, -ENOBUFS when a new entry cannot be allocated,
 * -EINVAL for an unacceptable address or mask, or whatever
 * dev_change_flags() / inet_set_ifa() return.
 */
972 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
973 {
974         struct sockaddr_in sin_orig;
975         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
976         struct in_device *in_dev;
977         struct in_ifaddr **ifap = NULL;
978         struct in_ifaddr *ifa = NULL;
979         struct net_device *dev;
980         char *colon;
981         int ret = -EFAULT;
982         int tryaddrmatch = 0;
983
            /* Force NUL termination before the name is used as a string. */
984         ifr->ifr_name[IFNAMSIZ - 1] = 0;
985
986         /* save original address for comparison */
987         memcpy(&sin_orig, sin, sizeof(*sin));
988
            /* Cut the name at ':' so dev_load() and the device lookup below
             * see only the base device name; the ':' is put back once the
             * device has been found.
             */
989         colon = strchr(ifr->ifr_name, ':');
990         if (colon)
991                 *colon = 0;
992
993         dev_load(net, ifr->ifr_name);
994
            /* First pass over cmd: permission and argument checks only. */
995         switch (cmd) {
996         case SIOCGIFADDR:       /* Get interface address */
997         case SIOCGIFBRDADDR:    /* Get the broadcast address */
998         case SIOCGIFDSTADDR:    /* Get the destination address */
999         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1000                 /* Note that these ioctls will not sleep,
1001                    so that we do not impose a lock.
1002                    One day we will be forced to put shlock here (I mean SMP)
1003                  */
1004                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1005                 memset(sin, 0, sizeof(*sin));
1006                 sin->sin_family = AF_INET;
1007                 break;
1008
1009         case SIOCSIFFLAGS:
1010                 ret = -EPERM;
1011                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1012                         goto out;
1013                 break;
1014         case SIOCSIFADDR:       /* Set interface address (and family) */
1015         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1016         case SIOCSIFDSTADDR:    /* Set the destination address */
1017         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1018                 ret = -EPERM;
1019                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1020                         goto out;
1021                 ret = -EINVAL;
1022                 if (sin->sin_family != AF_INET)
1023                         goto out;
1024                 break;
1025         default:
1026                 ret = -EINVAL;
1027                 goto out;
1028         }
1029
1030         rtnl_lock();
1031
1032         ret = -ENODEV;
1033         dev = __dev_get_by_name(net, ifr->ifr_name);
1034         if (!dev)
1035                 goto done;
1036
             /* Restore the full "dev:alias" name for the label comparisons. */
1037         if (colon)
1038                 *colon = ':';
1039
1040         in_dev = __in_dev_get_rtnl(dev);
1041         if (in_dev) {
1042                 if (tryaddrmatch) {
1043                         /* Matthias Andree */
1044                         /* compare label and address (4.4BSD style) */
1045                         /* note: we only do this for a limited set of ioctls
1046                            and only if the original address family was AF_INET.
1047                            This is checked above. */
1048                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1049                              ifap = &ifa->ifa_next) {
1050                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1051                                     sin_orig.sin_addr.s_addr ==
1052                                                         ifa->ifa_local) {
1053                                         break; /* found */
1054                                 }
1055                         }
1056                 }
1057                 /* we didn't get a match, maybe the application is
1058                    4.3BSD-style and passed in junk so we fall back to
1059                    comparing just the label */
1060                 if (!ifa) {
1061                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1062                              ifap = &ifa->ifa_next)
1063                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1064                                         break;
1065                 }
1066         }
1067
             /* Only SIOCSIFADDR and SIOCSIFFLAGS may continue without an
              * existing address entry; every other command dereferences ifa.
              */
1068         ret = -EADDRNOTAVAIL;
1069         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1070                 goto done;
1071
             /* Second pass over cmd: perform the operation. */
1072         switch (cmd) {
1073         case SIOCGIFADDR:       /* Get interface address */
1074                 ret = 0;
1075                 sin->sin_addr.s_addr = ifa->ifa_local;
1076                 break;
1077
1078         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1079                 ret = 0;
1080                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1081                 break;
1082
1083         case SIOCGIFDSTADDR:    /* Get the destination address */
1084                 ret = 0;
1085                 sin->sin_addr.s_addr = ifa->ifa_address;
1086                 break;
1087
1088         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1089                 ret = 0;
1090                 sin->sin_addr.s_addr = ifa->ifa_mask;
1091                 break;
1092
1093         case SIOCSIFFLAGS:
                     /* With an alias in the name the request targets that
                      * one address: clearing IFF_UP deletes it.  Without an
                      * alias the flags are applied to the device itself.
                      */
1094                 if (colon) {
1095                         ret = -EADDRNOTAVAIL;
1096                         if (!ifa)
1097                                 break;
1098                         ret = 0;
1099                         if (!(ifr->ifr_flags & IFF_UP))
1100                                 inet_del_ifa(in_dev, ifap, 1);
1101                         break;
1102                 }
1103                 ret = dev_change_flags(dev, ifr->ifr_flags);
1104                 break;
1105
1106         case SIOCSIFADDR:       /* Set interface address (and family) */
1107                 ret = -EINVAL;
1108                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1109                         break;
1110
1111                 if (!ifa) {
1112                         ret = -ENOBUFS;
1113                         ifa = inet_alloc_ifa();
1114                         if (!ifa)
1115                                 break;
1116                         INIT_HLIST_NODE(&ifa->hash);
1117                         if (colon)
1118                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1119                         else
1120                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1121                 } else {
1122                         ret = 0;
1123                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1124                                 break;
                             /* Entry is taken off the device and then reused
                              * below; presumably the final 0 asks
                              * inet_del_ifa() not to free it - confirm
                              * against inet_del_ifa().
                              */
1125                         inet_del_ifa(in_dev, ifap, 0);
1126                         ifa->ifa_broadcast = 0;
1127                         ifa->ifa_scope = 0;
1128                 }
1129
1130                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1131
                     /* Non point-to-point devices get a class-based default
                      * prefix; point-to-point devices get a /32.
                      */
1132                 if (!(dev->flags & IFF_POINTOPOINT)) {
1133                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1134                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1135                         if ((dev->flags & IFF_BROADCAST) &&
1136                             ifa->ifa_prefixlen < 31)
1137                                 ifa->ifa_broadcast = ifa->ifa_address |
1138                                                      ~ifa->ifa_mask;
1139                 } else {
1140                         ifa->ifa_prefixlen = 32;
1141                         ifa->ifa_mask = inet_make_mask(32);
1142                 }
1143                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1144                 ret = inet_set_ifa(dev, ifa);
1145                 break;
1146
1147         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1148                 ret = 0;
1149                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1150                         inet_del_ifa(in_dev, ifap, 0);
1151                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1152                         inet_insert_ifa(ifa);
1153                 }
1154                 break;
1155
1156         case SIOCSIFDSTADDR:    /* Set the destination address */
1157                 ret = 0;
1158                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1159                         break;
1160                 ret = -EINVAL;
1161                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1162                         break;
1163                 ret = 0;
1164                 inet_del_ifa(in_dev, ifap, 0);
1165                 ifa->ifa_address = sin->sin_addr.s_addr;
1166                 inet_insert_ifa(ifa);
1167                 break;
1168
1169         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1170
1171                 /*
1172                  *      The mask we set must be legal.
1173                  */
1174                 ret = -EINVAL;
1175                 if (bad_mask(sin->sin_addr.s_addr, 0))
1176                         break;
1177                 ret = 0;
1178                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1179                         __be32 old_mask = ifa->ifa_mask;
1180                         inet_del_ifa(in_dev, ifap, 0);
1181                         ifa->ifa_mask = sin->sin_addr.s_addr;
1182                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1183
1184                         /* See if current broadcast address matches
1185                          * with current netmask, then recalculate
1186                          * the broadcast address. Otherwise it's a
1187                          * funny address, so don't touch it since
1188                          * the user seems to know what (s)he's doing...
1189                          */
1190                         if ((dev->flags & IFF_BROADCAST) &&
1191                             (ifa->ifa_prefixlen < 31) &&
1192                             (ifa->ifa_broadcast ==
1193                              (ifa->ifa_local|~old_mask))) {
1194                                 ifa->ifa_broadcast = (ifa->ifa_local |
1195                                                       ~sin->sin_addr.s_addr);
1196                         }
1197                         inet_insert_ifa(ifa);
1198                 }
1199                 break;
1200         }
1201 done:
1202         rtnl_unlock();
1203 out:
1204         return ret;
1205 }
1206
1207 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1208 {
1209         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1210         struct in_ifaddr *ifa;
1211         struct ifreq ifr;
1212         int done = 0;
1213
1214         if (WARN_ON(size > sizeof(struct ifreq)))
1215                 goto out;
1216
1217         if (!in_dev)
1218                 goto out;
1219
1220         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1221                 if (!buf) {
1222                         done += size;
1223                         continue;
1224                 }
1225                 if (len < size)
1226                         break;
1227                 memset(&ifr, 0, sizeof(struct ifreq));
1228                 strcpy(ifr.ifr_name, ifa->ifa_label);
1229
1230                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1231                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1232                                                                 ifa->ifa_local;
1233
1234                 if (copy_to_user(buf + done, &ifr, size)) {
1235                         done = -EFAULT;
1236                         break;
1237                 }
1238                 len  -= size;
1239                 done += size;
1240         }
1241 out:
1242         return done;
1243 }
1244
1245 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1246                                  int scope)
1247 {
1248         for_primary_ifa(in_dev) {
1249                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1250                     ifa->ifa_scope <= scope)
1251                         return ifa->ifa_local;
1252         } endfor_ifa(in_dev);
1253
1254         return 0;
1255 }
1256
1257 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1258 {
1259         __be32 addr = 0;
1260         struct in_device *in_dev;
1261         struct net *net = dev_net(dev);
1262         int master_idx;
1263
1264         rcu_read_lock();
1265         in_dev = __in_dev_get_rcu(dev);
1266         if (!in_dev)
1267                 goto no_in_dev;
1268
1269         for_primary_ifa(in_dev) {
1270                 if (ifa->ifa_scope > scope)
1271                         continue;
1272                 if (!dst || inet_ifa_match(dst, ifa)) {
1273                         addr = ifa->ifa_local;
1274                         break;
1275                 }
1276                 if (!addr)
1277                         addr = ifa->ifa_local;
1278         } endfor_ifa(in_dev);
1279
1280         if (addr)
1281                 goto out_unlock;
1282 no_in_dev:
1283         master_idx = l3mdev_master_ifindex_rcu(dev);
1284
1285         /* For VRFs, the VRF device takes the place of the loopback device,
1286          * with addresses on it being preferred.  Note in such cases the
1287          * loopback device will be among the devices that fail the master_idx
1288          * equality check in the loop below.
1289          */
1290         if (master_idx &&
1291             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1292             (in_dev = __in_dev_get_rcu(dev))) {
1293                 addr = in_dev_select_addr(in_dev, scope);
1294                 if (addr)
1295                         goto out_unlock;
1296         }
1297
1298         /* Not loopback addresses on loopback should be preferred
1299            in this case. It is important that lo is the first interface
1300            in dev_base list.
1301          */
1302         for_each_netdev_rcu(net, dev) {
1303                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1304                         continue;
1305
1306                 in_dev = __in_dev_get_rcu(dev);
1307                 if (!in_dev)
1308                         continue;
1309
1310                 addr = in_dev_select_addr(in_dev, scope);
1311                 if (addr)
1312                         goto out_unlock;
1313         }
1314 out_unlock:
1315         rcu_read_unlock();
1316         return addr;
1317 }
1318 EXPORT_SYMBOL(inet_select_addr);
1319
/*
 * Check one in_device for a local address that satisfies the @local, @dst
 * and @scope constraints (0 for @local or @dst means "any").
 *
 * Two things are tracked while walking the address list:
 *   addr - the candidate local address: the first entry whose ifa_local
 *          equals @local (or any entry when @local is 0) with a scope not
 *          exceeding @scope;
 *   same - whether an entry has been seen whose subnet contains both
 *          @local and @dst (each test is skipped when the value is 0).
 *
 * Returns the candidate address when a subnet match was also found,
 * otherwise 0.
 */
1320 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1321                               __be32 local, int scope)
1322 {
1323         int same = 0;
1324         __be32 addr = 0;
1325
1326         for_ifa(in_dev) {
                     /* Pick the candidate address once; if the subnet test
                      * already succeeded on an earlier entry we are done.
                      */
1327                 if (!addr &&
1328                     (local == ifa->ifa_local || !local) &&
1329                     ifa->ifa_scope <= scope) {
1330                         addr = ifa->ifa_local;
1331                         if (same)
1332                                 break;
1333                 }
1334                 if (!same) {
1335                         same = (!local || inet_ifa_match(local, ifa)) &&
1336                                 (!dst || inet_ifa_match(dst, ifa));
1337                         if (same && addr) {
                                     /* Only the "autoselect local for a given
                                      * dst" case (local == 0, dst != 0) needs
                                      * the extra checks below.
                                      */
1338                                 if (local || !dst)
1339                                         break;
1340                                 /* Is the selected addr into dst subnet? */
1341                                 if (inet_ifa_match(addr, ifa))
1342                                         break;
1343                                 /* No, then can we use new local src? */
1344                                 if (ifa->ifa_scope <= scope) {
1345                                         addr = ifa->ifa_local;
1346                                         break;
1347                                 }
1348                                 /* search for large dst subnet for addr */
1349                                 same = 0;
1350                         }
1351                 }
1352         } endfor_ifa(in_dev);
1353
1354         return same ? addr : 0;
1355 }
1356
1357 /*
1358  * Confirm that local IP address exists using wildcards:
1359  * - net: netns to check, cannot be NULL
1360  * - in_dev: only on this interface, NULL=any interface
1361  * - dst: only in the same subnet as dst, 0=any dst
1362  * - local: address, 0=autoselect the local address
1363  * - scope: maximum allowed scope value for the local address
1364  */
1365 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1366                          __be32 dst, __be32 local, int scope)
1367 {
1368         __be32 addr = 0;
1369         struct net_device *dev;
1370
1371         if (in_dev)
1372                 return confirm_addr_indev(in_dev, dst, local, scope);
1373
1374         rcu_read_lock();
1375         for_each_netdev_rcu(net, dev) {
1376                 in_dev = __in_dev_get_rcu(dev);
1377                 if (in_dev) {
1378                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1379                         if (addr)
1380                                 break;
1381                 }
1382         }
1383         rcu_read_unlock();
1384
1385         return addr;
1386 }
1387 EXPORT_SYMBOL(inet_confirm_addr);
1388
1389 /*
1390  *      Device notifier
1391  */
1392
1393 int register_inetaddr_notifier(struct notifier_block *nb)
1394 {
1395         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1396 }
1397 EXPORT_SYMBOL(register_inetaddr_notifier);
1398
1399 int unregister_inetaddr_notifier(struct notifier_block *nb)
1400 {
1401         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1402 }
1403 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1404
1405 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1406 {
1407         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1408 }
1409 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1410
1411 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1412 {
1413         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1414             nb);
1415 }
1416 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1417
1418 /* Rename ifa_labels for a device name change. Make some effort to preserve
1419  * existing alias numbering and to create unique labels if possible.
1420 */
1421 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1422 {
1423         struct in_ifaddr *ifa;
1424         int named = 0;
1425
1426         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1427                 char old[IFNAMSIZ], *dot;
1428
1429                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1430                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1431                 if (named++ == 0)
1432                         goto skip;
1433                 dot = strchr(old, ':');
1434                 if (!dot) {
1435                         sprintf(old, ":%d", named);
1436                         dot = old;
1437                 }
1438                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1439                         strcat(ifa->ifa_label, dot);
1440                 else
1441                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1442 skip:
1443                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1444         }
1445 }
1446
1447 static bool inetdev_valid_mtu(unsigned int mtu)
1448 {
1449         return mtu >= IPV4_MIN_MTU;
1450 }
1451
1452 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1453                                         struct in_device *in_dev)
1454
1455 {
1456         struct in_ifaddr *ifa;
1457
1458         for (ifa = in_dev->ifa_list; ifa;
1459              ifa = ifa->ifa_next) {
1460                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1461                          ifa->ifa_local, dev,
1462                          ifa->ifa_local, NULL,
1463                          dev->dev_addr, NULL);
1464         }
1465 }
1466
1467 /* Called only under RTNL semaphore */
1468
/*
 * inetdev_event - netdevice notifier callback keeping IPv4 state in sync.
 * @this:  the notifier block (not used in the body)
 * @event: NETDEV_* event code
 * @ptr:   notifier info; the device is extracted from it with
 *         netdev_notifier_info_to_dev()
 *
 * If the device has no in_device yet, one is created on NETDEV_REGISTER
 * (and on NETDEV_CHANGEMTU when the MTU is acceptable); all other events
 * are ignored in that state.  Otherwise the event is dispatched in the
 * switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
1469 static int inetdev_event(struct notifier_block *this, unsigned long event,
1470                          void *ptr)
1471 {
1472         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1473         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1474
1475         ASSERT_RTNL();
1476
1477         if (!in_dev) {
1478                 if (event == NETDEV_REGISTER) {
1479                         in_dev = inetdev_init(dev);
1480                         if (IS_ERR(in_dev))
1481                                 return notifier_from_errno(PTR_ERR(in_dev));
1482                         if (dev->flags & IFF_LOOPBACK) {
1483                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1484                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1485                         }
1486                 } else if (event == NETDEV_CHANGEMTU) {
1487                         /* Re-enabling IP */
                             /* NOTE(review): unlike the NETDEV_REGISTER path,
                              * the inetdev_init() result is not checked with
                              * IS_ERR() here and is not used afterwards.
                              */
1488                         if (inetdev_valid_mtu(dev->mtu))
1489                                 in_dev = inetdev_init(dev);
1490                 }
1491                 goto out;
1492         }
1493
1494         switch (event) {
1495         case NETDEV_REGISTER:
                     /* Registering a device that already has an in_device is
                      * unexpected; the pointer is cleared.
                      */
1496                 pr_debug("%s: bug\n", __func__);
1497                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1498                 break;
1499         case NETDEV_UP:
1500                 if (!inetdev_valid_mtu(dev->mtu))
1501                         break;
                     /* A loopback device coming up is given the
                      * INADDR_LOOPBACK address with prefix length 8 and host
                      * scope; allocation failure is silently tolerated.
                      */
1502                 if (dev->flags & IFF_LOOPBACK) {
1503                         struct in_ifaddr *ifa = inet_alloc_ifa();
1504
1505                         if (ifa) {
1506                                 INIT_HLIST_NODE(&ifa->hash);
1507                                 ifa->ifa_local =
1508                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1509                                 ifa->ifa_prefixlen = 8;
1510                                 ifa->ifa_mask = inet_make_mask(8);
1511                                 in_dev_hold(in_dev);
1512                                 ifa->ifa_dev = in_dev;
1513                                 ifa->ifa_scope = RT_SCOPE_HOST;
1514                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1515                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1516                                                  INFINITY_LIFE_TIME);
1517                                 ipv4_devconf_setall(in_dev);
1518                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1519                                 inet_insert_ifa(ifa);
1520                         }
1521                 }
1522                 ip_mc_up(in_dev);
1523                 /* fall through */
1524         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when
                      * IN_DEV_ARP_NOTIFY() is true for the device.
                      */
1525                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1526                         break;
1527                 /* fall through */
1528         case NETDEV_NOTIFY_PEERS:
1529                 /* Send gratuitous ARP to notify of link change */
1530                 inetdev_send_gratuitous_arp(dev, in_dev);
1531                 break;
1532         case NETDEV_DOWN:
1533                 ip_mc_down(in_dev);
1534                 break;
1535         case NETDEV_PRE_TYPE_CHANGE:
1536                 ip_mc_unmap(in_dev);
1537                 break;
1538         case NETDEV_POST_TYPE_CHANGE:
1539                 ip_mc_remap(in_dev);
1540                 break;
1541         case NETDEV_CHANGEMTU:
1542                 if (inetdev_valid_mtu(dev->mtu))
1543                         break;
1544                 /* disable IP when MTU is not enough */
1545                 /* fall through */
1546         case NETDEV_UNREGISTER:
1547                 inetdev_destroy(in_dev);
1548                 break;
1549         case NETDEV_CHANGENAME:
1550                 /* Do not notify about label change, this event is
1551                  * not interesting to applications using netlink.
1552                  */
                     /* NOTE(review): inetdev_changename() in this file calls
                      * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
                      * "do not notify" wording above looks stale - confirm.
                      */
1553                 inetdev_changename(dev, in_dev);
1554
                     /* Re-register the sysctl entries for the device. */
1555                 devinet_sysctl_unregister(in_dev);
1556                 devinet_sysctl_register(in_dev);
1557                 break;
1558         }
1559 out:
1560         return NOTIFY_DONE;
1561 }
1562
1563 static struct notifier_block ip_netdev_notifier = {
1564         .notifier_call = inetdev_event,
1565 };
1566
1567 static size_t inet_nlmsg_size(void)
1568 {
1569         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1570                + nla_total_size(4) /* IFA_ADDRESS */
1571                + nla_total_size(4) /* IFA_LOCAL */
1572                + nla_total_size(4) /* IFA_BROADCAST */
1573                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1574                + nla_total_size(4)  /* IFA_FLAGS */
1575                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1576                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1577 }
1578
1579 static inline u32 cstamp_delta(unsigned long cstamp)
1580 {
1581         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1582 }
1583
1584 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1585                          unsigned long tstamp, u32 preferred, u32 valid)
1586 {
1587         struct ifa_cacheinfo ci;
1588
1589         ci.cstamp = cstamp_delta(cstamp);
1590         ci.tstamp = cstamp_delta(tstamp);
1591         ci.ifa_prefered = preferred;
1592         ci.ifa_valid = valid;
1593
1594         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1595 }
1596
1597 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1598                             struct inet_fill_args *args)
1599 {
1600         struct ifaddrmsg *ifm;
1601         struct nlmsghdr  *nlh;
1602         u32 preferred, valid;
1603
1604         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1605                         args->flags);
1606         if (!nlh)
1607                 return -EMSGSIZE;
1608
1609         ifm = nlmsg_data(nlh);
1610         ifm->ifa_family = AF_INET;
1611         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1612         ifm->ifa_flags = ifa->ifa_flags;
1613         ifm->ifa_scope = ifa->ifa_scope;
1614         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1615
1616         if (args->netnsid >= 0 &&
1617             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1618                 goto nla_put_failure;
1619
1620         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1621                 preferred = ifa->ifa_preferred_lft;
1622                 valid = ifa->ifa_valid_lft;
1623                 if (preferred != INFINITY_LIFE_TIME) {
1624                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1625
1626                         if (preferred > tval)
1627                                 preferred -= tval;
1628                         else
1629                                 preferred = 0;
1630                         if (valid != INFINITY_LIFE_TIME) {
1631                                 if (valid > tval)
1632                                         valid -= tval;
1633                                 else
1634                                         valid = 0;
1635                         }
1636                 }
1637         } else {
1638                 preferred = INFINITY_LIFE_TIME;
1639                 valid = INFINITY_LIFE_TIME;
1640         }
1641         if ((ifa->ifa_address &&
1642              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1643             (ifa->ifa_local &&
1644              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1645             (ifa->ifa_broadcast &&
1646              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1647             (ifa->ifa_label[0] &&
1648              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1649             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1650             (ifa->ifa_rt_priority &&
1651              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1652             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1653                           preferred, valid))
1654                 goto nla_put_failure;
1655
1656         nlmsg_end(skb, nlh);
1657         return 0;
1658
1659 nla_put_failure:
1660         nlmsg_cancel(skb, nlh);
1661         return -EMSGSIZE;
1662 }
1663
1664 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1665                                       struct inet_fill_args *fillargs,
1666                                       struct net **tgt_net, struct sock *sk,
1667                                       struct netlink_callback *cb)
1668 {
1669         struct netlink_ext_ack *extack = cb->extack;
1670         struct nlattr *tb[IFA_MAX+1];
1671         struct ifaddrmsg *ifm;
1672         int err, i;
1673
1674         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1675                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1676                 return -EINVAL;
1677         }
1678
1679         ifm = nlmsg_data(nlh);
1680         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1681                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1682                 return -EINVAL;
1683         }
1684
1685         fillargs->ifindex = ifm->ifa_index;
1686         if (fillargs->ifindex) {
1687                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1688                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1689         }
1690
1691         err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1692                                  ifa_ipv4_policy, extack);
1693         if (err < 0)
1694                 return err;
1695
1696         for (i = 0; i <= IFA_MAX; ++i) {
1697                 if (!tb[i])
1698                         continue;
1699
1700                 if (i == IFA_TARGET_NETNSID) {
1701                         struct net *net;
1702
1703                         fillargs->netnsid = nla_get_s32(tb[i]);
1704
1705                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1706                         if (IS_ERR(net)) {
1707                                 fillargs->netnsid = -1;
1708                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1709                                 return PTR_ERR(net);
1710                         }
1711                         *tgt_net = net;
1712                 } else {
1713                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1714                         return -EINVAL;
1715                 }
1716         }
1717
1718         return 0;
1719 }
1720
1721 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1722                             struct netlink_callback *cb, int s_ip_idx,
1723                             struct inet_fill_args *fillargs)
1724 {
1725         struct in_ifaddr *ifa;
1726         int ip_idx = 0;
1727         int err;
1728
1729         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1730                 if (ip_idx < s_ip_idx)
1731                         continue;
1732
1733                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1734                 if (err < 0)
1735                         goto done;
1736
1737                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1738         }
1739         err = 0;
1740
1741 done:
1742         cb->args[2] = ip_idx;
1743
1744         return err;
1745 }
1746
1747 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1748 {
1749         const struct nlmsghdr *nlh = cb->nlh;
1750         struct inet_fill_args fillargs = {
1751                 .portid = NETLINK_CB(cb->skb).portid,
1752                 .seq = nlh->nlmsg_seq,
1753                 .event = RTM_NEWADDR,
1754                 .flags = NLM_F_MULTI,
1755                 .netnsid = -1,
1756         };
1757         struct net *net = sock_net(skb->sk);
1758         struct net *tgt_net = net;
1759         int h, s_h;
1760         int idx, s_idx;
1761         int s_ip_idx;
1762         struct net_device *dev;
1763         struct in_device *in_dev;
1764         struct hlist_head *head;
1765         int err = 0;
1766
1767         s_h = cb->args[0];
1768         s_idx = idx = cb->args[1];
1769         s_ip_idx = cb->args[2];
1770
1771         if (cb->strict_check) {
1772                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1773                                                  skb->sk, cb);
1774                 if (err < 0)
1775                         goto put_tgt_net;
1776
1777                 err = 0;
1778                 if (fillargs.ifindex) {
1779                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1780                         if (!dev) {
1781                                 err = -ENODEV;
1782                                 goto put_tgt_net;
1783                         }
1784
1785                         in_dev = __in_dev_get_rtnl(dev);
1786                         if (in_dev) {
1787                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1788                                                        &fillargs);
1789                         }
1790                         goto put_tgt_net;
1791                 }
1792         }
1793
1794         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1795                 idx = 0;
1796                 head = &tgt_net->dev_index_head[h];
1797                 rcu_read_lock();
1798                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1799                           tgt_net->dev_base_seq;
1800                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1801                         if (idx < s_idx)
1802                                 goto cont;
1803                         if (h > s_h || idx > s_idx)
1804                                 s_ip_idx = 0;
1805                         in_dev = __in_dev_get_rcu(dev);
1806                         if (!in_dev)
1807                                 goto cont;
1808
1809                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1810                                                &fillargs);
1811                         if (err < 0) {
1812                                 rcu_read_unlock();
1813                                 goto done;
1814                         }
1815 cont:
1816                         idx++;
1817                 }
1818                 rcu_read_unlock();
1819         }
1820
1821 done:
1822         cb->args[0] = h;
1823         cb->args[1] = idx;
1824 put_tgt_net:
1825         if (fillargs.netnsid >= 0)
1826                 put_net(tgt_net);
1827
1828         return err < 0 ? err : skb->len;
1829 }
1830
1831 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1832                       u32 portid)
1833 {
1834         struct inet_fill_args fillargs = {
1835                 .portid = portid,
1836                 .seq = nlh ? nlh->nlmsg_seq : 0,
1837                 .event = event,
1838                 .flags = 0,
1839                 .netnsid = -1,
1840         };
1841         struct sk_buff *skb;
1842         int err = -ENOBUFS;
1843         struct net *net;
1844
1845         net = dev_net(ifa->ifa_dev->dev);
1846         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1847         if (!skb)
1848                 goto errout;
1849
1850         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1851         if (err < 0) {
1852                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1853                 WARN_ON(err == -EMSGSIZE);
1854                 kfree_skb(skb);
1855                 goto errout;
1856         }
1857         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1858         return;
1859 errout:
1860         if (err < 0)
1861                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1862 }
1863
1864 static size_t inet_get_link_af_size(const struct net_device *dev,
1865                                     u32 ext_filter_mask)
1866 {
1867         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1868
1869         if (!in_dev)
1870                 return 0;
1871
1872         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1873 }
1874
1875 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1876                              u32 ext_filter_mask)
1877 {
1878         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1879         struct nlattr *nla;
1880         int i;
1881
1882         if (!in_dev)
1883                 return -ENODATA;
1884
1885         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1886         if (!nla)
1887                 return -EMSGSIZE;
1888
1889         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1890                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1891
1892         return 0;
1893 }
1894
/*
 * Attribute policy applied by inet_validate_link_af() to the nested AF_INET
 * link attributes: IFLA_INET_CONF must itself be a nested attribute.
 */
1895 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1896         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1897 };
1898
1899 static int inet_validate_link_af(const struct net_device *dev,
1900                                  const struct nlattr *nla)
1901 {
1902         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1903         int err, rem;
1904
1905         if (dev && !__in_dev_get_rcu(dev))
1906                 return -EAFNOSUPPORT;
1907
1908         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1909         if (err < 0)
1910                 return err;
1911
1912         if (tb[IFLA_INET_CONF]) {
1913                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1914                         int cfgid = nla_type(a);
1915
1916                         if (nla_len(a) < 4)
1917                                 return -EINVAL;
1918
1919                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1920                                 return -EINVAL;
1921                 }
1922         }
1923
1924         return 0;
1925 }
1926
1927 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1928 {
1929         struct in_device *in_dev = __in_dev_get_rcu(dev);
1930         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1931         int rem;
1932
1933         if (!in_dev)
1934                 return -EAFNOSUPPORT;
1935
1936         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1937                 BUG();
1938
1939         if (tb[IFLA_INET_CONF]) {
1940                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1941                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1942         }
1943
1944         return 0;
1945 }
1946
1947 static int inet_netconf_msgsize_devconf(int type)
1948 {
1949         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1950                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1951         bool all = false;
1952
1953         if (type == NETCONFA_ALL)
1954                 all = true;
1955
1956         if (all || type == NETCONFA_FORWARDING)
1957                 size += nla_total_size(4);
1958         if (all || type == NETCONFA_RP_FILTER)
1959                 size += nla_total_size(4);
1960         if (all || type == NETCONFA_MC_FORWARDING)
1961                 size += nla_total_size(4);
1962         if (all || type == NETCONFA_BC_FORWARDING)
1963                 size += nla_total_size(4);
1964         if (all || type == NETCONFA_PROXY_NEIGH)
1965                 size += nla_total_size(4);
1966         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1967                 size += nla_total_size(4);
1968
1969         return size;
1970 }
1971
1972 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1973                                      struct ipv4_devconf *devconf, u32 portid,
1974                                      u32 seq, int event, unsigned int flags,
1975                                      int type)
1976 {
1977         struct nlmsghdr  *nlh;
1978         struct netconfmsg *ncm;
1979         bool all = false;
1980
1981         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1982                         flags);
1983         if (!nlh)
1984                 return -EMSGSIZE;
1985
1986         if (type == NETCONFA_ALL)
1987                 all = true;
1988
1989         ncm = nlmsg_data(nlh);
1990         ncm->ncm_family = AF_INET;
1991
1992         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1993                 goto nla_put_failure;
1994
1995         if (!devconf)
1996                 goto out;
1997
1998         if ((all || type == NETCONFA_FORWARDING) &&
1999             nla_put_s32(skb, NETCONFA_FORWARDING,
2000                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2001                 goto nla_put_failure;
2002         if ((all || type == NETCONFA_RP_FILTER) &&
2003             nla_put_s32(skb, NETCONFA_RP_FILTER,
2004                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2005                 goto nla_put_failure;
2006         if ((all || type == NETCONFA_MC_FORWARDING) &&
2007             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2008                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2009                 goto nla_put_failure;
2010         if ((all || type == NETCONFA_BC_FORWARDING) &&
2011             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2012                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2013                 goto nla_put_failure;
2014         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2015             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2016                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2017                 goto nla_put_failure;
2018         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2019             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2020                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2021                 goto nla_put_failure;
2022
2023 out:
2024         nlmsg_end(skb, nlh);
2025         return 0;
2026
2027 nla_put_failure:
2028         nlmsg_cancel(skb, nlh);
2029         return -EMSGSIZE;
2030 }
2031
2032 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2033                                  int ifindex, struct ipv4_devconf *devconf)
2034 {
2035         struct sk_buff *skb;
2036         int err = -ENOBUFS;
2037
2038         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2039         if (!skb)
2040                 goto errout;
2041
2042         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2043                                         event, 0, type);
2044         if (err < 0) {
2045                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2046                 WARN_ON(err == -EMSGSIZE);
2047                 kfree_skb(skb);
2048                 goto errout;
2049         }
2050         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2051         return;
2052 errout:
2053         if (err < 0)
2054                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2055 }
2056
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each listed NETCONFA_* attribute has its .len set to sizeof(int).
 */
2057 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2058         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2059         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2060         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2061         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2062         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2063 };
2064
2065 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2066                                     struct nlmsghdr *nlh,
2067                                     struct netlink_ext_ack *extack)
2068 {
2069         struct net *net = sock_net(in_skb->sk);
2070         struct nlattr *tb[NETCONFA_MAX+1];
2071         struct netconfmsg *ncm;
2072         struct sk_buff *skb;
2073         struct ipv4_devconf *devconf;
2074         struct in_device *in_dev;
2075         struct net_device *dev;
2076         int ifindex;
2077         int err;
2078
2079         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2080                           devconf_ipv4_policy, extack);
2081         if (err < 0)
2082                 goto errout;
2083
2084         err = -EINVAL;
2085         if (!tb[NETCONFA_IFINDEX])
2086                 goto errout;
2087
2088         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2089         switch (ifindex) {
2090         case NETCONFA_IFINDEX_ALL:
2091                 devconf = net->ipv4.devconf_all;
2092                 break;
2093         case NETCONFA_IFINDEX_DEFAULT:
2094                 devconf = net->ipv4.devconf_dflt;
2095                 break;
2096         default:
2097                 dev = __dev_get_by_index(net, ifindex);
2098                 if (!dev)
2099                         goto errout;
2100                 in_dev = __in_dev_get_rtnl(dev);
2101                 if (!in_dev)
2102                         goto errout;
2103                 devconf = &in_dev->cnf;
2104                 break;
2105         }
2106
2107         err = -ENOBUFS;
2108         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2109         if (!skb)
2110                 goto errout;
2111
2112         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2113                                         NETLINK_CB(in_skb).portid,
2114                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2115                                         NETCONFA_ALL);
2116         if (err < 0) {
2117                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2118                 WARN_ON(err == -EMSGSIZE);
2119                 kfree_skb(skb);
2120                 goto errout;
2121         }
2122         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2123 errout:
2124         return err;
2125 }
2126
2127 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2128                                      struct netlink_callback *cb)
2129 {
2130         const struct nlmsghdr *nlh = cb->nlh;
2131         struct net *net = sock_net(skb->sk);
2132         int h, s_h;
2133         int idx, s_idx;
2134         struct net_device *dev;
2135         struct in_device *in_dev;
2136         struct hlist_head *head;
2137
2138         if (cb->strict_check) {
2139                 struct netlink_ext_ack *extack = cb->extack;
2140                 struct netconfmsg *ncm;
2141
2142                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2143                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2144                         return -EINVAL;
2145                 }
2146
2147                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2148                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2149                         return -EINVAL;
2150                 }
2151         }
2152
2153         s_h = cb->args[0];
2154         s_idx = idx = cb->args[1];
2155
2156         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2157                 idx = 0;
2158                 head = &net->dev_index_head[h];
2159                 rcu_read_lock();
2160                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2161                           net->dev_base_seq;
2162                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2163                         if (idx < s_idx)
2164                                 goto cont;
2165                         in_dev = __in_dev_get_rcu(dev);
2166                         if (!in_dev)
2167                                 goto cont;
2168
2169                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2170                                                       &in_dev->cnf,
2171                                                       NETLINK_CB(cb->skb).portid,
2172                                                       nlh->nlmsg_seq,
2173                                                       RTM_NEWNETCONF,
2174                                                       NLM_F_MULTI,
2175                                                       NETCONFA_ALL) < 0) {
2176                                 rcu_read_unlock();
2177                                 goto done;
2178                         }
2179                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2180 cont:
2181                         idx++;
2182                 }
2183                 rcu_read_unlock();
2184         }
2185         if (h == NETDEV_HASHENTRIES) {
2186                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2187                                               net->ipv4.devconf_all,
2188                                               NETLINK_CB(cb->skb).portid,
2189                                               nlh->nlmsg_seq,
2190                                               RTM_NEWNETCONF, NLM_F_MULTI,
2191                                               NETCONFA_ALL) < 0)
2192                         goto done;
2193                 else
2194                         h++;
2195         }
2196         if (h == NETDEV_HASHENTRIES + 1) {
2197                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2198                                               net->ipv4.devconf_dflt,
2199                                               NETLINK_CB(cb->skb).portid,
2200                                               nlh->nlmsg_seq,
2201                                               RTM_NEWNETCONF, NLM_F_MULTI,
2202                                               NETCONFA_ALL) < 0)
2203                         goto done;
2204                 else
2205                         h++;
2206         }
2207 done:
2208         cb->args[0] = h;
2209         cb->args[1] = idx;
2210
2211         return skb->len;
2212 }
2213
2214 #ifdef CONFIG_SYSCTL
2215
2216 static void devinet_copy_dflt_conf(struct net *net, int i)
2217 {
2218         struct net_device *dev;
2219
2220         rcu_read_lock();
2221         for_each_netdev_rcu(net, dev) {
2222                 struct in_device *in_dev;
2223
2224                 in_dev = __in_dev_get_rcu(dev);
2225                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2226                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2227         }
2228         rcu_read_unlock();
2229 }
2230
2231 /* called with RTNL locked */
2232 static void inet_forward_change(struct net *net)
2233 {
2234         struct net_device *dev;
2235         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2236
2237         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2238         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2239         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2240                                     NETCONFA_FORWARDING,
2241                                     NETCONFA_IFINDEX_ALL,
2242                                     net->ipv4.devconf_all);
2243         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2244                                     NETCONFA_FORWARDING,
2245                                     NETCONFA_IFINDEX_DEFAULT,
2246                                     net->ipv4.devconf_dflt);
2247
2248         for_each_netdev(net, dev) {
2249                 struct in_device *in_dev;
2250
2251                 if (on)
2252                         dev_disable_lro(dev);
2253
2254                 in_dev = __in_dev_get_rtnl(dev);
2255                 if (in_dev) {
2256                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2257                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2258                                                     NETCONFA_FORWARDING,
2259                                                     dev->ifindex, &in_dev->cnf);
2260                 }
2261         }
2262 }
2263
2264 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2265 {
2266         if (cnf == net->ipv4.devconf_dflt)
2267                 return NETCONFA_IFINDEX_DEFAULT;
2268         else if (cnf == net->ipv4.devconf_all)
2269                 return NETCONFA_IFINDEX_ALL;
2270         else {
2271                 struct in_device *idev
2272                         = container_of(cnf, struct in_device, cnf);
2273                 return idev->dev->ifindex;
2274         }
2275 }
2276
2277 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2278                              void __user *buffer,
2279                              size_t *lenp, loff_t *ppos)
2280 {
2281         int old_value = *(int *)ctl->data;
2282         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2283         int new_value = *(int *)ctl->data;
2284
2285         if (write) {
2286                 struct ipv4_devconf *cnf = ctl->extra1;
2287                 struct net *net = ctl->extra2;
2288                 int i = (int *)ctl->data - cnf->data;
2289                 int ifindex;
2290
2291                 set_bit(i, cnf->state);
2292
2293                 if (cnf == net->ipv4.devconf_dflt)
2294                         devinet_copy_dflt_conf(net, i);
2295                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2296                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2297                         if ((new_value == 0) && (old_value != 0))
2298                                 rt_cache_flush(net);
2299
2300                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2301                     new_value != old_value)
2302                         rt_cache_flush(net);
2303
2304                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2305                     new_value != old_value) {
2306                         ifindex = devinet_conf_ifindex(net, cnf);
2307                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2308                                                     NETCONFA_RP_FILTER,
2309                                                     ifindex, cnf);
2310                 }
2311                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2312                     new_value != old_value) {
2313                         ifindex = devinet_conf_ifindex(net, cnf);
2314                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2315                                                     NETCONFA_PROXY_NEIGH,
2316                                                     ifindex, cnf);
2317                 }
2318                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2319                     new_value != old_value) {
2320                         ifindex = devinet_conf_ifindex(net, cnf);
2321                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2322                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2323                                                     ifindex, cnf);
2324                 }
2325         }
2326
2327         return ret;
2328 }
2329
2330 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2331                                   void __user *buffer,
2332                                   size_t *lenp, loff_t *ppos)
2333 {
2334         int *valp = ctl->data;
2335         int val = *valp;
2336         loff_t pos = *ppos;
2337         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2338
2339         if (write && *valp != val) {
2340                 struct net *net = ctl->extra2;
2341
2342                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2343                         if (!rtnl_trylock()) {
2344                                 /* Restore the original values before restarting */
2345                                 *valp = val;
2346                                 *ppos = pos;
2347                                 return restart_syscall();
2348                         }
2349                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2350                                 inet_forward_change(net);
2351                         } else {
2352                                 struct ipv4_devconf *cnf = ctl->extra1;
2353                                 struct in_device *idev =
2354                                         container_of(cnf, struct in_device, cnf);
2355                                 if (*valp)
2356                                         dev_disable_lro(idev->dev);
2357                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358                                                             NETCONFA_FORWARDING,
2359                                                             idev->dev->ifindex,
2360                                                             cnf);
2361                         }
2362                         rtnl_unlock();
2363                         rt_cache_flush(net);
2364                 } else
2365                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2366                                                     NETCONFA_FORWARDING,
2367                                                     NETCONFA_IFINDEX_DEFAULT,
2368                                                     net->ipv4.devconf_dflt);
2369         }
2370
2371         return ret;
2372 }
2373
2374 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2375                                 void __user *buffer,
2376                                 size_t *lenp, loff_t *ppos)
2377 {
2378         int *valp = ctl->data;
2379         int val = *valp;
2380         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2381         struct net *net = ctl->extra2;
2382
2383         if (write && *valp != val)
2384                 rt_cache_flush(net);
2385
2386         return ret;
2387 }
2388
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data and
 * .extra1 at ipv4_devconf itself; @name is the procfs file name, @mval the
 * file mode and @proc the handler.
 */
2389 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2390         { \
2391                 .procname       = name, \
2392                 .data           = ipv4_devconf.data + \
2393                                   IPV4_DEVCONF_ ## attr - 1, \
2394                 .maxlen         = sizeof(int), \
2395                 .mode           = mval, \
2396                 .proc_handler   = proc, \
2397                 .extra1         = &ipv4_devconf, \
2398         }
2399
/* Entry with mode 0644 handled by devinet_conf_proc(). */
2400 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2401         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2402
/* Entry with mode 0444 handled by devinet_conf_proc(). */
2403 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2404         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2405
/* Entry with mode 0644 and a caller-supplied handler @proc. */
2406 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2407         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2408
/* Entry with mode 0644 handled by ipv4_doint_and_flush(). */
2409 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2410         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2411
2412 static struct devinet_sysctl_table {
2413         struct ctl_table_header *sysctl_header;
2414         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2415 } devinet_sysctl = {
2416         .devinet_vars = {
2417                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2418                                              devinet_sysctl_forward),
2419                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2420                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2421
2422                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2423                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2424                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2425                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2426                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2427                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2428                                         "accept_source_route"),
2429                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2430                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2431                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2432                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2433                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2434                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2435                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2436                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2437                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2438                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2439                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2440                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2441                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2442                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2443                                         "force_igmp_version"),
2444                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2445                                         "igmpv2_unsolicited_report_interval"),
2446                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2447                                         "igmpv3_unsolicited_report_interval"),
2448                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2449                                         "ignore_routes_with_linkdown"),
2450                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2451                                         "drop_gratuitous_arp"),
2452
2453                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2455                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2456                                               "promote_secondaries"),
2457                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2458                                               "route_localnet"),
2459                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2460                                               "drop_unicast_in_l2_multicast"),
2461         },
2462 };
2463
2464 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2465                                      int ifindex, struct ipv4_devconf *p)
2466 {
2467         int i;
2468         struct devinet_sysctl_table *t;
2469         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2470
2471         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2472         if (!t)
2473                 goto out;
2474
2475         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2476                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2477                 t->devinet_vars[i].extra1 = p;
2478                 t->devinet_vars[i].extra2 = net;
2479         }
2480
2481         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2482
2483         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2484         if (!t->sysctl_header)
2485                 goto free;
2486
2487         p->sysctl = t;
2488
2489         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2490                                     ifindex, p);
2491         return 0;
2492
2493 free:
2494         kfree(t);
2495 out:
2496         return -ENOBUFS;
2497 }
2498
2499 static void __devinet_sysctl_unregister(struct net *net,
2500                                         struct ipv4_devconf *cnf, int ifindex)
2501 {
2502         struct devinet_sysctl_table *t = cnf->sysctl;
2503
2504         if (t) {
2505                 cnf->sysctl = NULL;
2506                 unregister_net_sysctl_table(t->sysctl_header);
2507                 kfree(t);
2508         }
2509
2510         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2511 }
2512
2513 static int devinet_sysctl_register(struct in_device *idev)
2514 {
2515         int err;
2516
2517         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2518                 return -EINVAL;
2519
2520         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2521         if (err)
2522                 return err;
2523         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2524                                         idev->dev->ifindex, &idev->cnf);
2525         if (err)
2526                 neigh_sysctl_unregister(idev->arp_parms);
2527         return err;
2528 }
2529
2530 static void devinet_sysctl_unregister(struct in_device *idev)
2531 {
2532         struct net *net = dev_net(idev->dev);
2533
2534         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2535         neigh_sysctl_unregister(idev->arp_parms);
2536 }
2537
2538 static struct ctl_table ctl_forward_entry[] = {
2539         {
2540                 .procname       = "ip_forward",
2541                 .data           = &ipv4_devconf.data[
2542                                         IPV4_DEVCONF_FORWARDING - 1],
2543                 .maxlen         = sizeof(int),
2544                 .mode           = 0644,
2545                 .proc_handler   = devinet_sysctl_forward,
2546                 .extra1         = &ipv4_devconf,
2547                 .extra2         = &init_net,
2548         },
2549         { },
2550 };
2551 #endif
2552
2553 static __net_init int devinet_init_net(struct net *net)
2554 {
2555         int err;
2556         struct ipv4_devconf *all, *dflt;
2557 #ifdef CONFIG_SYSCTL
2558         struct ctl_table *tbl = ctl_forward_entry;
2559         struct ctl_table_header *forw_hdr;
2560 #endif
2561
2562         err = -ENOMEM;
2563         all = &ipv4_devconf;
2564         dflt = &ipv4_devconf_dflt;
2565
2566         if (!net_eq(net, &init_net)) {
2567                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2568                 if (!all)
2569                         goto err_alloc_all;
2570
2571                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2572                 if (!dflt)
2573                         goto err_alloc_dflt;
2574
2575 #ifdef CONFIG_SYSCTL
2576                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2577                 if (!tbl)
2578                         goto err_alloc_ctl;
2579
2580                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2581                 tbl[0].extra1 = all;
2582                 tbl[0].extra2 = net;
2583 #endif
2584         }
2585
2586 #ifdef CONFIG_SYSCTL
2587         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2588         if (err < 0)
2589                 goto err_reg_all;
2590
2591         err = __devinet_sysctl_register(net, "default",
2592                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2593         if (err < 0)
2594                 goto err_reg_dflt;
2595
2596         err = -ENOMEM;
2597         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2598         if (!forw_hdr)
2599                 goto err_reg_ctl;
2600         net->ipv4.forw_hdr = forw_hdr;
2601 #endif
2602
2603         net->ipv4.devconf_all = all;
2604         net->ipv4.devconf_dflt = dflt;
2605         return 0;
2606
2607 #ifdef CONFIG_SYSCTL
2608 err_reg_ctl:
2609         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2610 err_reg_dflt:
2611         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2612 err_reg_all:
2613         if (tbl != ctl_forward_entry)
2614                 kfree(tbl);
2615 err_alloc_ctl:
2616 #endif
2617         if (dflt != &ipv4_devconf_dflt)
2618                 kfree(dflt);
2619 err_alloc_dflt:
2620         if (all != &ipv4_devconf)
2621                 kfree(all);
2622 err_alloc_all:
2623         return err;
2624 }
2625
2626 static __net_exit void devinet_exit_net(struct net *net)
2627 {
2628 #ifdef CONFIG_SYSCTL
2629         struct ctl_table *tbl;
2630
2631         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2632         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2633         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2634                                     NETCONFA_IFINDEX_DEFAULT);
2635         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2636                                     NETCONFA_IFINDEX_ALL);
2637         kfree(tbl);
2638 #endif
2639         kfree(net->ipv4.devconf_dflt);
2640         kfree(net->ipv4.devconf_all);
2641 }
2642
2643 static __net_initdata struct pernet_operations devinet_ops = {
2644         .init = devinet_init_net,
2645         .exit = devinet_exit_net,
2646 };
2647
2648 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2649         .family           = AF_INET,
2650         .fill_link_af     = inet_fill_link_af,
2651         .get_link_af_size = inet_get_link_af_size,
2652         .validate_link_af = inet_validate_link_af,
2653         .set_link_af      = inet_set_link_af,
2654 };
2655
2656 void __init devinet_init(void)
2657 {
2658         int i;
2659
2660         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2661                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2662
2663         register_pernet_subsys(&devinet_ops);
2664
2665         register_gifconf(PF_INET, inet_gifconf);
2666         register_netdevice_notifier(&ip_netdev_notifier);
2667
2668         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2669
2670         rtnl_af_register(&inet_af_ops);
2671
2672         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2673         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2674         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2675         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2676                       inet_netconf_dump_devconf, 0);
2677 }