Merge branch 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebieder...
[linux-2.6-block.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Read devconf attribute @attr from the devconf_dflt block of namespace
 * @net.  @net is parenthesised so that any pointer-valued expression
 * (not just a plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
104 };
105
106 struct inet_fill_args {
107         u32 portid;
108         u32 seq;
109         int event;
110         unsigned int flags;
111         int netnsid;
112         int ifindex;
113 };
114
115 #define IN4_ADDR_HSIZE_SHIFT    8
116 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
117
118 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122         u32 val = (__force u32) addr ^ net_hash_mix(net);
123
124         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129         u32 hash = inet_addr_hash(net, ifa->ifa_local);
130
131         ASSERT_RTNL();
132         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137         ASSERT_RTNL();
138         hlist_del_init_rcu(&ifa->hash);
139 }
140
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151         struct net_device *result = NULL;
152         struct in_ifaddr *ifa;
153
154         rcu_read_lock();
155         ifa = inet_lookup_ifaddr_rcu(net, addr);
156         if (!ifa) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         } else {
170                 result = ifa->ifa_dev->dev;
171         }
172         if (result && devref)
173                 dev_hold(result);
174         rcu_read_unlock();
175         return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182         u32 hash = inet_addr_hash(net, addr);
183         struct in_ifaddr *ifa;
184
185         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186                 if (ifa->ifa_local == addr &&
187                     net_eq(dev_net(ifa->ifa_dev->dev), net))
188                         return ifa;
189
190         return NULL;
191 }
192
193 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
194
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
197 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
198                          int destroy);
199 #ifdef CONFIG_SYSCTL
200 static int devinet_sysctl_register(struct in_device *idev);
201 static void devinet_sysctl_unregister(struct in_device *idev);
202 #else
203 static int devinet_sysctl_register(struct in_device *idev)
204 {
205         return 0;
206 }
207 static void devinet_sysctl_unregister(struct in_device *idev)
208 {
209 }
210 #endif
211
212 /* Locks all the inet devices. */
213
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222         if (ifa->ifa_dev)
223                 in_dev_put(ifa->ifa_dev);
224         kfree(ifa);
225 }
226
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234         struct net_device *dev = idev->dev;
235
236         WARN_ON(idev->ifa_list);
237         WARN_ON(idev->mc_list);
238         kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242         dev_put(dev);
243         if (!idev->dead)
244                 pr_err("Freeing alive in_device %p\n", idev);
245         else
246                 kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252         struct in_device *in_dev;
253         int err = -ENOMEM;
254
255         ASSERT_RTNL();
256
257         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258         if (!in_dev)
259                 goto out;
260         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261                         sizeof(in_dev->cnf));
262         in_dev->cnf.sysctl = NULL;
263         in_dev->dev = dev;
264         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265         if (!in_dev->arp_parms)
266                 goto out_kfree;
267         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268                 dev_disable_lro(dev);
269         /* Reference in_dev->dev */
270         dev_hold(dev);
271         /* Account for reference dev->ip_ptr (below) */
272         refcount_set(&in_dev->refcnt, 1);
273
274         err = devinet_sysctl_register(in_dev);
275         if (err) {
276                 in_dev->dead = 1;
277                 in_dev_put(in_dev);
278                 in_dev = NULL;
279                 goto out;
280         }
281         ip_mc_init_dev(in_dev);
282         if (dev->flags & IFF_UP)
283                 ip_mc_up(in_dev);
284
285         /* we can receive as soon as ip_ptr is set -- do this last */
286         rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288         return in_dev ?: ERR_PTR(err);
289 out_kfree:
290         kfree(in_dev);
291         in_dev = NULL;
292         goto out;
293 }
294
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297         struct in_device *idev = container_of(head, struct in_device, rcu_head);
298         in_dev_put(idev);
299 }
300
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303         struct in_ifaddr *ifa;
304         struct net_device *dev;
305
306         ASSERT_RTNL();
307
308         dev = in_dev->dev;
309
310         in_dev->dead = 1;
311
312         ip_mc_destroy_dev(in_dev);
313
314         while ((ifa = in_dev->ifa_list) != NULL) {
315                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316                 inet_free_ifa(ifa);
317         }
318
319         RCU_INIT_POINTER(dev->ip_ptr, NULL);
320
321         devinet_sysctl_unregister(in_dev);
322         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323         arp_ifdown(dev);
324
325         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330         rcu_read_lock();
331         for_primary_ifa(in_dev) {
332                 if (inet_ifa_match(a, ifa)) {
333                         if (!b || inet_ifa_match(b, ifa)) {
334                                 rcu_read_unlock();
335                                 return 1;
336                         }
337                 }
338         } endfor_ifa(in_dev);
339         rcu_read_unlock();
340         return 0;
341 }
342
343 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
344                          int destroy, struct nlmsghdr *nlh, u32 portid)
345 {
346         struct in_ifaddr *promote = NULL;
347         struct in_ifaddr *ifa, *ifa1 = *ifap;
348         struct in_ifaddr *last_prim = in_dev->ifa_list;
349         struct in_ifaddr *prev_prom = NULL;
350         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
351
352         ASSERT_RTNL();
353
354         if (in_dev->dead)
355                 goto no_promotions;
356
357         /* 1. Deleting primary ifaddr forces deletion all secondaries
358          * unless alias promotion is set
359          **/
360
361         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
362                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
363
364                 while ((ifa = *ifap1) != NULL) {
365                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
366                             ifa1->ifa_scope <= ifa->ifa_scope)
367                                 last_prim = ifa;
368
369                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
370                             ifa1->ifa_mask != ifa->ifa_mask ||
371                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
372                                 ifap1 = &ifa->ifa_next;
373                                 prev_prom = ifa;
374                                 continue;
375                         }
376
377                         if (!do_promote) {
378                                 inet_hash_remove(ifa);
379                                 *ifap1 = ifa->ifa_next;
380
381                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
382                                 blocking_notifier_call_chain(&inetaddr_chain,
383                                                 NETDEV_DOWN, ifa);
384                                 inet_free_ifa(ifa);
385                         } else {
386                                 promote = ifa;
387                                 break;
388                         }
389                 }
390         }
391
392         /* On promotion all secondaries from subnet are changing
393          * the primary IP, we must remove all their routes silently
394          * and later to add them back with new prefsrc. Do this
395          * while all addresses are on the device list.
396          */
397         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
398                 if (ifa1->ifa_mask == ifa->ifa_mask &&
399                     inet_ifa_match(ifa1->ifa_address, ifa))
400                         fib_del_ifaddr(ifa, ifa1);
401         }
402
403 no_promotions:
404         /* 2. Unlink it */
405
406         *ifap = ifa1->ifa_next;
407         inet_hash_remove(ifa1);
408
409         /* 3. Announce address deletion */
410
411         /* Send message first, then call notifier.
412            At first sight, FIB update triggered by notifier
413            will refer to already deleted ifaddr, that could confuse
414            netlink listeners. It is not true: look, gated sees
415            that route deleted and if it still thinks that ifaddr
416            is valid, it will try to restore deleted routes... Grr.
417            So that, this order is correct.
418          */
419         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
420         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
421
422         if (promote) {
423                 struct in_ifaddr *next_sec = promote->ifa_next;
424
425                 if (prev_prom) {
426                         prev_prom->ifa_next = promote->ifa_next;
427                         promote->ifa_next = last_prim->ifa_next;
428                         last_prim->ifa_next = promote;
429                 }
430
431                 promote->ifa_flags &= ~IFA_F_SECONDARY;
432                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
433                 blocking_notifier_call_chain(&inetaddr_chain,
434                                 NETDEV_UP, promote);
435                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
436                         if (ifa1->ifa_mask != ifa->ifa_mask ||
437                             !inet_ifa_match(ifa1->ifa_address, ifa))
438                                         continue;
439                         fib_add_ifaddr(ifa);
440                 }
441
442         }
443         if (destroy)
444                 inet_free_ifa(ifa1);
445 }
446
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448                          int destroy)
449 {
450         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452
/* Delayed work item whose handler is check_lifetime(). */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458                              u32 portid, struct netlink_ext_ack *extack)
459 {
460         struct in_device *in_dev = ifa->ifa_dev;
461         struct in_ifaddr *ifa1, **ifap, **last_primary;
462         struct in_validator_info ivi;
463         int ret;
464
465         ASSERT_RTNL();
466
467         if (!ifa->ifa_local) {
468                 inet_free_ifa(ifa);
469                 return 0;
470         }
471
472         ifa->ifa_flags &= ~IFA_F_SECONDARY;
473         last_primary = &in_dev->ifa_list;
474
475         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476              ifap = &ifa1->ifa_next) {
477                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478                     ifa->ifa_scope <= ifa1->ifa_scope)
479                         last_primary = &ifa1->ifa_next;
480                 if (ifa1->ifa_mask == ifa->ifa_mask &&
481                     inet_ifa_match(ifa1->ifa_address, ifa)) {
482                         if (ifa1->ifa_local == ifa->ifa_local) {
483                                 inet_free_ifa(ifa);
484                                 return -EEXIST;
485                         }
486                         if (ifa1->ifa_scope != ifa->ifa_scope) {
487                                 inet_free_ifa(ifa);
488                                 return -EINVAL;
489                         }
490                         ifa->ifa_flags |= IFA_F_SECONDARY;
491                 }
492         }
493
494         /* Allow any devices that wish to register ifaddr validtors to weigh
495          * in now, before changes are committed.  The rntl lock is serializing
496          * access here, so the state should not change between a validator call
497          * and a final notify on commit.  This isn't invoked on promotion under
498          * the assumption that validators are checking the address itself, and
499          * not the flags.
500          */
501         ivi.ivi_addr = ifa->ifa_address;
502         ivi.ivi_dev = ifa->ifa_dev;
503         ivi.extack = extack;
504         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505                                            NETDEV_UP, &ivi);
506         ret = notifier_to_errno(ret);
507         if (ret) {
508                 inet_free_ifa(ifa);
509                 return ret;
510         }
511
512         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513                 prandom_seed((__force u32) ifa->ifa_local);
514                 ifap = last_primary;
515         }
516
517         ifa->ifa_next = *ifap;
518         *ifap = ifa;
519
520         inet_hash_insert(dev_net(in_dev->dev), ifa);
521
522         cancel_delayed_work(&check_lifetime_work);
523         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524
525         /* Send message first, then call notifier.
526            Notifier will trigger FIB update, so that
527            listeners of netlink will know about new ifaddr */
528         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530
531         return 0;
532 }
533
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536         return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541         struct in_device *in_dev = __in_dev_get_rtnl(dev);
542
543         ASSERT_RTNL();
544
545         if (!in_dev) {
546                 inet_free_ifa(ifa);
547                 return -ENOBUFS;
548         }
549         ipv4_devconf_setall(in_dev);
550         neigh_parms_data_state_setall(in_dev->arp_parms);
551         if (ifa->ifa_dev != in_dev) {
552                 WARN_ON(ifa->ifa_dev);
553                 in_dev_hold(in_dev);
554                 ifa->ifa_dev = in_dev;
555         }
556         if (ipv4_is_loopback(ifa->ifa_local))
557                 ifa->ifa_scope = RT_SCOPE_HOST;
558         return inet_insert_ifa(ifa);
559 }
560
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566         struct net_device *dev;
567         struct in_device *in_dev = NULL;
568
569         rcu_read_lock();
570         dev = dev_get_by_index_rcu(net, ifindex);
571         if (dev)
572                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573         rcu_read_unlock();
574         return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577
578 /* Called only from RTNL semaphored context. No locks. */
579
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581                                     __be32 mask)
582 {
583         ASSERT_RTNL();
584
585         for_primary_ifa(in_dev) {
586                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587                         return ifa;
588         } endfor_ifa(in_dev);
589         return NULL;
590 }
591
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594         struct ip_mreqn mreq = {
595                 .imr_multiaddr.s_addr = ifa->ifa_address,
596                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
597         };
598         int ret;
599
600         ASSERT_RTNL();
601
602         lock_sock(sk);
603         if (join)
604                 ret = ip_mc_join_group(sk, &mreq);
605         else
606                 ret = ip_mc_leave_group(sk, &mreq);
607         release_sock(sk);
608
609         return ret;
610 }
611
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613                             struct netlink_ext_ack *extack)
614 {
615         struct net *net = sock_net(skb->sk);
616         struct nlattr *tb[IFA_MAX+1];
617         struct in_device *in_dev;
618         struct ifaddrmsg *ifm;
619         struct in_ifaddr *ifa, **ifap;
620         int err = -EINVAL;
621
622         ASSERT_RTNL();
623
624         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625                           extack);
626         if (err < 0)
627                 goto errout;
628
629         ifm = nlmsg_data(nlh);
630         in_dev = inetdev_by_index(net, ifm->ifa_index);
631         if (!in_dev) {
632                 err = -ENODEV;
633                 goto errout;
634         }
635
636         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637              ifap = &ifa->ifa_next) {
638                 if (tb[IFA_LOCAL] &&
639                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640                         continue;
641
642                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643                         continue;
644
645                 if (tb[IFA_ADDRESS] &&
646                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648                         continue;
649
650                 if (ipv4_is_multicast(ifa->ifa_address))
651                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653                 return 0;
654         }
655
656         err = -EADDRNOTAVAIL;
657 errout:
658         return err;
659 }
660
661 #define INFINITY_LIFE_TIME      0xFFFFFFFF
662
663 static void check_lifetime(struct work_struct *work)
664 {
665         unsigned long now, next, next_sec, next_sched;
666         struct in_ifaddr *ifa;
667         struct hlist_node *n;
668         int i;
669
670         now = jiffies;
671         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
672
673         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
674                 bool change_needed = false;
675
676                 rcu_read_lock();
677                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
678                         unsigned long age;
679
680                         if (ifa->ifa_flags & IFA_F_PERMANENT)
681                                 continue;
682
683                         /* We try to batch several events at once. */
684                         age = (now - ifa->ifa_tstamp +
685                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
686
687                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
688                             age >= ifa->ifa_valid_lft) {
689                                 change_needed = true;
690                         } else if (ifa->ifa_preferred_lft ==
691                                    INFINITY_LIFE_TIME) {
692                                 continue;
693                         } else if (age >= ifa->ifa_preferred_lft) {
694                                 if (time_before(ifa->ifa_tstamp +
695                                                 ifa->ifa_valid_lft * HZ, next))
696                                         next = ifa->ifa_tstamp +
697                                                ifa->ifa_valid_lft * HZ;
698
699                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
700                                         change_needed = true;
701                         } else if (time_before(ifa->ifa_tstamp +
702                                                ifa->ifa_preferred_lft * HZ,
703                                                next)) {
704                                 next = ifa->ifa_tstamp +
705                                        ifa->ifa_preferred_lft * HZ;
706                         }
707                 }
708                 rcu_read_unlock();
709                 if (!change_needed)
710                         continue;
711                 rtnl_lock();
712                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
713                         unsigned long age;
714
715                         if (ifa->ifa_flags & IFA_F_PERMANENT)
716                                 continue;
717
718                         /* We try to batch several events at once. */
719                         age = (now - ifa->ifa_tstamp +
720                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721
722                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723                             age >= ifa->ifa_valid_lft) {
724                                 struct in_ifaddr **ifap;
725
726                                 for (ifap = &ifa->ifa_dev->ifa_list;
727                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
728                                         if (*ifap == ifa) {
729                                                 inet_del_ifa(ifa->ifa_dev,
730                                                              ifap, 1);
731                                                 break;
732                                         }
733                                 }
734                         } else if (ifa->ifa_preferred_lft !=
735                                    INFINITY_LIFE_TIME &&
736                                    age >= ifa->ifa_preferred_lft &&
737                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
738                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
739                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
740                         }
741                 }
742                 rtnl_unlock();
743         }
744
745         next_sec = round_jiffies_up(next);
746         next_sched = next;
747
748         /* If rounded timeout is accurate enough, accept it. */
749         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
750                 next_sched = next_sec;
751
752         now = jiffies;
753         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
754         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
755                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
756
757         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
758                         next_sched - now);
759 }
760
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762                              __u32 prefered_lft)
763 {
764         unsigned long timeout;
765
766         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767
768         timeout = addrconf_timeout_fixup(valid_lft, HZ);
769         if (addrconf_finite_timeout(timeout))
770                 ifa->ifa_valid_lft = timeout;
771         else
772                 ifa->ifa_flags |= IFA_F_PERMANENT;
773
774         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775         if (addrconf_finite_timeout(timeout)) {
776                 if (timeout == 0)
777                         ifa->ifa_flags |= IFA_F_DEPRECATED;
778                 ifa->ifa_preferred_lft = timeout;
779         }
780         ifa->ifa_tstamp = jiffies;
781         if (!ifa->ifa_cstamp)
782                 ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
787                                        struct netlink_ext_ack *extack)
788 {
789         struct nlattr *tb[IFA_MAX+1];
790         struct in_ifaddr *ifa;
791         struct ifaddrmsg *ifm;
792         struct net_device *dev;
793         struct in_device *in_dev;
794         int err;
795
796         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797                           extack);
798         if (err < 0)
799                 goto errout;
800
801         ifm = nlmsg_data(nlh);
802         err = -EINVAL;
803         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804                 goto errout;
805
806         dev = __dev_get_by_index(net, ifm->ifa_index);
807         err = -ENODEV;
808         if (!dev)
809                 goto errout;
810
811         in_dev = __in_dev_get_rtnl(dev);
812         err = -ENOBUFS;
813         if (!in_dev)
814                 goto errout;
815
816         ifa = inet_alloc_ifa();
817         if (!ifa)
818                 /*
819                  * A potential indev allocation can be left alive, it stays
820                  * assigned to its device and is destroy with it.
821                  */
822                 goto errout;
823
824         ipv4_devconf_setall(in_dev);
825         neigh_parms_data_state_setall(in_dev->arp_parms);
826         in_dev_hold(in_dev);
827
828         if (!tb[IFA_ADDRESS])
829                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830
831         INIT_HLIST_NODE(&ifa->hash);
832         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835                                          ifm->ifa_flags;
836         ifa->ifa_scope = ifm->ifa_scope;
837         ifa->ifa_dev = in_dev;
838
839         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841
842         if (tb[IFA_BROADCAST])
843                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844
845         if (tb[IFA_LABEL])
846                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847         else
848                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849
850         if (tb[IFA_RT_PRIORITY])
851                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852
853         if (tb[IFA_CACHEINFO]) {
854                 struct ifa_cacheinfo *ci;
855
856                 ci = nla_data(tb[IFA_CACHEINFO]);
857                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858                         err = -EINVAL;
859                         goto errout_free;
860                 }
861                 *pvalid_lft = ci->ifa_valid;
862                 *pprefered_lft = ci->ifa_prefered;
863         }
864
865         return ifa;
866
867 errout_free:
868         inet_free_ifa(ifa);
869 errout:
870         return ERR_PTR(err);
871 }
872
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875         struct in_device *in_dev = ifa->ifa_dev;
876         struct in_ifaddr *ifa1, **ifap;
877
878         if (!ifa->ifa_local)
879                 return NULL;
880
881         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882              ifap = &ifa1->ifa_next) {
883                 if (ifa1->ifa_mask == ifa->ifa_mask &&
884                     inet_ifa_match(ifa1->ifa_address, ifa) &&
885                     ifa1->ifa_local == ifa->ifa_local)
886                         return ifa1;
887         }
888         return NULL;
889 }
890
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892                             struct netlink_ext_ack *extack)
893 {
894         struct net *net = sock_net(skb->sk);
895         struct in_ifaddr *ifa;
896         struct in_ifaddr *ifa_existing;
897         __u32 valid_lft = INFINITY_LIFE_TIME;
898         __u32 prefered_lft = INFINITY_LIFE_TIME;
899
900         ASSERT_RTNL();
901
902         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903         if (IS_ERR(ifa))
904                 return PTR_ERR(ifa);
905
906         ifa_existing = find_matching_ifa(ifa);
907         if (!ifa_existing) {
908                 /* It would be best to check for !NLM_F_CREATE here but
909                  * userspace already relies on not having to provide this.
910                  */
911                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914                                                true, ifa);
915
916                         if (ret < 0) {
917                                 inet_free_ifa(ifa);
918                                 return ret;
919                         }
920                 }
921                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922                                          extack);
923         } else {
924                 u32 new_metric = ifa->ifa_rt_priority;
925
926                 inet_free_ifa(ifa);
927
928                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
929                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
930                         return -EEXIST;
931                 ifa = ifa_existing;
932
933                 if (ifa->ifa_rt_priority != new_metric) {
934                         fib_modify_prefix_metric(ifa, new_metric);
935                         ifa->ifa_rt_priority = new_metric;
936                 }
937
938                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939                 cancel_delayed_work(&check_lifetime_work);
940                 queue_delayed_work(system_power_efficient_wq,
941                                 &check_lifetime_work, 0);
942                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943         }
944         return 0;
945 }
946
947 /*
948  *      Determine a default network mask, based on the IP address.
949  */
950
951 static int inet_abc_len(__be32 addr)
952 {
953         int rc = -1;    /* Something else, probably a multicast. */
954
955         if (ipv4_is_zeronet(addr))
956                 rc = 0;
957         else {
958                 __u32 haddr = ntohl(addr);
959
960                 if (IN_CLASSA(haddr))
961                         rc = 8;
962                 else if (IN_CLASSB(haddr))
963                         rc = 16;
964                 else if (IN_CLASSC(haddr))
965                         rc = 24;
966         }
967
968         return rc;
969 }
970
971
972 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
973 {
974         struct sockaddr_in sin_orig;
975         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
976         struct in_device *in_dev;
977         struct in_ifaddr **ifap = NULL;
978         struct in_ifaddr *ifa = NULL;
979         struct net_device *dev;
980         char *colon;
981         int ret = -EFAULT;
982         int tryaddrmatch = 0;
983
984         ifr->ifr_name[IFNAMSIZ - 1] = 0;
985
986         /* save original address for comparison */
987         memcpy(&sin_orig, sin, sizeof(*sin));
988
989         colon = strchr(ifr->ifr_name, ':');
990         if (colon)
991                 *colon = 0;
992
993         dev_load(net, ifr->ifr_name);
994
995         switch (cmd) {
996         case SIOCGIFADDR:       /* Get interface address */
997         case SIOCGIFBRDADDR:    /* Get the broadcast address */
998         case SIOCGIFDSTADDR:    /* Get the destination address */
999         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1000                 /* Note that these ioctls will not sleep,
1001                    so that we do not impose a lock.
1002                    One day we will be forced to put shlock here (I mean SMP)
1003                  */
1004                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1005                 memset(sin, 0, sizeof(*sin));
1006                 sin->sin_family = AF_INET;
1007                 break;
1008
1009         case SIOCSIFFLAGS:
1010                 ret = -EPERM;
1011                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1012                         goto out;
1013                 break;
1014         case SIOCSIFADDR:       /* Set interface address (and family) */
1015         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1016         case SIOCSIFDSTADDR:    /* Set the destination address */
1017         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1018                 ret = -EPERM;
1019                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1020                         goto out;
1021                 ret = -EINVAL;
1022                 if (sin->sin_family != AF_INET)
1023                         goto out;
1024                 break;
1025         default:
1026                 ret = -EINVAL;
1027                 goto out;
1028         }
1029
1030         rtnl_lock();
1031
1032         ret = -ENODEV;
1033         dev = __dev_get_by_name(net, ifr->ifr_name);
1034         if (!dev)
1035                 goto done;
1036
1037         if (colon)
1038                 *colon = ':';
1039
1040         in_dev = __in_dev_get_rtnl(dev);
1041         if (in_dev) {
1042                 if (tryaddrmatch) {
1043                         /* Matthias Andree */
1044                         /* compare label and address (4.4BSD style) */
1045                         /* note: we only do this for a limited set of ioctls
1046                            and only if the original address family was AF_INET.
1047                            This is checked above. */
1048                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1049                              ifap = &ifa->ifa_next) {
1050                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1051                                     sin_orig.sin_addr.s_addr ==
1052                                                         ifa->ifa_local) {
1053                                         break; /* found */
1054                                 }
1055                         }
1056                 }
1057                 /* we didn't get a match, maybe the application is
1058                    4.3BSD-style and passed in junk so we fall back to
1059                    comparing just the label */
1060                 if (!ifa) {
1061                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1062                              ifap = &ifa->ifa_next)
1063                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1064                                         break;
1065                 }
1066         }
1067
1068         ret = -EADDRNOTAVAIL;
1069         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1070                 goto done;
1071
1072         switch (cmd) {
1073         case SIOCGIFADDR:       /* Get interface address */
1074                 ret = 0;
1075                 sin->sin_addr.s_addr = ifa->ifa_local;
1076                 break;
1077
1078         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1079                 ret = 0;
1080                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1081                 break;
1082
1083         case SIOCGIFDSTADDR:    /* Get the destination address */
1084                 ret = 0;
1085                 sin->sin_addr.s_addr = ifa->ifa_address;
1086                 break;
1087
1088         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1089                 ret = 0;
1090                 sin->sin_addr.s_addr = ifa->ifa_mask;
1091                 break;
1092
1093         case SIOCSIFFLAGS:
1094                 if (colon) {
1095                         ret = -EADDRNOTAVAIL;
1096                         if (!ifa)
1097                                 break;
1098                         ret = 0;
1099                         if (!(ifr->ifr_flags & IFF_UP))
1100                                 inet_del_ifa(in_dev, ifap, 1);
1101                         break;
1102                 }
1103                 ret = dev_change_flags(dev, ifr->ifr_flags);
1104                 break;
1105
1106         case SIOCSIFADDR:       /* Set interface address (and family) */
1107                 ret = -EINVAL;
1108                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1109                         break;
1110
1111                 if (!ifa) {
1112                         ret = -ENOBUFS;
1113                         ifa = inet_alloc_ifa();
1114                         if (!ifa)
1115                                 break;
1116                         INIT_HLIST_NODE(&ifa->hash);
1117                         if (colon)
1118                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1119                         else
1120                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1121                 } else {
1122                         ret = 0;
1123                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1124                                 break;
1125                         inet_del_ifa(in_dev, ifap, 0);
1126                         ifa->ifa_broadcast = 0;
1127                         ifa->ifa_scope = 0;
1128                 }
1129
1130                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1131
1132                 if (!(dev->flags & IFF_POINTOPOINT)) {
1133                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1134                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1135                         if ((dev->flags & IFF_BROADCAST) &&
1136                             ifa->ifa_prefixlen < 31)
1137                                 ifa->ifa_broadcast = ifa->ifa_address |
1138                                                      ~ifa->ifa_mask;
1139                 } else {
1140                         ifa->ifa_prefixlen = 32;
1141                         ifa->ifa_mask = inet_make_mask(32);
1142                 }
1143                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1144                 ret = inet_set_ifa(dev, ifa);
1145                 break;
1146
1147         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1148                 ret = 0;
1149                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1150                         inet_del_ifa(in_dev, ifap, 0);
1151                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1152                         inet_insert_ifa(ifa);
1153                 }
1154                 break;
1155
1156         case SIOCSIFDSTADDR:    /* Set the destination address */
1157                 ret = 0;
1158                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1159                         break;
1160                 ret = -EINVAL;
1161                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1162                         break;
1163                 ret = 0;
1164                 inet_del_ifa(in_dev, ifap, 0);
1165                 ifa->ifa_address = sin->sin_addr.s_addr;
1166                 inet_insert_ifa(ifa);
1167                 break;
1168
1169         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1170
1171                 /*
1172                  *      The mask we set must be legal.
1173                  */
1174                 ret = -EINVAL;
1175                 if (bad_mask(sin->sin_addr.s_addr, 0))
1176                         break;
1177                 ret = 0;
1178                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1179                         __be32 old_mask = ifa->ifa_mask;
1180                         inet_del_ifa(in_dev, ifap, 0);
1181                         ifa->ifa_mask = sin->sin_addr.s_addr;
1182                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1183
1184                         /* See if current broadcast address matches
1185                          * with current netmask, then recalculate
1186                          * the broadcast address. Otherwise it's a
1187                          * funny address, so don't touch it since
1188                          * the user seems to know what (s)he's doing...
1189                          */
1190                         if ((dev->flags & IFF_BROADCAST) &&
1191                             (ifa->ifa_prefixlen < 31) &&
1192                             (ifa->ifa_broadcast ==
1193                              (ifa->ifa_local|~old_mask))) {
1194                                 ifa->ifa_broadcast = (ifa->ifa_local |
1195                                                       ~sin->sin_addr.s_addr);
1196                         }
1197                         inet_insert_ifa(ifa);
1198                 }
1199                 break;
1200         }
1201 done:
1202         rtnl_unlock();
1203 out:
1204         return ret;
1205 }
1206
1207 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1208 {
1209         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1210         struct in_ifaddr *ifa;
1211         struct ifreq ifr;
1212         int done = 0;
1213
1214         if (WARN_ON(size > sizeof(struct ifreq)))
1215                 goto out;
1216
1217         if (!in_dev)
1218                 goto out;
1219
1220         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1221                 if (!buf) {
1222                         done += size;
1223                         continue;
1224                 }
1225                 if (len < size)
1226                         break;
1227                 memset(&ifr, 0, sizeof(struct ifreq));
1228                 strcpy(ifr.ifr_name, ifa->ifa_label);
1229
1230                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1231                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1232                                                                 ifa->ifa_local;
1233
1234                 if (copy_to_user(buf + done, &ifr, size)) {
1235                         done = -EFAULT;
1236                         break;
1237                 }
1238                 len  -= size;
1239                 done += size;
1240         }
1241 out:
1242         return done;
1243 }
1244
1245 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1246                                  int scope)
1247 {
1248         for_primary_ifa(in_dev) {
1249                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1250                     ifa->ifa_scope <= scope)
1251                         return ifa->ifa_local;
1252         } endfor_ifa(in_dev);
1253
1254         return 0;
1255 }
1256
1257 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1258 {
1259         __be32 addr = 0;
1260         struct in_device *in_dev;
1261         struct net *net = dev_net(dev);
1262         int master_idx;
1263
1264         rcu_read_lock();
1265         in_dev = __in_dev_get_rcu(dev);
1266         if (!in_dev)
1267                 goto no_in_dev;
1268
1269         for_primary_ifa(in_dev) {
1270                 if (ifa->ifa_scope > scope)
1271                         continue;
1272                 if (!dst || inet_ifa_match(dst, ifa)) {
1273                         addr = ifa->ifa_local;
1274                         break;
1275                 }
1276                 if (!addr)
1277                         addr = ifa->ifa_local;
1278         } endfor_ifa(in_dev);
1279
1280         if (addr)
1281                 goto out_unlock;
1282 no_in_dev:
1283         master_idx = l3mdev_master_ifindex_rcu(dev);
1284
1285         /* For VRFs, the VRF device takes the place of the loopback device,
1286          * with addresses on it being preferred.  Note in such cases the
1287          * loopback device will be among the devices that fail the master_idx
1288          * equality check in the loop below.
1289          */
1290         if (master_idx &&
1291             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1292             (in_dev = __in_dev_get_rcu(dev))) {
1293                 addr = in_dev_select_addr(in_dev, scope);
1294                 if (addr)
1295                         goto out_unlock;
1296         }
1297
1298         /* Not loopback addresses on loopback should be preferred
1299            in this case. It is important that lo is the first interface
1300            in dev_base list.
1301          */
1302         for_each_netdev_rcu(net, dev) {
1303                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1304                         continue;
1305
1306                 in_dev = __in_dev_get_rcu(dev);
1307                 if (!in_dev)
1308                         continue;
1309
1310                 addr = in_dev_select_addr(in_dev, scope);
1311                 if (addr)
1312                         goto out_unlock;
1313         }
1314 out_unlock:
1315         rcu_read_unlock();
1316         return addr;
1317 }
1318 EXPORT_SYMBOL(inet_select_addr);
1319
1320 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1321                               __be32 local, int scope)
1322 {
1323         int same = 0;
1324         __be32 addr = 0;
1325
1326         for_ifa(in_dev) {
1327                 if (!addr &&
1328                     (local == ifa->ifa_local || !local) &&
1329                     ifa->ifa_scope <= scope) {
1330                         addr = ifa->ifa_local;
1331                         if (same)
1332                                 break;
1333                 }
1334                 if (!same) {
1335                         same = (!local || inet_ifa_match(local, ifa)) &&
1336                                 (!dst || inet_ifa_match(dst, ifa));
1337                         if (same && addr) {
1338                                 if (local || !dst)
1339                                         break;
1340                                 /* Is the selected addr into dst subnet? */
1341                                 if (inet_ifa_match(addr, ifa))
1342                                         break;
1343                                 /* No, then can we use new local src? */
1344                                 if (ifa->ifa_scope <= scope) {
1345                                         addr = ifa->ifa_local;
1346                                         break;
1347                                 }
1348                                 /* search for large dst subnet for addr */
1349                                 same = 0;
1350                         }
1351                 }
1352         } endfor_ifa(in_dev);
1353
1354         return same ? addr : 0;
1355 }
1356
1357 /*
1358  * Confirm that local IP address exists using wildcards:
1359  * - net: netns to check, cannot be NULL
1360  * - in_dev: only on this interface, NULL=any interface
1361  * - dst: only in the same subnet as dst, 0=any dst
1362  * - local: address, 0=autoselect the local address
1363  * - scope: maximum allowed scope value for the local address
1364  */
1365 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1366                          __be32 dst, __be32 local, int scope)
1367 {
1368         __be32 addr = 0;
1369         struct net_device *dev;
1370
1371         if (in_dev)
1372                 return confirm_addr_indev(in_dev, dst, local, scope);
1373
1374         rcu_read_lock();
1375         for_each_netdev_rcu(net, dev) {
1376                 in_dev = __in_dev_get_rcu(dev);
1377                 if (in_dev) {
1378                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1379                         if (addr)
1380                                 break;
1381                 }
1382         }
1383         rcu_read_unlock();
1384
1385         return addr;
1386 }
1387 EXPORT_SYMBOL(inet_confirm_addr);
1388
1389 /*
1390  *      Device notifier
1391  */
1392
1393 int register_inetaddr_notifier(struct notifier_block *nb)
1394 {
1395         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1396 }
1397 EXPORT_SYMBOL(register_inetaddr_notifier);
1398
1399 int unregister_inetaddr_notifier(struct notifier_block *nb)
1400 {
1401         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1402 }
1403 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1404
1405 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1406 {
1407         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1408 }
1409 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1410
1411 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1412 {
1413         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1414             nb);
1415 }
1416 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1417
1418 /* Rename ifa_labels for a device name change. Make some effort to preserve
1419  * existing alias numbering and to create unique labels if possible.
1420 */
1421 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1422 {
1423         struct in_ifaddr *ifa;
1424         int named = 0;
1425
1426         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1427                 char old[IFNAMSIZ], *dot;
1428
1429                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1430                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1431                 if (named++ == 0)
1432                         goto skip;
1433                 dot = strchr(old, ':');
1434                 if (!dot) {
1435                         sprintf(old, ":%d", named);
1436                         dot = old;
1437                 }
1438                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1439                         strcat(ifa->ifa_label, dot);
1440                 else
1441                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1442 skip:
1443                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1444         }
1445 }
1446
1447 static bool inetdev_valid_mtu(unsigned int mtu)
1448 {
1449         return mtu >= IPV4_MIN_MTU;
1450 }
1451
1452 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1453                                         struct in_device *in_dev)
1454
1455 {
1456         struct in_ifaddr *ifa;
1457
1458         for (ifa = in_dev->ifa_list; ifa;
1459              ifa = ifa->ifa_next) {
1460                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1461                          ifa->ifa_local, dev,
1462                          ifa->ifa_local, NULL,
1463                          dev->dev_addr, NULL);
1464         }
1465 }
1466
1467 /* Called only under RTNL semaphore */
1468
1469 static int inetdev_event(struct notifier_block *this, unsigned long event,
1470                          void *ptr)
1471 {
1472         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1473         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1474
1475         ASSERT_RTNL();
1476
1477         if (!in_dev) {
1478                 if (event == NETDEV_REGISTER) {
1479                         in_dev = inetdev_init(dev);
1480                         if (IS_ERR(in_dev))
1481                                 return notifier_from_errno(PTR_ERR(in_dev));
1482                         if (dev->flags & IFF_LOOPBACK) {
1483                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1484                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1485                         }
1486                 } else if (event == NETDEV_CHANGEMTU) {
1487                         /* Re-enabling IP */
1488                         if (inetdev_valid_mtu(dev->mtu))
1489                                 in_dev = inetdev_init(dev);
1490                 }
1491                 goto out;
1492         }
1493
1494         switch (event) {
1495         case NETDEV_REGISTER:
1496                 pr_debug("%s: bug\n", __func__);
1497                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1498                 break;
1499         case NETDEV_UP:
1500                 if (!inetdev_valid_mtu(dev->mtu))
1501                         break;
1502                 if (dev->flags & IFF_LOOPBACK) {
1503                         struct in_ifaddr *ifa = inet_alloc_ifa();
1504
1505                         if (ifa) {
1506                                 INIT_HLIST_NODE(&ifa->hash);
1507                                 ifa->ifa_local =
1508                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1509                                 ifa->ifa_prefixlen = 8;
1510                                 ifa->ifa_mask = inet_make_mask(8);
1511                                 in_dev_hold(in_dev);
1512                                 ifa->ifa_dev = in_dev;
1513                                 ifa->ifa_scope = RT_SCOPE_HOST;
1514                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1515                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1516                                                  INFINITY_LIFE_TIME);
1517                                 ipv4_devconf_setall(in_dev);
1518                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1519                                 inet_insert_ifa(ifa);
1520                         }
1521                 }
1522                 ip_mc_up(in_dev);
1523                 /* fall through */
1524         case NETDEV_CHANGEADDR:
1525                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1526                         break;
1527                 /* fall through */
1528         case NETDEV_NOTIFY_PEERS:
1529                 /* Send gratuitous ARP to notify of link change */
1530                 inetdev_send_gratuitous_arp(dev, in_dev);
1531                 break;
1532         case NETDEV_DOWN:
1533                 ip_mc_down(in_dev);
1534                 break;
1535         case NETDEV_PRE_TYPE_CHANGE:
1536                 ip_mc_unmap(in_dev);
1537                 break;
1538         case NETDEV_POST_TYPE_CHANGE:
1539                 ip_mc_remap(in_dev);
1540                 break;
1541         case NETDEV_CHANGEMTU:
1542                 if (inetdev_valid_mtu(dev->mtu))
1543                         break;
1544                 /* disable IP when MTU is not enough */
1545                 /* fall through */
1546         case NETDEV_UNREGISTER:
1547                 inetdev_destroy(in_dev);
1548                 break;
1549         case NETDEV_CHANGENAME:
1550                 /* Do not notify about label change, this event is
1551                  * not interesting to applications using netlink.
1552                  */
1553                 inetdev_changename(dev, in_dev);
1554
1555                 devinet_sysctl_unregister(in_dev);
1556                 devinet_sysctl_register(in_dev);
1557                 break;
1558         }
1559 out:
1560         return NOTIFY_DONE;
1561 }
1562
1563 static struct notifier_block ip_netdev_notifier = {
1564         .notifier_call = inetdev_event,
1565 };
1566
1567 static size_t inet_nlmsg_size(void)
1568 {
1569         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1570                + nla_total_size(4) /* IFA_ADDRESS */
1571                + nla_total_size(4) /* IFA_LOCAL */
1572                + nla_total_size(4) /* IFA_BROADCAST */
1573                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1574                + nla_total_size(4)  /* IFA_FLAGS */
1575                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1576                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1577 }
1578
1579 static inline u32 cstamp_delta(unsigned long cstamp)
1580 {
1581         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1582 }
1583
1584 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1585                          unsigned long tstamp, u32 preferred, u32 valid)
1586 {
1587         struct ifa_cacheinfo ci;
1588
1589         ci.cstamp = cstamp_delta(cstamp);
1590         ci.tstamp = cstamp_delta(tstamp);
1591         ci.ifa_prefered = preferred;
1592         ci.ifa_valid = valid;
1593
1594         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1595 }
1596
1597 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1598                             struct inet_fill_args *args)
1599 {
1600         struct ifaddrmsg *ifm;
1601         struct nlmsghdr  *nlh;
1602         u32 preferred, valid;
1603
1604         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1605                         args->flags);
1606         if (!nlh)
1607                 return -EMSGSIZE;
1608
1609         ifm = nlmsg_data(nlh);
1610         ifm->ifa_family = AF_INET;
1611         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1612         ifm->ifa_flags = ifa->ifa_flags;
1613         ifm->ifa_scope = ifa->ifa_scope;
1614         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1615
1616         if (args->netnsid >= 0 &&
1617             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1618                 goto nla_put_failure;
1619
1620         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1621                 preferred = ifa->ifa_preferred_lft;
1622                 valid = ifa->ifa_valid_lft;
1623                 if (preferred != INFINITY_LIFE_TIME) {
1624                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1625
1626                         if (preferred > tval)
1627                                 preferred -= tval;
1628                         else
1629                                 preferred = 0;
1630                         if (valid != INFINITY_LIFE_TIME) {
1631                                 if (valid > tval)
1632                                         valid -= tval;
1633                                 else
1634                                         valid = 0;
1635                         }
1636                 }
1637         } else {
1638                 preferred = INFINITY_LIFE_TIME;
1639                 valid = INFINITY_LIFE_TIME;
1640         }
1641         if ((ifa->ifa_address &&
1642              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1643             (ifa->ifa_local &&
1644              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1645             (ifa->ifa_broadcast &&
1646              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1647             (ifa->ifa_label[0] &&
1648              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1649             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1650             (ifa->ifa_rt_priority &&
1651              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1652             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1653                           preferred, valid))
1654                 goto nla_put_failure;
1655
1656         nlmsg_end(skb, nlh);
1657         return 0;
1658
1659 nla_put_failure:
1660         nlmsg_cancel(skb, nlh);
1661         return -EMSGSIZE;
1662 }
1663
1664 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1665                                       struct inet_fill_args *fillargs,
1666                                       struct net **tgt_net, struct sock *sk,
1667                                       struct netlink_callback *cb)
1668 {
1669         struct netlink_ext_ack *extack = cb->extack;
1670         struct nlattr *tb[IFA_MAX+1];
1671         struct ifaddrmsg *ifm;
1672         int err, i;
1673
1674         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1675                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1676                 return -EINVAL;
1677         }
1678
1679         ifm = nlmsg_data(nlh);
1680         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1681                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1682                 return -EINVAL;
1683         }
1684
1685         fillargs->ifindex = ifm->ifa_index;
1686         if (fillargs->ifindex) {
1687                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1688                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1689         }
1690
1691         err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1692                                  ifa_ipv4_policy, extack);
1693         if (err < 0)
1694                 return err;
1695
1696         for (i = 0; i <= IFA_MAX; ++i) {
1697                 if (!tb[i])
1698                         continue;
1699
1700                 if (i == IFA_TARGET_NETNSID) {
1701                         struct net *net;
1702
1703                         fillargs->netnsid = nla_get_s32(tb[i]);
1704
1705                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1706                         if (IS_ERR(net)) {
1707                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1708                                 return PTR_ERR(net);
1709                         }
1710                         *tgt_net = net;
1711                 } else {
1712                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1713                         return -EINVAL;
1714                 }
1715         }
1716
1717         return 0;
1718 }
1719
1720 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1721                             struct netlink_callback *cb, int s_ip_idx,
1722                             struct inet_fill_args *fillargs)
1723 {
1724         struct in_ifaddr *ifa;
1725         int ip_idx = 0;
1726         int err;
1727
1728         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1729                 if (ip_idx < s_ip_idx)
1730                         continue;
1731
1732                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1733                 if (err < 0)
1734                         goto done;
1735
1736                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1737         }
1738         err = 0;
1739
1740 done:
1741         cb->args[2] = ip_idx;
1742
1743         return err;
1744 }
1745
1746 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1747 {
1748         const struct nlmsghdr *nlh = cb->nlh;
1749         struct inet_fill_args fillargs = {
1750                 .portid = NETLINK_CB(cb->skb).portid,
1751                 .seq = nlh->nlmsg_seq,
1752                 .event = RTM_NEWADDR,
1753                 .flags = NLM_F_MULTI,
1754                 .netnsid = -1,
1755         };
1756         struct net *net = sock_net(skb->sk);
1757         struct net *tgt_net = net;
1758         int h, s_h;
1759         int idx, s_idx;
1760         int s_ip_idx;
1761         struct net_device *dev;
1762         struct in_device *in_dev;
1763         struct hlist_head *head;
1764         int err;
1765
1766         s_h = cb->args[0];
1767         s_idx = idx = cb->args[1];
1768         s_ip_idx = cb->args[2];
1769
1770         if (cb->strict_check) {
1771                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1772                                                  skb->sk, cb);
1773                 if (err < 0)
1774                         return err;
1775
1776                 if (fillargs.ifindex) {
1777                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1778                         if (!dev)
1779                                 return -ENODEV;
1780
1781                         in_dev = __in_dev_get_rtnl(dev);
1782                         if (in_dev) {
1783                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1784                                                        &fillargs);
1785                         }
1786                         goto put_tgt_net;
1787                 }
1788         }
1789
1790         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1791                 idx = 0;
1792                 head = &tgt_net->dev_index_head[h];
1793                 rcu_read_lock();
1794                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1795                           tgt_net->dev_base_seq;
1796                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1797                         if (idx < s_idx)
1798                                 goto cont;
1799                         if (h > s_h || idx > s_idx)
1800                                 s_ip_idx = 0;
1801                         in_dev = __in_dev_get_rcu(dev);
1802                         if (!in_dev)
1803                                 goto cont;
1804
1805                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1806                                                &fillargs);
1807                         if (err < 0) {
1808                                 rcu_read_unlock();
1809                                 goto done;
1810                         }
1811 cont:
1812                         idx++;
1813                 }
1814                 rcu_read_unlock();
1815         }
1816
1817 done:
1818         cb->args[0] = h;
1819         cb->args[1] = idx;
1820 put_tgt_net:
1821         if (fillargs.netnsid >= 0)
1822                 put_net(tgt_net);
1823
1824         return skb->len;
1825 }
1826
1827 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1828                       u32 portid)
1829 {
1830         struct inet_fill_args fillargs = {
1831                 .portid = portid,
1832                 .seq = nlh ? nlh->nlmsg_seq : 0,
1833                 .event = event,
1834                 .flags = 0,
1835                 .netnsid = -1,
1836         };
1837         struct sk_buff *skb;
1838         int err = -ENOBUFS;
1839         struct net *net;
1840
1841         net = dev_net(ifa->ifa_dev->dev);
1842         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1843         if (!skb)
1844                 goto errout;
1845
1846         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1847         if (err < 0) {
1848                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1849                 WARN_ON(err == -EMSGSIZE);
1850                 kfree_skb(skb);
1851                 goto errout;
1852         }
1853         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1854         return;
1855 errout:
1856         if (err < 0)
1857                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1858 }
1859
1860 static size_t inet_get_link_af_size(const struct net_device *dev,
1861                                     u32 ext_filter_mask)
1862 {
1863         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1864
1865         if (!in_dev)
1866                 return 0;
1867
1868         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1869 }
1870
1871 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1872                              u32 ext_filter_mask)
1873 {
1874         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1875         struct nlattr *nla;
1876         int i;
1877
1878         if (!in_dev)
1879                 return -ENODATA;
1880
1881         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1882         if (!nla)
1883                 return -EMSGSIZE;
1884
1885         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1886                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1887
1888         return 0;
1889 }
1890
1891 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1892         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1893 };
1894
1895 static int inet_validate_link_af(const struct net_device *dev,
1896                                  const struct nlattr *nla)
1897 {
1898         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1899         int err, rem;
1900
1901         if (dev && !__in_dev_get_rcu(dev))
1902                 return -EAFNOSUPPORT;
1903
1904         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1905         if (err < 0)
1906                 return err;
1907
1908         if (tb[IFLA_INET_CONF]) {
1909                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1910                         int cfgid = nla_type(a);
1911
1912                         if (nla_len(a) < 4)
1913                                 return -EINVAL;
1914
1915                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1916                                 return -EINVAL;
1917                 }
1918         }
1919
1920         return 0;
1921 }
1922
1923 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1924 {
1925         struct in_device *in_dev = __in_dev_get_rcu(dev);
1926         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1927         int rem;
1928
1929         if (!in_dev)
1930                 return -EAFNOSUPPORT;
1931
1932         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1933                 BUG();
1934
1935         if (tb[IFLA_INET_CONF]) {
1936                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1937                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1938         }
1939
1940         return 0;
1941 }
1942
1943 static int inet_netconf_msgsize_devconf(int type)
1944 {
1945         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1946                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1947         bool all = false;
1948
1949         if (type == NETCONFA_ALL)
1950                 all = true;
1951
1952         if (all || type == NETCONFA_FORWARDING)
1953                 size += nla_total_size(4);
1954         if (all || type == NETCONFA_RP_FILTER)
1955                 size += nla_total_size(4);
1956         if (all || type == NETCONFA_MC_FORWARDING)
1957                 size += nla_total_size(4);
1958         if (all || type == NETCONFA_BC_FORWARDING)
1959                 size += nla_total_size(4);
1960         if (all || type == NETCONFA_PROXY_NEIGH)
1961                 size += nla_total_size(4);
1962         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1963                 size += nla_total_size(4);
1964
1965         return size;
1966 }
1967
1968 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1969                                      struct ipv4_devconf *devconf, u32 portid,
1970                                      u32 seq, int event, unsigned int flags,
1971                                      int type)
1972 {
1973         struct nlmsghdr  *nlh;
1974         struct netconfmsg *ncm;
1975         bool all = false;
1976
1977         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1978                         flags);
1979         if (!nlh)
1980                 return -EMSGSIZE;
1981
1982         if (type == NETCONFA_ALL)
1983                 all = true;
1984
1985         ncm = nlmsg_data(nlh);
1986         ncm->ncm_family = AF_INET;
1987
1988         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1989                 goto nla_put_failure;
1990
1991         if (!devconf)
1992                 goto out;
1993
1994         if ((all || type == NETCONFA_FORWARDING) &&
1995             nla_put_s32(skb, NETCONFA_FORWARDING,
1996                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1997                 goto nla_put_failure;
1998         if ((all || type == NETCONFA_RP_FILTER) &&
1999             nla_put_s32(skb, NETCONFA_RP_FILTER,
2000                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2001                 goto nla_put_failure;
2002         if ((all || type == NETCONFA_MC_FORWARDING) &&
2003             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2004                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2005                 goto nla_put_failure;
2006         if ((all || type == NETCONFA_BC_FORWARDING) &&
2007             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2008                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2009                 goto nla_put_failure;
2010         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2011             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2012                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2013                 goto nla_put_failure;
2014         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2015             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2016                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2017                 goto nla_put_failure;
2018
2019 out:
2020         nlmsg_end(skb, nlh);
2021         return 0;
2022
2023 nla_put_failure:
2024         nlmsg_cancel(skb, nlh);
2025         return -EMSGSIZE;
2026 }
2027
2028 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2029                                  int ifindex, struct ipv4_devconf *devconf)
2030 {
2031         struct sk_buff *skb;
2032         int err = -ENOBUFS;
2033
2034         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2035         if (!skb)
2036                 goto errout;
2037
2038         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2039                                         event, 0, type);
2040         if (err < 0) {
2041                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2042                 WARN_ON(err == -EMSGSIZE);
2043                 kfree_skb(skb);
2044                 goto errout;
2045         }
2046         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2047         return;
2048 errout:
2049         if (err < 0)
2050                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2051 }
2052
2053 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2054         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2055         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2056         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2057         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2058         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2059 };
2060
2061 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2062                                     struct nlmsghdr *nlh,
2063                                     struct netlink_ext_ack *extack)
2064 {
2065         struct net *net = sock_net(in_skb->sk);
2066         struct nlattr *tb[NETCONFA_MAX+1];
2067         struct netconfmsg *ncm;
2068         struct sk_buff *skb;
2069         struct ipv4_devconf *devconf;
2070         struct in_device *in_dev;
2071         struct net_device *dev;
2072         int ifindex;
2073         int err;
2074
2075         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2076                           devconf_ipv4_policy, extack);
2077         if (err < 0)
2078                 goto errout;
2079
2080         err = -EINVAL;
2081         if (!tb[NETCONFA_IFINDEX])
2082                 goto errout;
2083
2084         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2085         switch (ifindex) {
2086         case NETCONFA_IFINDEX_ALL:
2087                 devconf = net->ipv4.devconf_all;
2088                 break;
2089         case NETCONFA_IFINDEX_DEFAULT:
2090                 devconf = net->ipv4.devconf_dflt;
2091                 break;
2092         default:
2093                 dev = __dev_get_by_index(net, ifindex);
2094                 if (!dev)
2095                         goto errout;
2096                 in_dev = __in_dev_get_rtnl(dev);
2097                 if (!in_dev)
2098                         goto errout;
2099                 devconf = &in_dev->cnf;
2100                 break;
2101         }
2102
2103         err = -ENOBUFS;
2104         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2105         if (!skb)
2106                 goto errout;
2107
2108         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2109                                         NETLINK_CB(in_skb).portid,
2110                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2111                                         NETCONFA_ALL);
2112         if (err < 0) {
2113                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2114                 WARN_ON(err == -EMSGSIZE);
2115                 kfree_skb(skb);
2116                 goto errout;
2117         }
2118         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2119 errout:
2120         return err;
2121 }
2122
2123 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2124                                      struct netlink_callback *cb)
2125 {
2126         const struct nlmsghdr *nlh = cb->nlh;
2127         struct net *net = sock_net(skb->sk);
2128         int h, s_h;
2129         int idx, s_idx;
2130         struct net_device *dev;
2131         struct in_device *in_dev;
2132         struct hlist_head *head;
2133
2134         if (cb->strict_check) {
2135                 struct netlink_ext_ack *extack = cb->extack;
2136                 struct netconfmsg *ncm;
2137
2138                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2139                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2140                         return -EINVAL;
2141                 }
2142
2143                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2144                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2145                         return -EINVAL;
2146                 }
2147         }
2148
2149         s_h = cb->args[0];
2150         s_idx = idx = cb->args[1];
2151
2152         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2153                 idx = 0;
2154                 head = &net->dev_index_head[h];
2155                 rcu_read_lock();
2156                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2157                           net->dev_base_seq;
2158                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2159                         if (idx < s_idx)
2160                                 goto cont;
2161                         in_dev = __in_dev_get_rcu(dev);
2162                         if (!in_dev)
2163                                 goto cont;
2164
2165                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2166                                                       &in_dev->cnf,
2167                                                       NETLINK_CB(cb->skb).portid,
2168                                                       nlh->nlmsg_seq,
2169                                                       RTM_NEWNETCONF,
2170                                                       NLM_F_MULTI,
2171                                                       NETCONFA_ALL) < 0) {
2172                                 rcu_read_unlock();
2173                                 goto done;
2174                         }
2175                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2176 cont:
2177                         idx++;
2178                 }
2179                 rcu_read_unlock();
2180         }
2181         if (h == NETDEV_HASHENTRIES) {
2182                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2183                                               net->ipv4.devconf_all,
2184                                               NETLINK_CB(cb->skb).portid,
2185                                               nlh->nlmsg_seq,
2186                                               RTM_NEWNETCONF, NLM_F_MULTI,
2187                                               NETCONFA_ALL) < 0)
2188                         goto done;
2189                 else
2190                         h++;
2191         }
2192         if (h == NETDEV_HASHENTRIES + 1) {
2193                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2194                                               net->ipv4.devconf_dflt,
2195                                               NETLINK_CB(cb->skb).portid,
2196                                               nlh->nlmsg_seq,
2197                                               RTM_NEWNETCONF, NLM_F_MULTI,
2198                                               NETCONFA_ALL) < 0)
2199                         goto done;
2200                 else
2201                         h++;
2202         }
2203 done:
2204         cb->args[0] = h;
2205         cb->args[1] = idx;
2206
2207         return skb->len;
2208 }
2209
2210 #ifdef CONFIG_SYSCTL
2211
2212 static void devinet_copy_dflt_conf(struct net *net, int i)
2213 {
2214         struct net_device *dev;
2215
2216         rcu_read_lock();
2217         for_each_netdev_rcu(net, dev) {
2218                 struct in_device *in_dev;
2219
2220                 in_dev = __in_dev_get_rcu(dev);
2221                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2222                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2223         }
2224         rcu_read_unlock();
2225 }
2226
2227 /* called with RTNL locked */
2228 static void inet_forward_change(struct net *net)
2229 {
2230         struct net_device *dev;
2231         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2232
2233         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2234         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2235         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2236                                     NETCONFA_FORWARDING,
2237                                     NETCONFA_IFINDEX_ALL,
2238                                     net->ipv4.devconf_all);
2239         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2240                                     NETCONFA_FORWARDING,
2241                                     NETCONFA_IFINDEX_DEFAULT,
2242                                     net->ipv4.devconf_dflt);
2243
2244         for_each_netdev(net, dev) {
2245                 struct in_device *in_dev;
2246
2247                 if (on)
2248                         dev_disable_lro(dev);
2249
2250                 in_dev = __in_dev_get_rtnl(dev);
2251                 if (in_dev) {
2252                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2253                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2254                                                     NETCONFA_FORWARDING,
2255                                                     dev->ifindex, &in_dev->cnf);
2256                 }
2257         }
2258 }
2259
2260 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2261 {
2262         if (cnf == net->ipv4.devconf_dflt)
2263                 return NETCONFA_IFINDEX_DEFAULT;
2264         else if (cnf == net->ipv4.devconf_all)
2265                 return NETCONFA_IFINDEX_ALL;
2266         else {
2267                 struct in_device *idev
2268                         = container_of(cnf, struct in_device, cnf);
2269                 return idev->dev->ifindex;
2270         }
2271 }
2272
2273 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2274                              void __user *buffer,
2275                              size_t *lenp, loff_t *ppos)
2276 {
2277         int old_value = *(int *)ctl->data;
2278         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2279         int new_value = *(int *)ctl->data;
2280
2281         if (write) {
2282                 struct ipv4_devconf *cnf = ctl->extra1;
2283                 struct net *net = ctl->extra2;
2284                 int i = (int *)ctl->data - cnf->data;
2285                 int ifindex;
2286
2287                 set_bit(i, cnf->state);
2288
2289                 if (cnf == net->ipv4.devconf_dflt)
2290                         devinet_copy_dflt_conf(net, i);
2291                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2292                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2293                         if ((new_value == 0) && (old_value != 0))
2294                                 rt_cache_flush(net);
2295
2296                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2297                     new_value != old_value)
2298                         rt_cache_flush(net);
2299
2300                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2301                     new_value != old_value) {
2302                         ifindex = devinet_conf_ifindex(net, cnf);
2303                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2304                                                     NETCONFA_RP_FILTER,
2305                                                     ifindex, cnf);
2306                 }
2307                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2308                     new_value != old_value) {
2309                         ifindex = devinet_conf_ifindex(net, cnf);
2310                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2311                                                     NETCONFA_PROXY_NEIGH,
2312                                                     ifindex, cnf);
2313                 }
2314                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2315                     new_value != old_value) {
2316                         ifindex = devinet_conf_ifindex(net, cnf);
2317                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2318                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2319                                                     ifindex, cnf);
2320                 }
2321         }
2322
2323         return ret;
2324 }
2325
2326 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2327                                   void __user *buffer,
2328                                   size_t *lenp, loff_t *ppos)
2329 {
2330         int *valp = ctl->data;
2331         int val = *valp;
2332         loff_t pos = *ppos;
2333         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2334
2335         if (write && *valp != val) {
2336                 struct net *net = ctl->extra2;
2337
2338                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2339                         if (!rtnl_trylock()) {
2340                                 /* Restore the original values before restarting */
2341                                 *valp = val;
2342                                 *ppos = pos;
2343                                 return restart_syscall();
2344                         }
2345                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2346                                 inet_forward_change(net);
2347                         } else {
2348                                 struct ipv4_devconf *cnf = ctl->extra1;
2349                                 struct in_device *idev =
2350                                         container_of(cnf, struct in_device, cnf);
2351                                 if (*valp)
2352                                         dev_disable_lro(idev->dev);
2353                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2354                                                             NETCONFA_FORWARDING,
2355                                                             idev->dev->ifindex,
2356                                                             cnf);
2357                         }
2358                         rtnl_unlock();
2359                         rt_cache_flush(net);
2360                 } else
2361                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2362                                                     NETCONFA_FORWARDING,
2363                                                     NETCONFA_IFINDEX_DEFAULT,
2364                                                     net->ipv4.devconf_dflt);
2365         }
2366
2367         return ret;
2368 }
2369
2370 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2371                                 void __user *buffer,
2372                                 size_t *lenp, loff_t *ppos)
2373 {
2374         int *valp = ctl->data;
2375         int val = *valp;
2376         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2377         struct net *net = ctl->extra2;
2378
2379         if (write && *valp != val)
2380                 rt_cache_flush(net);
2381
2382         return ret;
2383 }
2384
/*
 * Build one ctl_table entry for the devconf setting IPV4_DEVCONF_<attr>.
 * The entry points at slot IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data
 * and keeps a pointer to ipv4_devconf in extra1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, label, perm, handler) \
	{ \
		.procname	= label, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Entry with mode 0644 handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, devinet_conf_proc)

/* Entry with mode 0444 handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0444, devinet_conf_proc)

/* Entry with mode 0644 and a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, handler) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, handler)

/* Entry with mode 0644 handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, ipv4_doint_and_flush)
2407
2408 static struct devinet_sysctl_table {
2409         struct ctl_table_header *sysctl_header;
2410         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2411 } devinet_sysctl = {
2412         .devinet_vars = {
2413                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2414                                              devinet_sysctl_forward),
2415                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2416                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2417
2418                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2419                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2420                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2421                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2422                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2423                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2424                                         "accept_source_route"),
2425                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2426                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2427                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2428                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2429                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2430                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2431                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2432                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2433                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2434                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2435                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2436                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2437                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2438                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2439                                         "force_igmp_version"),
2440                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2441                                         "igmpv2_unsolicited_report_interval"),
2442                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2443                                         "igmpv3_unsolicited_report_interval"),
2444                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2445                                         "ignore_routes_with_linkdown"),
2446                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2447                                         "drop_gratuitous_arp"),
2448
2449                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2450                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2451                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2452                                               "promote_secondaries"),
2453                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2454                                               "route_localnet"),
2455                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2456                                               "drop_unicast_in_l2_multicast"),
2457         },
2458 };
2459
2460 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2461                                      int ifindex, struct ipv4_devconf *p)
2462 {
2463         int i;
2464         struct devinet_sysctl_table *t;
2465         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2466
2467         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2468         if (!t)
2469                 goto out;
2470
2471         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2472                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2473                 t->devinet_vars[i].extra1 = p;
2474                 t->devinet_vars[i].extra2 = net;
2475         }
2476
2477         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2478
2479         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2480         if (!t->sysctl_header)
2481                 goto free;
2482
2483         p->sysctl = t;
2484
2485         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2486                                     ifindex, p);
2487         return 0;
2488
2489 free:
2490         kfree(t);
2491 out:
2492         return -ENOBUFS;
2493 }
2494
2495 static void __devinet_sysctl_unregister(struct net *net,
2496                                         struct ipv4_devconf *cnf, int ifindex)
2497 {
2498         struct devinet_sysctl_table *t = cnf->sysctl;
2499
2500         if (t) {
2501                 cnf->sysctl = NULL;
2502                 unregister_net_sysctl_table(t->sysctl_header);
2503                 kfree(t);
2504         }
2505
2506         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2507 }
2508
2509 static int devinet_sysctl_register(struct in_device *idev)
2510 {
2511         int err;
2512
2513         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2514                 return -EINVAL;
2515
2516         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2517         if (err)
2518                 return err;
2519         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2520                                         idev->dev->ifindex, &idev->cnf);
2521         if (err)
2522                 neigh_sysctl_unregister(idev->arp_parms);
2523         return err;
2524 }
2525
2526 static void devinet_sysctl_unregister(struct in_device *idev)
2527 {
2528         struct net *net = dev_net(idev->dev);
2529
2530         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2531         neigh_sysctl_unregister(idev->arp_parms);
2532 }
2533
2534 static struct ctl_table ctl_forward_entry[] = {
2535         {
2536                 .procname       = "ip_forward",
2537                 .data           = &ipv4_devconf.data[
2538                                         IPV4_DEVCONF_FORWARDING - 1],
2539                 .maxlen         = sizeof(int),
2540                 .mode           = 0644,
2541                 .proc_handler   = devinet_sysctl_forward,
2542                 .extra1         = &ipv4_devconf,
2543                 .extra2         = &init_net,
2544         },
2545         { },
2546 };
2547 #endif
2548
2549 static __net_init int devinet_init_net(struct net *net)
2550 {
2551         int err;
2552         struct ipv4_devconf *all, *dflt;
2553 #ifdef CONFIG_SYSCTL
2554         struct ctl_table *tbl = ctl_forward_entry;
2555         struct ctl_table_header *forw_hdr;
2556 #endif
2557
2558         err = -ENOMEM;
2559         all = &ipv4_devconf;
2560         dflt = &ipv4_devconf_dflt;
2561
2562         if (!net_eq(net, &init_net)) {
2563                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2564                 if (!all)
2565                         goto err_alloc_all;
2566
2567                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2568                 if (!dflt)
2569                         goto err_alloc_dflt;
2570
2571 #ifdef CONFIG_SYSCTL
2572                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2573                 if (!tbl)
2574                         goto err_alloc_ctl;
2575
2576                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2577                 tbl[0].extra1 = all;
2578                 tbl[0].extra2 = net;
2579 #endif
2580         }
2581
2582 #ifdef CONFIG_SYSCTL
2583         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2584         if (err < 0)
2585                 goto err_reg_all;
2586
2587         err = __devinet_sysctl_register(net, "default",
2588                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2589         if (err < 0)
2590                 goto err_reg_dflt;
2591
2592         err = -ENOMEM;
2593         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2594         if (!forw_hdr)
2595                 goto err_reg_ctl;
2596         net->ipv4.forw_hdr = forw_hdr;
2597 #endif
2598
2599         net->ipv4.devconf_all = all;
2600         net->ipv4.devconf_dflt = dflt;
2601         return 0;
2602
2603 #ifdef CONFIG_SYSCTL
2604 err_reg_ctl:
2605         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2606 err_reg_dflt:
2607         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2608 err_reg_all:
2609         if (tbl != ctl_forward_entry)
2610                 kfree(tbl);
2611 err_alloc_ctl:
2612 #endif
2613         if (dflt != &ipv4_devconf_dflt)
2614                 kfree(dflt);
2615 err_alloc_dflt:
2616         if (all != &ipv4_devconf)
2617                 kfree(all);
2618 err_alloc_all:
2619         return err;
2620 }
2621
2622 static __net_exit void devinet_exit_net(struct net *net)
2623 {
2624 #ifdef CONFIG_SYSCTL
2625         struct ctl_table *tbl;
2626
2627         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2628         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2629         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2630                                     NETCONFA_IFINDEX_DEFAULT);
2631         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2632                                     NETCONFA_IFINDEX_ALL);
2633         kfree(tbl);
2634 #endif
2635         kfree(net->ipv4.devconf_dflt);
2636         kfree(net->ipv4.devconf_all);
2637 }
2638
2639 static __net_initdata struct pernet_operations devinet_ops = {
2640         .init = devinet_init_net,
2641         .exit = devinet_exit_net,
2642 };
2643
2644 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2645         .family           = AF_INET,
2646         .fill_link_af     = inet_fill_link_af,
2647         .get_link_af_size = inet_get_link_af_size,
2648         .validate_link_af = inet_validate_link_af,
2649         .set_link_af      = inet_set_link_af,
2650 };
2651
2652 void __init devinet_init(void)
2653 {
2654         int i;
2655
2656         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2657                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2658
2659         register_pernet_subsys(&devinet_ops);
2660
2661         register_gifconf(PF_INET, inet_gifconf);
2662         register_netdevice_notifier(&ip_netdev_notifier);
2663
2664         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2665
2666         rtnl_af_register(&inet_af_ops);
2667
2668         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2669         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2670         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2671         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2672                       inet_netconf_dump_devconf, 0);
2673 }