Merge tag 'for-4.16-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
[linux-2.6-block.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Read attribute @attr from the default devconf of namespace @net.
 *
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102 };
103
104 #define IN4_ADDR_HSIZE_SHIFT    8
105 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
106
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111         u32 val = (__force u32) addr ^ net_hash_mix(net);
112
113         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118         u32 hash = inet_addr_hash(net, ifa->ifa_local);
119
120         ASSERT_RTNL();
121         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126         ASSERT_RTNL();
127         hlist_del_init_rcu(&ifa->hash);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         ifa = inet_lookup_ifaddr_rcu(net, addr);
145         if (!ifa) {
146                 struct flowi4 fl4 = { .daddr = addr };
147                 struct fib_result res = { 0 };
148                 struct fib_table *local;
149
150                 /* Fallback to FIB local table so that communication
151                  * over loopback subnets work.
152                  */
153                 local = fib_get_table(net, RT_TABLE_LOCAL);
154                 if (local &&
155                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156                     res.type == RTN_LOCAL)
157                         result = FIB_RES_DEV(res);
158         } else {
159                 result = ifa->ifa_dev->dev;
160         }
161         if (result && devref)
162                 dev_hold(result);
163         rcu_read_unlock();
164         return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
167
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171         u32 hash = inet_addr_hash(net, addr);
172         struct in_ifaddr *ifa;
173
174         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175                 if (ifa->ifa_local == addr &&
176                     net_eq(dev_net(ifa->ifa_dev->dev), net))
177                         return ifa;
178
179         return NULL;
180 }
181
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187                          int destroy);
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194         return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200
201 /* Locks all the inet devices. */
202
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211         if (ifa->ifa_dev)
212                 in_dev_put(ifa->ifa_dev);
213         kfree(ifa);
214 }
215
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223         struct net_device *dev = idev->dev;
224
225         WARN_ON(idev->ifa_list);
226         WARN_ON(idev->mc_list);
227         kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231         dev_put(dev);
232         if (!idev->dead)
233                 pr_err("Freeing alive in_device %p\n", idev);
234         else
235                 kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241         struct in_device *in_dev;
242         int err = -ENOMEM;
243
244         ASSERT_RTNL();
245
246         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247         if (!in_dev)
248                 goto out;
249         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250                         sizeof(in_dev->cnf));
251         in_dev->cnf.sysctl = NULL;
252         in_dev->dev = dev;
253         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254         if (!in_dev->arp_parms)
255                 goto out_kfree;
256         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257                 dev_disable_lro(dev);
258         /* Reference in_dev->dev */
259         dev_hold(dev);
260         /* Account for reference dev->ip_ptr (below) */
261         refcount_set(&in_dev->refcnt, 1);
262
263         err = devinet_sysctl_register(in_dev);
264         if (err) {
265                 in_dev->dead = 1;
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
366                         if (!do_promote) {
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/* Delayed work item that runs check_lifetime(), defined further down. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid, struct netlink_ext_ack *extack)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451         struct in_validator_info ivi;
452         int ret;
453
454         ASSERT_RTNL();
455
456         if (!ifa->ifa_local) {
457                 inet_free_ifa(ifa);
458                 return 0;
459         }
460
461         ifa->ifa_flags &= ~IFA_F_SECONDARY;
462         last_primary = &in_dev->ifa_list;
463
464         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465              ifap = &ifa1->ifa_next) {
466                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467                     ifa->ifa_scope <= ifa1->ifa_scope)
468                         last_primary = &ifa1->ifa_next;
469                 if (ifa1->ifa_mask == ifa->ifa_mask &&
470                     inet_ifa_match(ifa1->ifa_address, ifa)) {
471                         if (ifa1->ifa_local == ifa->ifa_local) {
472                                 inet_free_ifa(ifa);
473                                 return -EEXIST;
474                         }
475                         if (ifa1->ifa_scope != ifa->ifa_scope) {
476                                 inet_free_ifa(ifa);
477                                 return -EINVAL;
478                         }
479                         ifa->ifa_flags |= IFA_F_SECONDARY;
480                 }
481         }
482
483         /* Allow any devices that wish to register ifaddr validtors to weigh
484          * in now, before changes are committed.  The rntl lock is serializing
485          * access here, so the state should not change between a validator call
486          * and a final notify on commit.  This isn't invoked on promotion under
487          * the assumption that validators are checking the address itself, and
488          * not the flags.
489          */
490         ivi.ivi_addr = ifa->ifa_address;
491         ivi.ivi_dev = ifa->ifa_dev;
492         ivi.extack = extack;
493         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
494                                            NETDEV_UP, &ivi);
495         ret = notifier_to_errno(ret);
496         if (ret) {
497                 inet_free_ifa(ifa);
498                 return ret;
499         }
500
501         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
502                 prandom_seed((__force u32) ifa->ifa_local);
503                 ifap = last_primary;
504         }
505
506         ifa->ifa_next = *ifap;
507         *ifap = ifa;
508
509         inet_hash_insert(dev_net(in_dev->dev), ifa);
510
511         cancel_delayed_work(&check_lifetime_work);
512         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
513
514         /* Send message first, then call notifier.
515            Notifier will trigger FIB update, so that
516            listeners of netlink will know about new ifaddr */
517         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
518         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
519
520         return 0;
521 }
522
523 static int inet_insert_ifa(struct in_ifaddr *ifa)
524 {
525         return __inet_insert_ifa(ifa, NULL, 0, NULL);
526 }
527
528 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
529 {
530         struct in_device *in_dev = __in_dev_get_rtnl(dev);
531
532         ASSERT_RTNL();
533
534         if (!in_dev) {
535                 inet_free_ifa(ifa);
536                 return -ENOBUFS;
537         }
538         ipv4_devconf_setall(in_dev);
539         neigh_parms_data_state_setall(in_dev->arp_parms);
540         if (ifa->ifa_dev != in_dev) {
541                 WARN_ON(ifa->ifa_dev);
542                 in_dev_hold(in_dev);
543                 ifa->ifa_dev = in_dev;
544         }
545         if (ipv4_is_loopback(ifa->ifa_local))
546                 ifa->ifa_scope = RT_SCOPE_HOST;
547         return inet_insert_ifa(ifa);
548 }
549
550 /* Caller must hold RCU or RTNL :
551  * We dont take a reference on found in_device
552  */
553 struct in_device *inetdev_by_index(struct net *net, int ifindex)
554 {
555         struct net_device *dev;
556         struct in_device *in_dev = NULL;
557
558         rcu_read_lock();
559         dev = dev_get_by_index_rcu(net, ifindex);
560         if (dev)
561                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
562         rcu_read_unlock();
563         return in_dev;
564 }
565 EXPORT_SYMBOL(inetdev_by_index);
566
567 /* Called only from RTNL semaphored context. No locks. */
568
569 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
570                                     __be32 mask)
571 {
572         ASSERT_RTNL();
573
574         for_primary_ifa(in_dev) {
575                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
576                         return ifa;
577         } endfor_ifa(in_dev);
578         return NULL;
579 }
580
581 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
582 {
583         struct ip_mreqn mreq = {
584                 .imr_multiaddr.s_addr = ifa->ifa_address,
585                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
586         };
587         int ret;
588
589         ASSERT_RTNL();
590
591         lock_sock(sk);
592         if (join)
593                 ret = ip_mc_join_group(sk, &mreq);
594         else
595                 ret = ip_mc_leave_group(sk, &mreq);
596         release_sock(sk);
597
598         return ret;
599 }
600
601 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
602                             struct netlink_ext_ack *extack)
603 {
604         struct net *net = sock_net(skb->sk);
605         struct nlattr *tb[IFA_MAX+1];
606         struct in_device *in_dev;
607         struct ifaddrmsg *ifm;
608         struct in_ifaddr *ifa, **ifap;
609         int err = -EINVAL;
610
611         ASSERT_RTNL();
612
613         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
614                           extack);
615         if (err < 0)
616                 goto errout;
617
618         ifm = nlmsg_data(nlh);
619         in_dev = inetdev_by_index(net, ifm->ifa_index);
620         if (!in_dev) {
621                 err = -ENODEV;
622                 goto errout;
623         }
624
625         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
626              ifap = &ifa->ifa_next) {
627                 if (tb[IFA_LOCAL] &&
628                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
629                         continue;
630
631                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
632                         continue;
633
634                 if (tb[IFA_ADDRESS] &&
635                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
636                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
637                         continue;
638
639                 if (ipv4_is_multicast(ifa->ifa_address))
640                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
641                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
642                 return 0;
643         }
644
645         err = -EADDRNOTAVAIL;
646 errout:
647         return err;
648 }
649
/* Lifetime value that the lifetime checks treat as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
651
652 static void check_lifetime(struct work_struct *work)
653 {
654         unsigned long now, next, next_sec, next_sched;
655         struct in_ifaddr *ifa;
656         struct hlist_node *n;
657         int i;
658
659         now = jiffies;
660         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
661
662         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
663                 bool change_needed = false;
664
665                 rcu_read_lock();
666                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
667                         unsigned long age;
668
669                         if (ifa->ifa_flags & IFA_F_PERMANENT)
670                                 continue;
671
672                         /* We try to batch several events at once. */
673                         age = (now - ifa->ifa_tstamp +
674                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
675
676                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
677                             age >= ifa->ifa_valid_lft) {
678                                 change_needed = true;
679                         } else if (ifa->ifa_preferred_lft ==
680                                    INFINITY_LIFE_TIME) {
681                                 continue;
682                         } else if (age >= ifa->ifa_preferred_lft) {
683                                 if (time_before(ifa->ifa_tstamp +
684                                                 ifa->ifa_valid_lft * HZ, next))
685                                         next = ifa->ifa_tstamp +
686                                                ifa->ifa_valid_lft * HZ;
687
688                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
689                                         change_needed = true;
690                         } else if (time_before(ifa->ifa_tstamp +
691                                                ifa->ifa_preferred_lft * HZ,
692                                                next)) {
693                                 next = ifa->ifa_tstamp +
694                                        ifa->ifa_preferred_lft * HZ;
695                         }
696                 }
697                 rcu_read_unlock();
698                 if (!change_needed)
699                         continue;
700                 rtnl_lock();
701                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
702                         unsigned long age;
703
704                         if (ifa->ifa_flags & IFA_F_PERMANENT)
705                                 continue;
706
707                         /* We try to batch several events at once. */
708                         age = (now - ifa->ifa_tstamp +
709                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
710
711                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
712                             age >= ifa->ifa_valid_lft) {
713                                 struct in_ifaddr **ifap;
714
715                                 for (ifap = &ifa->ifa_dev->ifa_list;
716                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
717                                         if (*ifap == ifa) {
718                                                 inet_del_ifa(ifa->ifa_dev,
719                                                              ifap, 1);
720                                                 break;
721                                         }
722                                 }
723                         } else if (ifa->ifa_preferred_lft !=
724                                    INFINITY_LIFE_TIME &&
725                                    age >= ifa->ifa_preferred_lft &&
726                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
727                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
728                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
729                         }
730                 }
731                 rtnl_unlock();
732         }
733
734         next_sec = round_jiffies_up(next);
735         next_sched = next;
736
737         /* If rounded timeout is accurate enough, accept it. */
738         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
739                 next_sched = next_sec;
740
741         now = jiffies;
742         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
743         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
744                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
745
746         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
747                         next_sched - now);
748 }
749
750 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
751                              __u32 prefered_lft)
752 {
753         unsigned long timeout;
754
755         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
756
757         timeout = addrconf_timeout_fixup(valid_lft, HZ);
758         if (addrconf_finite_timeout(timeout))
759                 ifa->ifa_valid_lft = timeout;
760         else
761                 ifa->ifa_flags |= IFA_F_PERMANENT;
762
763         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
764         if (addrconf_finite_timeout(timeout)) {
765                 if (timeout == 0)
766                         ifa->ifa_flags |= IFA_F_DEPRECATED;
767                 ifa->ifa_preferred_lft = timeout;
768         }
769         ifa->ifa_tstamp = jiffies;
770         if (!ifa->ifa_cstamp)
771                 ifa->ifa_cstamp = ifa->ifa_tstamp;
772 }
773
774 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
775                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
776 {
777         struct nlattr *tb[IFA_MAX+1];
778         struct in_ifaddr *ifa;
779         struct ifaddrmsg *ifm;
780         struct net_device *dev;
781         struct in_device *in_dev;
782         int err;
783
784         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
785                           NULL);
786         if (err < 0)
787                 goto errout;
788
789         ifm = nlmsg_data(nlh);
790         err = -EINVAL;
791         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
792                 goto errout;
793
794         dev = __dev_get_by_index(net, ifm->ifa_index);
795         err = -ENODEV;
796         if (!dev)
797                 goto errout;
798
799         in_dev = __in_dev_get_rtnl(dev);
800         err = -ENOBUFS;
801         if (!in_dev)
802                 goto errout;
803
804         ifa = inet_alloc_ifa();
805         if (!ifa)
806                 /*
807                  * A potential indev allocation can be left alive, it stays
808                  * assigned to its device and is destroy with it.
809                  */
810                 goto errout;
811
812         ipv4_devconf_setall(in_dev);
813         neigh_parms_data_state_setall(in_dev->arp_parms);
814         in_dev_hold(in_dev);
815
816         if (!tb[IFA_ADDRESS])
817                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
818
819         INIT_HLIST_NODE(&ifa->hash);
820         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
821         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
822         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
823                                          ifm->ifa_flags;
824         ifa->ifa_scope = ifm->ifa_scope;
825         ifa->ifa_dev = in_dev;
826
827         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
828         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
829
830         if (tb[IFA_BROADCAST])
831                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
832
833         if (tb[IFA_LABEL])
834                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
835         else
836                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837
838         if (tb[IFA_CACHEINFO]) {
839                 struct ifa_cacheinfo *ci;
840
841                 ci = nla_data(tb[IFA_CACHEINFO]);
842                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
843                         err = -EINVAL;
844                         goto errout_free;
845                 }
846                 *pvalid_lft = ci->ifa_valid;
847                 *pprefered_lft = ci->ifa_prefered;
848         }
849
850         return ifa;
851
852 errout_free:
853         inet_free_ifa(ifa);
854 errout:
855         return ERR_PTR(err);
856 }
857
858 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
859 {
860         struct in_device *in_dev = ifa->ifa_dev;
861         struct in_ifaddr *ifa1, **ifap;
862
863         if (!ifa->ifa_local)
864                 return NULL;
865
866         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
867              ifap = &ifa1->ifa_next) {
868                 if (ifa1->ifa_mask == ifa->ifa_mask &&
869                     inet_ifa_match(ifa1->ifa_address, ifa) &&
870                     ifa1->ifa_local == ifa->ifa_local)
871                         return ifa1;
872         }
873         return NULL;
874 }
875
876 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
877                             struct netlink_ext_ack *extack)
878 {
879         struct net *net = sock_net(skb->sk);
880         struct in_ifaddr *ifa;
881         struct in_ifaddr *ifa_existing;
882         __u32 valid_lft = INFINITY_LIFE_TIME;
883         __u32 prefered_lft = INFINITY_LIFE_TIME;
884
885         ASSERT_RTNL();
886
887         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
888         if (IS_ERR(ifa))
889                 return PTR_ERR(ifa);
890
891         ifa_existing = find_matching_ifa(ifa);
892         if (!ifa_existing) {
893                 /* It would be best to check for !NLM_F_CREATE here but
894                  * userspace already relies on not having to provide this.
895                  */
896                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
897                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
898                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
899                                                true, ifa);
900
901                         if (ret < 0) {
902                                 inet_free_ifa(ifa);
903                                 return ret;
904                         }
905                 }
906                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907                                          extack);
908         } else {
909                 inet_free_ifa(ifa);
910
911                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
912                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
913                         return -EEXIST;
914                 ifa = ifa_existing;
915                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916                 cancel_delayed_work(&check_lifetime_work);
917                 queue_delayed_work(system_power_efficient_wq,
918                                 &check_lifetime_work, 0);
919                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
920         }
921         return 0;
922 }
923
924 /*
925  *      Determine a default network mask, based on the IP address.
926  */
927
928 static int inet_abc_len(__be32 addr)
929 {
930         int rc = -1;    /* Something else, probably a multicast. */
931
932         if (ipv4_is_zeronet(addr))
933                 rc = 0;
934         else {
935                 __u32 haddr = ntohl(addr);
936
937                 if (IN_CLASSA(haddr))
938                         rc = 8;
939                 else if (IN_CLASSB(haddr))
940                         rc = 16;
941                 else if (IN_CLASSC(haddr))
942                         rc = 24;
943         }
944
945         return rc;
946 }
947
948
/*
 * Handle the AF_INET address ioctls (SIOC[GS]IFADDR, SIOC[GS]IFBRDADDR,
 * SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK) and SIOCSIFFLAGS for the
 * interface named in @ifr.
 *
 * @net: network namespace used for the device lookup and capability check
 * @cmd: ioctl command; any command not listed above yields -EINVAL
 * @ifr: request; ifr_name selects the device (a "name:alias" label is
 *       accepted) and ifr_addr carries the address in or out
 *
 * The "get" commands store their result in ifr->ifr_addr.  The "set"
 * commands require CAP_NET_ADMIN in the namespace's user namespace, and
 * all of them except SIOCSIFFLAGS also require sin_family == AF_INET.
 * Device and address manipulation is done under rtnl_lock().
 *
 * Returns 0 on success or a negative errno.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /* make sure the name is NUL-terminated before using it as a string */
        ifr->ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /* temporarily cut an alias label ("name:alias") down to the device
         * name; the colon is put back once the device has been found
         */
        colon = strchr(ifr->ifr_name, ':');
        if (colon)
                *colon = 0;

        dev_load(net, ifr->ifr_name);

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* the reply is built in place, starting from a zeroed sockaddr */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (!dev)
                goto done;

        /* restore the full label for the comparisons against ifa_label */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /* only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
         * existing address entry
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_local;
                break;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                break;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_address;
                break;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_mask;
                break;

        case SIOCSIFFLAGS:
                if (colon) {
                        /* flags on an alias label: clearing IFF_UP deletes
                         * that address, nothing else is changed
                         */
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr->ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr->ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* no entry with this label yet: allocate one */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        /* unchanged address: nothing to do */
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /* unlink the entry, it is re-added by inet_set_ifa() below */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                if (!(dev->flags & IFF_POINTOPOINT)) {
                        /* derive prefix, mask and broadcast from the address class */
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* unlink, modify, then insert the entry again */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
}
1183
1184 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1185 {
1186         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1187         struct in_ifaddr *ifa;
1188         struct ifreq ifr;
1189         int done = 0;
1190
1191         if (WARN_ON(size > sizeof(struct ifreq)))
1192                 goto out;
1193
1194         if (!in_dev)
1195                 goto out;
1196
1197         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1198                 if (!buf) {
1199                         done += size;
1200                         continue;
1201                 }
1202                 if (len < size)
1203                         break;
1204                 memset(&ifr, 0, sizeof(struct ifreq));
1205                 strcpy(ifr.ifr_name, ifa->ifa_label);
1206
1207                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1208                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1209                                                                 ifa->ifa_local;
1210
1211                 if (copy_to_user(buf + done, &ifr, size)) {
1212                         done = -EFAULT;
1213                         break;
1214                 }
1215                 len  -= size;
1216                 done += size;
1217         }
1218 out:
1219         return done;
1220 }
1221
1222 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1223                                  int scope)
1224 {
1225         for_primary_ifa(in_dev) {
1226                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1227                     ifa->ifa_scope <= scope)
1228                         return ifa->ifa_local;
1229         } endfor_ifa(in_dev);
1230
1231         return 0;
1232 }
1233
/*
 * Choose a local address to use on @dev.
 *
 * @dev:   device to start the search on
 * @dst:   destination address, or 0 for "any"
 * @scope: largest ifa_scope value an address may have to be eligible
 *
 * The addresses visited by for_primary_ifa() on @dev are tried first: an
 * eligible address is returned immediately when @dst is 0 or
 * inet_ifa_match(dst, ifa) holds; otherwise the first eligible address is
 * remembered as a fallback.  If @dev yields nothing (or has no in_device),
 * the l3mdev master of @dev is tried, and then every device in the
 * namespace that has the same l3mdev master index.
 *
 * The whole search runs inside an RCU read-side critical section.
 *
 * Returns the selected address, or 0 when none was found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
        __be32 addr = 0;
        struct in_device *in_dev;
        struct net *net = dev_net(dev);
        int master_idx;

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                goto no_in_dev;

        for_primary_ifa(in_dev) {
                if (ifa->ifa_scope > scope)
                        continue;
                if (!dst || inet_ifa_match(dst, ifa)) {
                        addr = ifa->ifa_local;
                        break;
                }
                /* keep the first eligible address as a fallback */
                if (!addr)
                        addr = ifa->ifa_local;
        } endfor_ifa(in_dev);

        if (addr)
                goto out_unlock;
no_in_dev:
        master_idx = l3mdev_master_ifindex_rcu(dev);

        /* For VRFs, the VRF device takes the place of the loopback device,
         * with addresses on it being preferred.  Note in such cases the
         * loopback device will be among the devices that fail the master_idx
         * equality check in the loop below.
         */
        if (master_idx &&
            (dev = dev_get_by_index_rcu(net, master_idx)) &&
            (in_dev = __in_dev_get_rcu(dev))) {
                addr = in_dev_select_addr(in_dev, scope);
                if (addr)
                        goto out_unlock;
        }

        /* Non-loopback addresses on loopback should be preferred
           in this case. It is important that lo is the first interface
           in dev_base list.
         */
        for_each_netdev_rcu(net, dev) {
                /* only consider devices with the same l3mdev master index */
                if (l3mdev_master_ifindex_rcu(dev) != master_idx)
                        continue;

                in_dev = __in_dev_get_rcu(dev);
                if (!in_dev)
                        continue;

                addr = in_dev_select_addr(in_dev, scope);
                if (addr)
                        goto out_unlock;
        }
out_unlock:
        rcu_read_unlock();
        return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1296
/*
 * Per-device worker for inet_confirm_addr().
 *
 * @in_dev: device whose address list is scanned
 * @dst:    address that must share a subnet with the result, 0 = any
 * @local:  address to confirm, 0 = pick one
 * @scope:  largest ifa_scope value the returned address may have
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the candidate local address (first entry whose ifa_local equals
 *          @local, or any entry when @local is 0, with ifa_scope <= @scope)
 *   same - set once an entry is found whose subnet contains both @local
 *          and @dst (each test is skipped when the value is 0)
 *
 * Returns the candidate address if such a subnet was found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* subnet already confirmed earlier: we are done */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1333
1334 /*
1335  * Confirm that local IP address exists using wildcards:
1336  * - net: netns to check, cannot be NULL
1337  * - in_dev: only on this interface, NULL=any interface
1338  * - dst: only in the same subnet as dst, 0=any dst
1339  * - local: address, 0=autoselect the local address
1340  * - scope: maximum allowed scope value for the local address
1341  */
1342 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1343                          __be32 dst, __be32 local, int scope)
1344 {
1345         __be32 addr = 0;
1346         struct net_device *dev;
1347
1348         if (in_dev)
1349                 return confirm_addr_indev(in_dev, dst, local, scope);
1350
1351         rcu_read_lock();
1352         for_each_netdev_rcu(net, dev) {
1353                 in_dev = __in_dev_get_rcu(dev);
1354                 if (in_dev) {
1355                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1356                         if (addr)
1357                                 break;
1358                 }
1359         }
1360         rcu_read_unlock();
1361
1362         return addr;
1363 }
1364 EXPORT_SYMBOL(inet_confirm_addr);
1365
1366 /*
1367  *      Device notifier
1368  */
1369
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1375
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1381
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1387
/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
            nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1394
1395 /* Rename ifa_labels for a device name change. Make some effort to preserve
1396  * existing alias numbering and to create unique labels if possible.
1397 */
1398 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1399 {
1400         struct in_ifaddr *ifa;
1401         int named = 0;
1402
1403         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1404                 char old[IFNAMSIZ], *dot;
1405
1406                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1407                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1408                 if (named++ == 0)
1409                         goto skip;
1410                 dot = strchr(old, ':');
1411                 if (!dot) {
1412                         sprintf(old, ":%d", named);
1413                         dot = old;
1414                 }
1415                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1416                         strcat(ifa->ifa_label, dot);
1417                 else
1418                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1419 skip:
1420                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1421         }
1422 }
1423
/* Return true when @mtu is at least IPV4_MIN_MTU. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        return mtu >= IPV4_MIN_MTU;
}
1428
1429 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1430                                         struct in_device *in_dev)
1431
1432 {
1433         struct in_ifaddr *ifa;
1434
1435         for (ifa = in_dev->ifa_list; ifa;
1436              ifa = ifa->ifa_next) {
1437                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1438                          ifa->ifa_local, dev,
1439                          ifa->ifa_local, NULL,
1440                          dev->dev_addr, NULL);
1441         }
1442 }
1443
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback: keeps the IPv4 per-device state
 * (struct in_device) in step with device events.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: only REGISTER and CHANGEMTU can create it,
         * every other event is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (IS_ERR(in_dev))
                                return notifier_from_errno(PTR_ERR(in_dev));
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER with IPv4 state already attached is unexpected */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        /* give the loopback device INADDR_LOOPBACK/8 with
                         * host scope and infinite lifetimes
                         */
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                ipv4_devconf_setall(in_dev);
                                neigh_parms_data_state_setall(in_dev->arp_parms);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR only announce when arp_notify is set */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() in this file does call
                 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
                 * statement above looks stale - confirm.
                 */
                inetdev_changename(dev, in_dev);

                /* re-register the sysctl entries after the rename */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1539
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1543
1544 static size_t inet_nlmsg_size(void)
1545 {
1546         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1547                + nla_total_size(4) /* IFA_ADDRESS */
1548                + nla_total_size(4) /* IFA_LOCAL */
1549                + nla_total_size(4) /* IFA_BROADCAST */
1550                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1551                + nla_total_size(4)  /* IFA_FLAGS */
1552                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1553 }
1554
1555 static inline u32 cstamp_delta(unsigned long cstamp)
1556 {
1557         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1558 }
1559
1560 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1561                          unsigned long tstamp, u32 preferred, u32 valid)
1562 {
1563         struct ifa_cacheinfo ci;
1564
1565         ci.cstamp = cstamp_delta(cstamp);
1566         ci.tstamp = cstamp_delta(tstamp);
1567         ci.ifa_prefered = preferred;
1568         ci.ifa_valid = valid;
1569
1570         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1571 }
1572
1573 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1574                             u32 portid, u32 seq, int event, unsigned int flags)
1575 {
1576         struct ifaddrmsg *ifm;
1577         struct nlmsghdr  *nlh;
1578         u32 preferred, valid;
1579
1580         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1581         if (!nlh)
1582                 return -EMSGSIZE;
1583
1584         ifm = nlmsg_data(nlh);
1585         ifm->ifa_family = AF_INET;
1586         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1587         ifm->ifa_flags = ifa->ifa_flags;
1588         ifm->ifa_scope = ifa->ifa_scope;
1589         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1590
1591         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1592                 preferred = ifa->ifa_preferred_lft;
1593                 valid = ifa->ifa_valid_lft;
1594                 if (preferred != INFINITY_LIFE_TIME) {
1595                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1596
1597                         if (preferred > tval)
1598                                 preferred -= tval;
1599                         else
1600                                 preferred = 0;
1601                         if (valid != INFINITY_LIFE_TIME) {
1602                                 if (valid > tval)
1603                                         valid -= tval;
1604                                 else
1605                                         valid = 0;
1606                         }
1607                 }
1608         } else {
1609                 preferred = INFINITY_LIFE_TIME;
1610                 valid = INFINITY_LIFE_TIME;
1611         }
1612         if ((ifa->ifa_address &&
1613              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1614             (ifa->ifa_local &&
1615              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1616             (ifa->ifa_broadcast &&
1617              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1618             (ifa->ifa_label[0] &&
1619              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1620             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1621             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1622                           preferred, valid))
1623                 goto nla_put_failure;
1624
1625         nlmsg_end(skb, nlh);
1626         return 0;
1627
1628 nla_put_failure:
1629         nlmsg_cancel(skb, nlh);
1630         return -EMSGSIZE;
1631 }
1632
1633 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1634 {
1635         struct net *net = sock_net(skb->sk);
1636         int h, s_h;
1637         int idx, s_idx;
1638         int ip_idx, s_ip_idx;
1639         struct net_device *dev;
1640         struct in_device *in_dev;
1641         struct in_ifaddr *ifa;
1642         struct hlist_head *head;
1643
1644         s_h = cb->args[0];
1645         s_idx = idx = cb->args[1];
1646         s_ip_idx = ip_idx = cb->args[2];
1647
1648         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1649                 idx = 0;
1650                 head = &net->dev_index_head[h];
1651                 rcu_read_lock();
1652                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1653                           net->dev_base_seq;
1654                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1655                         if (idx < s_idx)
1656                                 goto cont;
1657                         if (h > s_h || idx > s_idx)
1658                                 s_ip_idx = 0;
1659                         in_dev = __in_dev_get_rcu(dev);
1660                         if (!in_dev)
1661                                 goto cont;
1662
1663                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1664                              ifa = ifa->ifa_next, ip_idx++) {
1665                                 if (ip_idx < s_ip_idx)
1666                                         continue;
1667                                 if (inet_fill_ifaddr(skb, ifa,
1668                                              NETLINK_CB(cb->skb).portid,
1669                                              cb->nlh->nlmsg_seq,
1670                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1671                                         rcu_read_unlock();
1672                                         goto done;
1673                                 }
1674                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1675                         }
1676 cont:
1677                         idx++;
1678                 }
1679                 rcu_read_unlock();
1680         }
1681
1682 done:
1683         cb->args[0] = h;
1684         cb->args[1] = idx;
1685         cb->args[2] = ip_idx;
1686
1687         return skb->len;
1688 }
1689
1690 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1691                       u32 portid)
1692 {
1693         struct sk_buff *skb;
1694         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1695         int err = -ENOBUFS;
1696         struct net *net;
1697
1698         net = dev_net(ifa->ifa_dev->dev);
1699         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1700         if (!skb)
1701                 goto errout;
1702
1703         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1704         if (err < 0) {
1705                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1706                 WARN_ON(err == -EMSGSIZE);
1707                 kfree_skb(skb);
1708                 goto errout;
1709         }
1710         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1711         return;
1712 errout:
1713         if (err < 0)
1714                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1715 }
1716
1717 static size_t inet_get_link_af_size(const struct net_device *dev,
1718                                     u32 ext_filter_mask)
1719 {
1720         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1721
1722         if (!in_dev)
1723                 return 0;
1724
1725         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1726 }
1727
1728 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1729                              u32 ext_filter_mask)
1730 {
1731         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1732         struct nlattr *nla;
1733         int i;
1734
1735         if (!in_dev)
1736                 return -ENODATA;
1737
1738         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1739         if (!nla)
1740                 return -EMSGSIZE;
1741
1742         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1743                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1744
1745         return 0;
1746 }
1747
/*
 * Policy passed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF is declared as a nested attribute.
 */
1748 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1749         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1750 };
1751
1752 static int inet_validate_link_af(const struct net_device *dev,
1753                                  const struct nlattr *nla)
1754 {
1755         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1756         int err, rem;
1757
1758         if (dev && !__in_dev_get_rcu(dev))
1759                 return -EAFNOSUPPORT;
1760
1761         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1762         if (err < 0)
1763                 return err;
1764
1765         if (tb[IFLA_INET_CONF]) {
1766                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1767                         int cfgid = nla_type(a);
1768
1769                         if (nla_len(a) < 4)
1770                                 return -EINVAL;
1771
1772                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1773                                 return -EINVAL;
1774                 }
1775         }
1776
1777         return 0;
1778 }
1779
1780 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1781 {
1782         struct in_device *in_dev = __in_dev_get_rcu(dev);
1783         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1784         int rem;
1785
1786         if (!in_dev)
1787                 return -EAFNOSUPPORT;
1788
1789         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1790                 BUG();
1791
1792         if (tb[IFLA_INET_CONF]) {
1793                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1794                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1795         }
1796
1797         return 0;
1798 }
1799
1800 static int inet_netconf_msgsize_devconf(int type)
1801 {
1802         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1803                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1804         bool all = false;
1805
1806         if (type == NETCONFA_ALL)
1807                 all = true;
1808
1809         if (all || type == NETCONFA_FORWARDING)
1810                 size += nla_total_size(4);
1811         if (all || type == NETCONFA_RP_FILTER)
1812                 size += nla_total_size(4);
1813         if (all || type == NETCONFA_MC_FORWARDING)
1814                 size += nla_total_size(4);
1815         if (all || type == NETCONFA_PROXY_NEIGH)
1816                 size += nla_total_size(4);
1817         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1818                 size += nla_total_size(4);
1819
1820         return size;
1821 }
1822
1823 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1824                                      struct ipv4_devconf *devconf, u32 portid,
1825                                      u32 seq, int event, unsigned int flags,
1826                                      int type)
1827 {
1828         struct nlmsghdr  *nlh;
1829         struct netconfmsg *ncm;
1830         bool all = false;
1831
1832         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1833                         flags);
1834         if (!nlh)
1835                 return -EMSGSIZE;
1836
1837         if (type == NETCONFA_ALL)
1838                 all = true;
1839
1840         ncm = nlmsg_data(nlh);
1841         ncm->ncm_family = AF_INET;
1842
1843         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1844                 goto nla_put_failure;
1845
1846         if (!devconf)
1847                 goto out;
1848
1849         if ((all || type == NETCONFA_FORWARDING) &&
1850             nla_put_s32(skb, NETCONFA_FORWARDING,
1851                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1852                 goto nla_put_failure;
1853         if ((all || type == NETCONFA_RP_FILTER) &&
1854             nla_put_s32(skb, NETCONFA_RP_FILTER,
1855                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1856                 goto nla_put_failure;
1857         if ((all || type == NETCONFA_MC_FORWARDING) &&
1858             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1859                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1860                 goto nla_put_failure;
1861         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1862             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1863                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1864                 goto nla_put_failure;
1865         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1866             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1867                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1868                 goto nla_put_failure;
1869
1870 out:
1871         nlmsg_end(skb, nlh);
1872         return 0;
1873
1874 nla_put_failure:
1875         nlmsg_cancel(skb, nlh);
1876         return -EMSGSIZE;
1877 }
1878
1879 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1880                                  int ifindex, struct ipv4_devconf *devconf)
1881 {
1882         struct sk_buff *skb;
1883         int err = -ENOBUFS;
1884
1885         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1886         if (!skb)
1887                 goto errout;
1888
1889         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1890                                         event, 0, type);
1891         if (err < 0) {
1892                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1893                 WARN_ON(err == -EMSGSIZE);
1894                 kfree_skb(skb);
1895                 goto errout;
1896         }
1897         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1898         return;
1899 errout:
1900         if (err < 0)
1901                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1902 }
1903
/*
 * Policy passed to nlmsg_parse() by inet_netconf_get_devconf().  Every
 * attribute listed here is declared with .len = sizeof(int).
 */
1904 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1905         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1906         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1907         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1908         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1909         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1910 };
1911
1912 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1913                                     struct nlmsghdr *nlh,
1914                                     struct netlink_ext_ack *extack)
1915 {
1916         struct net *net = sock_net(in_skb->sk);
1917         struct nlattr *tb[NETCONFA_MAX+1];
1918         struct netconfmsg *ncm;
1919         struct sk_buff *skb;
1920         struct ipv4_devconf *devconf;
1921         struct in_device *in_dev;
1922         struct net_device *dev;
1923         int ifindex;
1924         int err;
1925
1926         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1927                           devconf_ipv4_policy, extack);
1928         if (err < 0)
1929                 goto errout;
1930
1931         err = -EINVAL;
1932         if (!tb[NETCONFA_IFINDEX])
1933                 goto errout;
1934
1935         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1936         switch (ifindex) {
1937         case NETCONFA_IFINDEX_ALL:
1938                 devconf = net->ipv4.devconf_all;
1939                 break;
1940         case NETCONFA_IFINDEX_DEFAULT:
1941                 devconf = net->ipv4.devconf_dflt;
1942                 break;
1943         default:
1944                 dev = __dev_get_by_index(net, ifindex);
1945                 if (!dev)
1946                         goto errout;
1947                 in_dev = __in_dev_get_rtnl(dev);
1948                 if (!in_dev)
1949                         goto errout;
1950                 devconf = &in_dev->cnf;
1951                 break;
1952         }
1953
1954         err = -ENOBUFS;
1955         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1956         if (!skb)
1957                 goto errout;
1958
1959         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1960                                         NETLINK_CB(in_skb).portid,
1961                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1962                                         NETCONFA_ALL);
1963         if (err < 0) {
1964                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1965                 WARN_ON(err == -EMSGSIZE);
1966                 kfree_skb(skb);
1967                 goto errout;
1968         }
1969         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1970 errout:
1971         return err;
1972 }
1973
1974 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1975                                      struct netlink_callback *cb)
1976 {
1977         struct net *net = sock_net(skb->sk);
1978         int h, s_h;
1979         int idx, s_idx;
1980         struct net_device *dev;
1981         struct in_device *in_dev;
1982         struct hlist_head *head;
1983
1984         s_h = cb->args[0];
1985         s_idx = idx = cb->args[1];
1986
1987         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1988                 idx = 0;
1989                 head = &net->dev_index_head[h];
1990                 rcu_read_lock();
1991                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1992                           net->dev_base_seq;
1993                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1994                         if (idx < s_idx)
1995                                 goto cont;
1996                         in_dev = __in_dev_get_rcu(dev);
1997                         if (!in_dev)
1998                                 goto cont;
1999
2000                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2001                                                       &in_dev->cnf,
2002                                                       NETLINK_CB(cb->skb).portid,
2003                                                       cb->nlh->nlmsg_seq,
2004                                                       RTM_NEWNETCONF,
2005                                                       NLM_F_MULTI,
2006                                                       NETCONFA_ALL) < 0) {
2007                                 rcu_read_unlock();
2008                                 goto done;
2009                         }
2010                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2011 cont:
2012                         idx++;
2013                 }
2014                 rcu_read_unlock();
2015         }
2016         if (h == NETDEV_HASHENTRIES) {
2017                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2018                                               net->ipv4.devconf_all,
2019                                               NETLINK_CB(cb->skb).portid,
2020                                               cb->nlh->nlmsg_seq,
2021                                               RTM_NEWNETCONF, NLM_F_MULTI,
2022                                               NETCONFA_ALL) < 0)
2023                         goto done;
2024                 else
2025                         h++;
2026         }
2027         if (h == NETDEV_HASHENTRIES + 1) {
2028                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2029                                               net->ipv4.devconf_dflt,
2030                                               NETLINK_CB(cb->skb).portid,
2031                                               cb->nlh->nlmsg_seq,
2032                                               RTM_NEWNETCONF, NLM_F_MULTI,
2033                                               NETCONFA_ALL) < 0)
2034                         goto done;
2035                 else
2036                         h++;
2037         }
2038 done:
2039         cb->args[0] = h;
2040         cb->args[1] = idx;
2041
2042         return skb->len;
2043 }
2044
2045 #ifdef CONFIG_SYSCTL
2046
2047 static void devinet_copy_dflt_conf(struct net *net, int i)
2048 {
2049         struct net_device *dev;
2050
2051         rcu_read_lock();
2052         for_each_netdev_rcu(net, dev) {
2053                 struct in_device *in_dev;
2054
2055                 in_dev = __in_dev_get_rcu(dev);
2056                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2057                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2058         }
2059         rcu_read_unlock();
2060 }
2061
2062 /* called with RTNL locked */
2063 static void inet_forward_change(struct net *net)
2064 {
2065         struct net_device *dev;
2066         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2067
2068         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2069         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2070         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071                                     NETCONFA_FORWARDING,
2072                                     NETCONFA_IFINDEX_ALL,
2073                                     net->ipv4.devconf_all);
2074         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2075                                     NETCONFA_FORWARDING,
2076                                     NETCONFA_IFINDEX_DEFAULT,
2077                                     net->ipv4.devconf_dflt);
2078
2079         for_each_netdev(net, dev) {
2080                 struct in_device *in_dev;
2081
2082                 if (on)
2083                         dev_disable_lro(dev);
2084
2085                 in_dev = __in_dev_get_rtnl(dev);
2086                 if (in_dev) {
2087                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2088                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2089                                                     NETCONFA_FORWARDING,
2090                                                     dev->ifindex, &in_dev->cnf);
2091                 }
2092         }
2093 }
2094
2095 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2096 {
2097         if (cnf == net->ipv4.devconf_dflt)
2098                 return NETCONFA_IFINDEX_DEFAULT;
2099         else if (cnf == net->ipv4.devconf_all)
2100                 return NETCONFA_IFINDEX_ALL;
2101         else {
2102                 struct in_device *idev
2103                         = container_of(cnf, struct in_device, cnf);
2104                 return idev->dev->ifindex;
2105         }
2106 }
2107
2108 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2109                              void __user *buffer,
2110                              size_t *lenp, loff_t *ppos)
2111 {
2112         int old_value = *(int *)ctl->data;
2113         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2114         int new_value = *(int *)ctl->data;
2115
2116         if (write) {
2117                 struct ipv4_devconf *cnf = ctl->extra1;
2118                 struct net *net = ctl->extra2;
2119                 int i = (int *)ctl->data - cnf->data;
2120                 int ifindex;
2121
2122                 set_bit(i, cnf->state);
2123
2124                 if (cnf == net->ipv4.devconf_dflt)
2125                         devinet_copy_dflt_conf(net, i);
2126                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2127                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2128                         if ((new_value == 0) && (old_value != 0))
2129                                 rt_cache_flush(net);
2130
2131                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2132                     new_value != old_value) {
2133                         ifindex = devinet_conf_ifindex(net, cnf);
2134                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2135                                                     NETCONFA_RP_FILTER,
2136                                                     ifindex, cnf);
2137                 }
2138                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2139                     new_value != old_value) {
2140                         ifindex = devinet_conf_ifindex(net, cnf);
2141                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2142                                                     NETCONFA_PROXY_NEIGH,
2143                                                     ifindex, cnf);
2144                 }
2145                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2146                     new_value != old_value) {
2147                         ifindex = devinet_conf_ifindex(net, cnf);
2148                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2149                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2150                                                     ifindex, cnf);
2151                 }
2152         }
2153
2154         return ret;
2155 }
2156
2157 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2158                                   void __user *buffer,
2159                                   size_t *lenp, loff_t *ppos)
2160 {
2161         int *valp = ctl->data;
2162         int val = *valp;
2163         loff_t pos = *ppos;
2164         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2165
2166         if (write && *valp != val) {
2167                 struct net *net = ctl->extra2;
2168
2169                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2170                         if (!rtnl_trylock()) {
2171                                 /* Restore the original values before restarting */
2172                                 *valp = val;
2173                                 *ppos = pos;
2174                                 return restart_syscall();
2175                         }
2176                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2177                                 inet_forward_change(net);
2178                         } else {
2179                                 struct ipv4_devconf *cnf = ctl->extra1;
2180                                 struct in_device *idev =
2181                                         container_of(cnf, struct in_device, cnf);
2182                                 if (*valp)
2183                                         dev_disable_lro(idev->dev);
2184                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2185                                                             NETCONFA_FORWARDING,
2186                                                             idev->dev->ifindex,
2187                                                             cnf);
2188                         }
2189                         rtnl_unlock();
2190                         rt_cache_flush(net);
2191                 } else
2192                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2193                                                     NETCONFA_FORWARDING,
2194                                                     NETCONFA_IFINDEX_DEFAULT,
2195                                                     net->ipv4.devconf_dflt);
2196         }
2197
2198         return ret;
2199 }
2200
2201 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2202                                 void __user *buffer,
2203                                 size_t *lenp, loff_t *ppos)
2204 {
2205         int *valp = ctl->data;
2206         int val = *valp;
2207         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2208         struct net *net = ctl->extra2;
2209
2210         if (write && *valp != val)
2211                 rt_cache_flush(net);
2212
2213         return ret;
2214 }
2215
/*
 * Initializer helpers for the entries of devinet_sysctl.devinet_vars[].
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) expands to a ctl_table
 * initializer named @name with mode @mval and handler @proc, whose .data
 * points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.data[]
 * and whose .extra1 is &ipv4_devconf.
 *
 * The remaining macros are shorthands for it:
 *   RW_ENTRY       - mode 0644, handler devinet_conf_proc
 *   RO_ENTRY       - mode 0444, handler devinet_conf_proc
 *   COMPLEX_ENTRY  - mode 0644, caller-supplied handler
 *   FLUSHING_ENTRY - COMPLEX_ENTRY with handler ipv4_doint_and_flush
 */
2216 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2217         { \
2218                 .procname       = name, \
2219                 .data           = ipv4_devconf.data + \
2220                                   IPV4_DEVCONF_ ## attr - 1, \
2221                 .maxlen         = sizeof(int), \
2222                 .mode           = mval, \
2223                 .proc_handler   = proc, \
2224                 .extra1         = &ipv4_devconf, \
2225         }
2226
2227 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2228         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2229
2230 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2231         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2232
2233 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2234         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2235
2236 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2237         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2238
/*
 * Template for the per-configuration sysctl table.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * rebases .data/.extra1 of its entries from the global ipv4_devconf onto the
 * configuration block being registered; sysctl_header stores the handle
 * returned by register_net_sysctl() for that copy.
 *
 * "forwarding" uses devinet_sysctl_forward, "mc_forwarding" is read-only,
 * the FLUSHING entries use ipv4_doint_and_flush and all others go through
 * devinet_conf_proc.
 *
 * NOTE(review): devinet_vars[] has __IPV4_DEVCONF_MAX slots; presumably at
 * least the last one is left zeroed as the table terminator -- confirm.
 */
2239 static struct devinet_sysctl_table {
2240         struct ctl_table_header *sysctl_header;
2241         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2242 } devinet_sysctl = {
2243         .devinet_vars = {
2244                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2245                                              devinet_sysctl_forward),
2246                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2247
2248                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2249                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2250                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2251                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2252                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2253                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2254                                         "accept_source_route"),
2255                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2256                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2257                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2258                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2259                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2260                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2261                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2262                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2263                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2264                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2265                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2266                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2267                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2268                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2269                                         "force_igmp_version"),
2270                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2271                                         "igmpv2_unsolicited_report_interval"),
2272                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2273                                         "igmpv3_unsolicited_report_interval"),
2274                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2275                                         "ignore_routes_with_linkdown"),
2276                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2277                                         "drop_gratuitous_arp"),
2278
2279                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2280                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2281                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2282                                               "promote_secondaries"),
2283                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2284                                               "route_localnet"),
2285                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2286                                               "drop_unicast_in_l2_multicast"),
2287         },
2288 };
2289
2290 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2291                                      int ifindex, struct ipv4_devconf *p)
2292 {
2293         int i;
2294         struct devinet_sysctl_table *t;
2295         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2296
2297         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2298         if (!t)
2299                 goto out;
2300
2301         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2302                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2303                 t->devinet_vars[i].extra1 = p;
2304                 t->devinet_vars[i].extra2 = net;
2305         }
2306
2307         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2308
2309         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2310         if (!t->sysctl_header)
2311                 goto free;
2312
2313         p->sysctl = t;
2314
2315         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2316                                     ifindex, p);
2317         return 0;
2318
2319 free:
2320         kfree(t);
2321 out:
2322         return -ENOBUFS;
2323 }
2324
2325 static void __devinet_sysctl_unregister(struct net *net,
2326                                         struct ipv4_devconf *cnf, int ifindex)
2327 {
2328         struct devinet_sysctl_table *t = cnf->sysctl;
2329
2330         if (t) {
2331                 cnf->sysctl = NULL;
2332                 unregister_net_sysctl_table(t->sysctl_header);
2333                 kfree(t);
2334         }
2335
2336         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2337 }
2338
2339 static int devinet_sysctl_register(struct in_device *idev)
2340 {
2341         int err;
2342
2343         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2344                 return -EINVAL;
2345
2346         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2347         if (err)
2348                 return err;
2349         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2350                                         idev->dev->ifindex, &idev->cnf);
2351         if (err)
2352                 neigh_sysctl_unregister(idev->arp_parms);
2353         return err;
2354 }
2355
2356 static void devinet_sysctl_unregister(struct in_device *idev)
2357 {
2358         struct net *net = dev_net(idev->dev);
2359
2360         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2361         neigh_sysctl_unregister(idev->arp_parms);
2362 }
2363
/*
 * The "ip_forward" sysctl entry, backed by the FORWARDING slot of the global
 * ipv4_devconf and handled by devinet_sysctl_forward.  devinet_init_net()
 * registers it under "net/ipv4"; for namespaces other than init_net it
 * registers a kmemdup() copy whose data/extra1/extra2 are repointed to that
 * namespace's "all" configuration.
 */
2364 static struct ctl_table ctl_forward_entry[] = {
2365         {
2366                 .procname       = "ip_forward",
2367                 .data           = &ipv4_devconf.data[
2368                                         IPV4_DEVCONF_FORWARDING - 1],
2369                 .maxlen         = sizeof(int),
2370                 .mode           = 0644,
2371                 .proc_handler   = devinet_sysctl_forward,
2372                 .extra1         = &ipv4_devconf,
2373                 .extra2         = &init_net,
2374         },
2375         { },
2376 };
2377 #endif
2378
2379 static __net_init int devinet_init_net(struct net *net)
2380 {
2381         int err;
2382         struct ipv4_devconf *all, *dflt;
2383 #ifdef CONFIG_SYSCTL
2384         struct ctl_table *tbl = ctl_forward_entry;
2385         struct ctl_table_header *forw_hdr;
2386 #endif
2387
2388         err = -ENOMEM;
2389         all = &ipv4_devconf;
2390         dflt = &ipv4_devconf_dflt;
2391
2392         if (!net_eq(net, &init_net)) {
2393                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2394                 if (!all)
2395                         goto err_alloc_all;
2396
2397                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2398                 if (!dflt)
2399                         goto err_alloc_dflt;
2400
2401 #ifdef CONFIG_SYSCTL
2402                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2403                 if (!tbl)
2404                         goto err_alloc_ctl;
2405
2406                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2407                 tbl[0].extra1 = all;
2408                 tbl[0].extra2 = net;
2409 #endif
2410         }
2411
2412 #ifdef CONFIG_SYSCTL
2413         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2414         if (err < 0)
2415                 goto err_reg_all;
2416
2417         err = __devinet_sysctl_register(net, "default",
2418                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2419         if (err < 0)
2420                 goto err_reg_dflt;
2421
2422         err = -ENOMEM;
2423         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2424         if (!forw_hdr)
2425                 goto err_reg_ctl;
2426         net->ipv4.forw_hdr = forw_hdr;
2427 #endif
2428
2429         net->ipv4.devconf_all = all;
2430         net->ipv4.devconf_dflt = dflt;
2431         return 0;
2432
2433 #ifdef CONFIG_SYSCTL
2434 err_reg_ctl:
2435         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2436 err_reg_dflt:
2437         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2438 err_reg_all:
2439         if (tbl != ctl_forward_entry)
2440                 kfree(tbl);
2441 err_alloc_ctl:
2442 #endif
2443         if (dflt != &ipv4_devconf_dflt)
2444                 kfree(dflt);
2445 err_alloc_dflt:
2446         if (all != &ipv4_devconf)
2447                 kfree(all);
2448 err_alloc_all:
2449         return err;
2450 }
2451
2452 static __net_exit void devinet_exit_net(struct net *net)
2453 {
2454 #ifdef CONFIG_SYSCTL
2455         struct ctl_table *tbl;
2456
2457         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2458         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2459         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2460                                     NETCONFA_IFINDEX_DEFAULT);
2461         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2462                                     NETCONFA_IFINDEX_ALL);
2463         kfree(tbl);
2464 #endif
2465         kfree(net->ipv4.devconf_dflt);
2466         kfree(net->ipv4.devconf_all);
2467 }
2468
2469 static __net_initdata struct pernet_operations devinet_ops = {
2470         .init = devinet_init_net,
2471         .exit = devinet_exit_net,
2472 };
2473
2474 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2475         .family           = AF_INET,
2476         .fill_link_af     = inet_fill_link_af,
2477         .get_link_af_size = inet_get_link_af_size,
2478         .validate_link_af = inet_validate_link_af,
2479         .set_link_af      = inet_set_link_af,
2480 };
2481
2482 void __init devinet_init(void)
2483 {
2484         int i;
2485
2486         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2487                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2488
2489         register_pernet_subsys(&devinet_ops);
2490
2491         register_gifconf(PF_INET, inet_gifconf);
2492         register_netdevice_notifier(&ip_netdev_notifier);
2493
2494         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2495
2496         rtnl_af_register(&inet_af_ops);
2497
2498         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2499         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2500         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2501         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2502                       inet_netconf_dump_devconf, 0);
2503 }