9p: Remove INET dependency
[linux-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/* Address flags that only make sense for IPv6 addresses.
 * __inet_insert_ifa() clears these bits from every IPv4 address before
 * the address is linked into a device.
 */
#define IPV6ONLY_FLAGS  \
                (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
                 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
                 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
/* Statically initialised IPv4 device configuration.
 * Slots in .data are indexed by the IPV4_DEVCONF_* id minus one; every
 * slot not named here starts out as zero.
 * NOTE(review): the users of this object are outside the visible part of
 * the file - presumably it backs the "all" settings; confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
                [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
        },
};
81
/* Second statically initialised IPv4 device configuration; unlike
 * ipv4_devconf it also enables ACCEPT_SOURCE_ROUTE.
 * Slots in .data are indexed by the IPV4_DEVCONF_* id minus one; every
 * slot not named here starts out as zero.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device - the assignment is
 * not visible here; confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
                [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
        },
};
94
/* Read option @attr from the default device configuration of namespace
 * @net (net->ipv4.devconf_dflt).
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97
/* Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr() to
 * validate the IFA_* attributes of a request.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        /* label must fit in ifa_label including the terminating NUL */
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
        [IFA_FLAGS]             = { .type = NLA_U32 },
        [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
        [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
        [IFA_PROTO]             = { .type = NLA_U8 },
};
109
/* Parameter bundle for building an address netlink message.
 * NOTE(review): the code consuming this struct is outside the visible part
 * of the file; the per-field notes below are inferred from the field names
 * and need confirming against that code.
 */
struct inet_fill_args {
        u32 portid;             /* presumably the netlink port id of the receiver */
        u32 seq;                /* presumably the netlink sequence number */
        int event;              /* presumably the RTM_* message type */
        unsigned int flags;     /* presumably the NLM_F_* message flags */
        int netnsid;            /* presumably the target namespace id */
        int ifindex;            /* presumably an interface index filter */
};
118
/* Global hash table of configured IPv4 addresses, with
 * 1 << IN4_ADDR_HSIZE_SHIFT buckets indexed by inet_addr_hash().
 * The table is shared by all network namespaces: lookups compare the
 * owning namespace explicitly (see inet_lookup_ifaddr_rcu()).
 * Insertion and removal assert RTNL and use the RCU hlist primitives, so
 * readers may walk a bucket under rcu_read_lock().
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
/* Remove @ifa from the global address hash.  Caller must hold RTNL.
 * The node is removed with hlist_del_init_rcu().
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        ASSERT_RTNL();
        hlist_del_init_rcu(&ifa->hash);
}
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
/* Forward declaration; used in this file to announce RTM_NEWADDR and
 * RTM_DELADDR events for an address.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is added to or removed from a device.
 * inetaddr_validator_chain is consulted by __inet_insert_ifa() before a
 * new address is committed; an error from it aborts the insertion.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
                         struct in_ifaddr __rcu **ifap,
                         int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL there is nothing to register: registration always
 * succeeds and unregistration is a no-op.
 */
static int devinet_sysctl_register(struct in_device *idev)
{
        return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
/* Free @ifa after an RCU grace period.  The actual release, including
 * dropping the reference on ifa->ifa_dev, happens in inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
236
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239         struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241         kfree(rcu_dereference_protected(idev->mc_hash, 1));
242         kfree(idev);
243 }
244
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247         struct net_device *dev = idev->dev;
248
249         WARN_ON(idev->ifa_list);
250         WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254         netdev_put(dev, &idev->dev_tracker);
255         if (!idev->dead)
256                 pr_err("Freeing alive in_device %p\n", idev);
257         else
258                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264         struct in_device *in_dev;
265         int err = -ENOMEM;
266
267         ASSERT_RTNL();
268
269         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270         if (!in_dev)
271                 goto out;
272         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273                         sizeof(in_dev->cnf));
274         in_dev->cnf.sysctl = NULL;
275         in_dev->dev = dev;
276         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277         if (!in_dev->arp_parms)
278                 goto out_kfree;
279         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280                 dev_disable_lro(dev);
281         /* Reference in_dev->dev */
282         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283         /* Account for reference dev->ip_ptr (below) */
284         refcount_set(&in_dev->refcnt, 1);
285
286         err = devinet_sysctl_register(in_dev);
287         if (err) {
288                 in_dev->dead = 1;
289                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290                 in_dev_put(in_dev);
291                 in_dev = NULL;
292                 goto out;
293         }
294         ip_mc_init_dev(in_dev);
295         if (dev->flags & IFF_UP)
296                 ip_mc_up(in_dev);
297
298         /* we can receive as soon as ip_ptr is set -- do this last */
299         rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301         return in_dev ?: ERR_PTR(err);
302 out_kfree:
303         kfree(in_dev);
304         in_dev = NULL;
305         goto out;
306 }
307
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310         struct net_device *dev;
311         struct in_ifaddr *ifa;
312
313         ASSERT_RTNL();
314
315         dev = in_dev->dev;
316
317         in_dev->dead = 1;
318
319         ip_mc_destroy_dev(in_dev);
320
321         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323                 inet_free_ifa(ifa);
324         }
325
326         RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328         devinet_sysctl_unregister(in_dev);
329         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330         arp_ifdown(dev);
331
332         in_dev_put(in_dev);
333 }
334
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337         const struct in_ifaddr *ifa;
338
339         rcu_read_lock();
340         in_dev_for_each_ifa_rcu(ifa, in_dev) {
341                 if (inet_ifa_match(a, ifa)) {
342                         if (!b || inet_ifa_match(b, ifa)) {
343                                 rcu_read_unlock();
344                                 return 1;
345                         }
346                 }
347         }
348         rcu_read_unlock();
349         return 0;
350 }
351
/* Unlink one address from a device and announce the removal.
 *
 * @in_dev:  device owning the address
 * @ifap:    the link (list head or a predecessor's ifa_next) that currently
 *           points at the address to delete
 * @destroy: if non-zero, the deleted address is freed with inet_free_ifa()
 * @nlh:     netlink request header passed on to rtmsg_ifa(), may be NULL
 * @portid:  netlink port id passed on to rtmsg_ifa()
 *
 * Caller must hold RTNL.
 *
 * Deleting a primary address also deletes the secondaries that share its
 * mask and subnet, unless IN_DEV_PROMOTE_SECONDARIES() is set for the
 * device; in that case the first such secondary is promoted to primary.
 * If the device is already marked dead, secondaries are left alone and
 * only the address itself is unlinked and announced.
 */
static void __inet_del_ifa(struct in_device *in_dev,
                           struct in_ifaddr __rcu **ifap,
                           int destroy, struct nlmsghdr *nlh, u32 portid)
{
        /* secondary selected for promotion, if any */
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1;
        /* entry after which a promoted address is re-inserted */
        struct in_ifaddr *last_prim;
        /* last entry stepped over before 'promote' was found; NULL means
         * 'promote' directly follows the deleted address
         */
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        ifa1 = rtnl_dereference(*ifap);
        last_prim = rtnl_dereference(in_dev->ifa_list);
        if (in_dev->dead)
                goto no_promotions;

        /* 1. Deleting primary ifaddr forces deletion all secondaries
         * unless alias promotion is set
         */

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

                while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
                        /* remember the last primary whose scope value is
                         * not below that of the deleted address
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* step over primaries and over secondaries that
                         * belong to a different subnet
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* no promotion: delete this secondary too */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* first matching secondary gets promoted */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

no_promotions:
        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                struct in_ifaddr *next_sec;

                next_sec = rtnl_dereference(promote->ifa_next);
                if (prev_prom) {
                        struct in_ifaddr *last_sec;

                        /* take 'promote' out of its current position ... */
                        rcu_assign_pointer(prev_prom->ifa_next, next_sec);

                        /* ... and re-insert it right after last_prim */
                        last_sec = rtnl_dereference(last_prim->ifa_next);
                        rcu_assign_pointer(promote->ifa_next, last_sec);
                        rcu_assign_pointer(last_prim->ifa_next, promote);
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* re-add the routes of the remaining secondaries of the
                 * subnet that were removed before the unlink
                 */
                for (ifa = next_sec; ifa;
                     ifa = rtnl_dereference(ifa->ifa_next)) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
464
/* Delete the address that *@ifap points at, for deletions that were not
 * triggered by a netlink request: calls __inet_del_ifa() with no request
 * header and port id 0.
 */
static void inet_del_ifa(struct in_device *in_dev,
                         struct in_ifaddr __rcu **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
471
static void check_lifetime(struct work_struct *work);

/* Delayed work that runs check_lifetime().  It is kicked immediately by
 * __inet_insert_ifa() and re-armed by check_lifetime() itself.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
/* Link a fully initialised address into its device (ifa->ifa_dev).
 *
 * @ifa:    address to insert; ifa->ifa_dev must already be set
 * @nlh:    netlink request header passed on to rtmsg_ifa(), may be NULL
 * @portid: netlink port id passed on to rtmsg_ifa()
 * @extack: extended ack handed to the address validators, may be NULL
 *
 * Caller must hold RTNL.  On every path that does not insert the address,
 * @ifa is released with inet_free_ifa(), so the caller must not touch it
 * after a non-insertion.
 *
 * Returns 0 on success, and also when ifa->ifa_local is zero (the address
 * is then freed without being inserted); -EEXIST if the same local address
 * already exists in the same subnet; -EINVAL if an address of the same
 * subnet exists with a different scope; or the error reported by a
 * validator on inetaddr_validator_chain.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                             u32 portid, struct netlink_ext_ack *extack)
{
        struct in_ifaddr __rcu **last_primary, **ifap;
        struct in_device *in_dev = ifa->ifa_dev;
        struct in_validator_info ivi;
        struct in_ifaddr *ifa1;
        int ret;

        ASSERT_RTNL();

        if (!ifa->ifa_local) {
                inet_free_ifa(ifa);
                return 0;
        }

        /* the secondary flag is recomputed from the list contents below */
        ifa->ifa_flags &= ~IFA_F_SECONDARY;
        last_primary = &in_dev->ifa_list;

        /* Don't set IPv6 only flags to IPv4 addresses */
        ifa->ifa_flags &= ~IPV6ONLY_FLAGS;

        ifap = &in_dev->ifa_list;
        ifa1 = rtnl_dereference(*ifap);

        /* Walk the whole list: track the insertion point for a primary
         * address, and detect an existing address in the same subnet, which
         * makes the new address a secondary.  After the loop ifap points at
         * the terminating link of the list.
         */
        while (ifa1) {
                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
                    ifa->ifa_scope <= ifa1->ifa_scope)
                        last_primary = &ifa1->ifa_next;
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa)) {
                        if (ifa1->ifa_local == ifa->ifa_local) {
                                inet_free_ifa(ifa);
                                return -EEXIST;
                        }
                        if (ifa1->ifa_scope != ifa->ifa_scope) {
                                inet_free_ifa(ifa);
                                return -EINVAL;
                        }
                        ifa->ifa_flags |= IFA_F_SECONDARY;
                }

                ifap = &ifa1->ifa_next;
                ifa1 = rtnl_dereference(*ifap);
        }

        /* Allow any devices that wish to register ifaddr validators to weigh
         * in now, before changes are committed.  The rtnl lock is serializing
         * access here, so the state should not change between a validator call
         * and a final notify on commit.  This isn't invoked on promotion under
         * the assumption that validators are checking the address itself, and
         * not the flags.
         */
        ivi.ivi_addr = ifa->ifa_address;
        ivi.ivi_dev = ifa->ifa_dev;
        ivi.extack = extack;
        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
                                           NETDEV_UP, &ivi);
        ret = notifier_to_errno(ret);
        if (ret) {
                inet_free_ifa(ifa);
                return ret;
        }

        /* primaries go after the last suitable primary, secondaries are
         * appended at the end of the list
         */
        if (!(ifa->ifa_flags & IFA_F_SECONDARY))
                ifap = last_primary;

        /* set the new entry's next pointer before publishing the entry */
        rcu_assign_pointer(ifa->ifa_next, *ifap);
        rcu_assign_pointer(*ifap, ifa);

        inet_hash_insert(dev_net(in_dev->dev), ifa);

        /* re-run the lifetime check right away so the new address is
         * taken into account
         */
        cancel_delayed_work(&check_lifetime_work);
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

        /* Send message first, then call notifier.
           Notifier will trigger FIB update, so that
           listeners of netlink will know about new ifaddr */
        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

        return 0;
}
559
/* Insert @ifa for callers that are not handling a netlink request: calls
 * __inet_insert_ifa() with no request header, port id 0 and no extack.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
564
565 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
566 {
567         struct in_device *in_dev = __in_dev_get_rtnl(dev);
568
569         ASSERT_RTNL();
570
571         if (!in_dev) {
572                 inet_free_ifa(ifa);
573                 return -ENOBUFS;
574         }
575         ipv4_devconf_setall(in_dev);
576         neigh_parms_data_state_setall(in_dev->arp_parms);
577         if (ifa->ifa_dev != in_dev) {
578                 WARN_ON(ifa->ifa_dev);
579                 in_dev_hold(in_dev);
580                 ifa->ifa_dev = in_dev;
581         }
582         if (ipv4_is_loopback(ifa->ifa_local))
583                 ifa->ifa_scope = RT_SCOPE_HOST;
584         return inet_insert_ifa(ifa);
585 }
586
587 /* Caller must hold RCU or RTNL :
588  * We dont take a reference on found in_device
589  */
590 struct in_device *inetdev_by_index(struct net *net, int ifindex)
591 {
592         struct net_device *dev;
593         struct in_device *in_dev = NULL;
594
595         rcu_read_lock();
596         dev = dev_get_by_index_rcu(net, ifindex);
597         if (dev)
598                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599         rcu_read_unlock();
600         return in_dev;
601 }
602 EXPORT_SYMBOL(inetdev_by_index);
603
604 /* Called only from RTNL semaphored context. No locks. */
605
606 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
607                                     __be32 mask)
608 {
609         struct in_ifaddr *ifa;
610
611         ASSERT_RTNL();
612
613         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
614                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
615                         return ifa;
616         }
617         return NULL;
618 }
619
620 static int ip_mc_autojoin_config(struct net *net, bool join,
621                                  const struct in_ifaddr *ifa)
622 {
623 #if defined(CONFIG_IP_MULTICAST)
624         struct ip_mreqn mreq = {
625                 .imr_multiaddr.s_addr = ifa->ifa_address,
626                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
627         };
628         struct sock *sk = net->ipv4.mc_autojoin_sk;
629         int ret;
630
631         ASSERT_RTNL();
632
633         lock_sock(sk);
634         if (join)
635                 ret = ip_mc_join_group(sk, &mreq);
636         else
637                 ret = ip_mc_leave_group(sk, &mreq);
638         release_sock(sk);
639
640         return ret;
641 #else
642         return -EOPNOTSUPP;
643 #endif
644 }
645
646 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
647                             struct netlink_ext_ack *extack)
648 {
649         struct net *net = sock_net(skb->sk);
650         struct in_ifaddr __rcu **ifap;
651         struct nlattr *tb[IFA_MAX+1];
652         struct in_device *in_dev;
653         struct ifaddrmsg *ifm;
654         struct in_ifaddr *ifa;
655         int err;
656
657         ASSERT_RTNL();
658
659         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
660                                      ifa_ipv4_policy, extack);
661         if (err < 0)
662                 goto errout;
663
664         ifm = nlmsg_data(nlh);
665         in_dev = inetdev_by_index(net, ifm->ifa_index);
666         if (!in_dev) {
667                 err = -ENODEV;
668                 goto errout;
669         }
670
671         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
672              ifap = &ifa->ifa_next) {
673                 if (tb[IFA_LOCAL] &&
674                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675                         continue;
676
677                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
678                         continue;
679
680                 if (tb[IFA_ADDRESS] &&
681                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683                         continue;
684
685                 if (ipv4_is_multicast(ifa->ifa_address))
686                         ip_mc_autojoin_config(net, false, ifa);
687                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
688                 return 0;
689         }
690
691         err = -EADDRNOTAVAIL;
692 errout:
693         return err;
694 }
695
/* Lifetime value that never expires: check_lifetime() does no expiry
 * handling for a valid or preferred lifetime equal to it.
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
697
/* Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of the global address hash is processed in two passes:
 *  - a read-only pass under rcu_read_lock() that decides whether any
 *    address in the bucket needs changing and pulls 'next' (the time of
 *    the next run) forward to the earliest upcoming expiry;
 *  - only if needed, a second pass under RTNL that deletes addresses whose
 *    valid lifetime has run out and flags addresses whose preferred
 *    lifetime has run out as IFA_F_DEPRECATED, announcing the latter with
 *    RTM_NEWADDR.
 * Addresses flagged IFA_F_PERMANENT are skipped.  At the end the work
 * re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        /* latest acceptable time for the next run; pulled in below */
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* pass 1: look only, no modification */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        /* age is in seconds since ifa_tstamp */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* valid lifetime over: must be deleted */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* preferred lifetime over: next event for
                                 * this address is its valid-lifetime expiry
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* still preferred: wake up when that ends */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* pass 2: apply the changes under RTNL; the conditions are
                 * evaluated again for every entry of the bucket
                 */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr __rcu **ifap;
                                struct in_ifaddr *tmp;

                                /* inet_del_ifa() needs the link pointing at
                                 * the address, so search the device list
                                 */
                                ifap = &ifa->ifa_dev->ifa_list;
                                tmp = rtnl_dereference(*ifap);
                                while (tmp) {
                                        if (tmp == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                        ifap = &tmp->ifa_next;
                                        tmp = rtnl_dereference(*ifap);
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
799
800 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
801                              __u32 prefered_lft)
802 {
803         unsigned long timeout;
804
805         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
806
807         timeout = addrconf_timeout_fixup(valid_lft, HZ);
808         if (addrconf_finite_timeout(timeout))
809                 ifa->ifa_valid_lft = timeout;
810         else
811                 ifa->ifa_flags |= IFA_F_PERMANENT;
812
813         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
814         if (addrconf_finite_timeout(timeout)) {
815                 if (timeout == 0)
816                         ifa->ifa_flags |= IFA_F_DEPRECATED;
817                 ifa->ifa_preferred_lft = timeout;
818         }
819         ifa->ifa_tstamp = jiffies;
820         if (!ifa->ifa_cstamp)
821                 ifa->ifa_cstamp = ifa->ifa_tstamp;
822 }
823
824 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
825                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
826                                        struct netlink_ext_ack *extack)
827 {
828         struct nlattr *tb[IFA_MAX+1];
829         struct in_ifaddr *ifa;
830         struct ifaddrmsg *ifm;
831         struct net_device *dev;
832         struct in_device *in_dev;
833         int err;
834
835         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
836                                      ifa_ipv4_policy, extack);
837         if (err < 0)
838                 goto errout;
839
840         ifm = nlmsg_data(nlh);
841         err = -EINVAL;
842         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
843                 goto errout;
844
845         dev = __dev_get_by_index(net, ifm->ifa_index);
846         err = -ENODEV;
847         if (!dev)
848                 goto errout;
849
850         in_dev = __in_dev_get_rtnl(dev);
851         err = -ENOBUFS;
852         if (!in_dev)
853                 goto errout;
854
855         ifa = inet_alloc_ifa();
856         if (!ifa)
857                 /*
858                  * A potential indev allocation can be left alive, it stays
859                  * assigned to its device and is destroy with it.
860                  */
861                 goto errout;
862
863         ipv4_devconf_setall(in_dev);
864         neigh_parms_data_state_setall(in_dev->arp_parms);
865         in_dev_hold(in_dev);
866
867         if (!tb[IFA_ADDRESS])
868                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
869
870         INIT_HLIST_NODE(&ifa->hash);
871         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
872         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
873         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
874                                          ifm->ifa_flags;
875         ifa->ifa_scope = ifm->ifa_scope;
876         ifa->ifa_dev = in_dev;
877
878         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
879         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
880
881         if (tb[IFA_BROADCAST])
882                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
883
884         if (tb[IFA_LABEL])
885                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
886         else
887                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
888
889         if (tb[IFA_RT_PRIORITY])
890                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
891
892         if (tb[IFA_PROTO])
893                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
894
895         if (tb[IFA_CACHEINFO]) {
896                 struct ifa_cacheinfo *ci;
897
898                 ci = nla_data(tb[IFA_CACHEINFO]);
899                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
900                         err = -EINVAL;
901                         goto errout_free;
902                 }
903                 *pvalid_lft = ci->ifa_valid;
904                 *pprefered_lft = ci->ifa_prefered;
905         }
906
907         return ifa;
908
909 errout_free:
910         inet_free_ifa(ifa);
911 errout:
912         return ERR_PTR(err);
913 }
914
915 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
916 {
917         struct in_device *in_dev = ifa->ifa_dev;
918         struct in_ifaddr *ifa1;
919
920         if (!ifa->ifa_local)
921                 return NULL;
922
923         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
924                 if (ifa1->ifa_mask == ifa->ifa_mask &&
925                     inet_ifa_match(ifa1->ifa_address, ifa) &&
926                     ifa1->ifa_local == ifa->ifa_local)
927                         return ifa1;
928         }
929         return NULL;
930 }
931
932 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
933                             struct netlink_ext_ack *extack)
934 {
935         struct net *net = sock_net(skb->sk);
936         struct in_ifaddr *ifa;
937         struct in_ifaddr *ifa_existing;
938         __u32 valid_lft = INFINITY_LIFE_TIME;
939         __u32 prefered_lft = INFINITY_LIFE_TIME;
940
941         ASSERT_RTNL();
942
943         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
944         if (IS_ERR(ifa))
945                 return PTR_ERR(ifa);
946
947         ifa_existing = find_matching_ifa(ifa);
948         if (!ifa_existing) {
949                 /* It would be best to check for !NLM_F_CREATE here but
950                  * userspace already relies on not having to provide this.
951                  */
952                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
953                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
954                         int ret = ip_mc_autojoin_config(net, true, ifa);
955
956                         if (ret < 0) {
957                                 inet_free_ifa(ifa);
958                                 return ret;
959                         }
960                 }
961                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
962                                          extack);
963         } else {
964                 u32 new_metric = ifa->ifa_rt_priority;
965                 u8 new_proto = ifa->ifa_proto;
966
967                 inet_free_ifa(ifa);
968
969                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
970                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
971                         return -EEXIST;
972                 ifa = ifa_existing;
973
974                 if (ifa->ifa_rt_priority != new_metric) {
975                         fib_modify_prefix_metric(ifa, new_metric);
976                         ifa->ifa_rt_priority = new_metric;
977                 }
978
979                 ifa->ifa_proto = new_proto;
980
981                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
982                 cancel_delayed_work(&check_lifetime_work);
983                 queue_delayed_work(system_power_efficient_wq,
984                                 &check_lifetime_work, 0);
985                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
986         }
987         return 0;
988 }
989
990 /*
991  *      Determine a default network mask, based on the IP address.
992  */
993
994 static int inet_abc_len(__be32 addr)
995 {
996         int rc = -1;    /* Something else, probably a multicast. */
997
998         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
999                 rc = 0;
1000         else {
1001                 __u32 haddr = ntohl(addr);
1002                 if (IN_CLASSA(haddr))
1003                         rc = 8;
1004                 else if (IN_CLASSB(haddr))
1005                         rc = 16;
1006                 else if (IN_CLASSC(haddr))
1007                         rc = 24;
1008                 else if (IN_CLASSE(haddr))
1009                         rc = 32;
1010         }
1011
1012         return rc;
1013 }
1014
1015
/*
 * devinet_ioctl - handle the IPv4 address ioctls for one interface.
 * @net: namespace used for the device lookup and the CAP_NET_ADMIN check
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @ifr: request; ifr_name holds a device name or a "name:alias" label and
 *       ifr_addr is accessed as a struct sockaddr_in.  For the get
 *       commands ifr_addr is cleared and then filled with the result.
 *
 * The function takes and releases the RTNL lock itself around the device
 * lookup and the address list manipulation.
 *
 * Return: 0 on success, otherwise a negative errno:
 *   -EINVAL        unsupported @cmd, non-AF_INET family on a set command,
 *                  address rejected by inet_abc_len(), or bad netmask
 *   -EPERM         set command without CAP_NET_ADMIN in net->user_ns
 *   -ENODEV        no device with that name
 *   -EADDRNOTAVAIL no matching address for a command that needs one
 *   -ENOBUFS       allocation of a new address failed
 * SIOCSIFFLAGS on a plain device name returns whatever dev_change_flags()
 * returns, and SIOCSIFADDR returns the result of inet_set_ifa().
 */
1016 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1017 {
1018         struct sockaddr_in sin_orig;
1019         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1020         struct in_ifaddr __rcu **ifap = NULL;
1021         struct in_device *in_dev;
1022         struct in_ifaddr *ifa = NULL;
1023         struct net_device *dev;
1024         char *colon;
1025         int ret = -EFAULT;
1026         int tryaddrmatch = 0;
1027
             /* Force NUL termination before ifr_name is used as a string. */
1028         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1029
1030         /* save original address for comparison */
1031         memcpy(&sin_orig, sin, sizeof(*sin));
1032
             /* Cut a "name:alias" label down to the bare device name for
              * dev_load() and the lookup below; the ':' is put back once
              * the device has been found.
              */
1033         colon = strchr(ifr->ifr_name, ':');
1034         if (colon)
1035                 *colon = 0;
1036
1037         dev_load(net, ifr->ifr_name);
1038
             /* Per-command validation, done before RTNL is taken. */
1039         switch (cmd) {
1040         case SIOCGIFADDR:       /* Get interface address */
1041         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1042         case SIOCGIFDSTADDR:    /* Get the destination address */
1043         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1044                 /* Note that these ioctls will not sleep,
1045                    so that we do not impose a lock.
1046                    One day we will be forced to put shlock here (I mean SMP)
1047                  */
                     /* Address matching is only attempted when the caller
                      * passed an AF_INET address; *sin is then reset so it
                      * can carry the reply.
                      */
1048                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1049                 memset(sin, 0, sizeof(*sin));
1050                 sin->sin_family = AF_INET;
1051                 break;
1052
1053         case SIOCSIFFLAGS:
1054                 ret = -EPERM;
1055                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1056                         goto out;
1057                 break;
1058         case SIOCSIFADDR:       /* Set interface address (and family) */
1059         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1060         case SIOCSIFDSTADDR:    /* Set the destination address */
1061         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1062                 ret = -EPERM;
1063                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1064                         goto out;
1065                 ret = -EINVAL;
1066                 if (sin->sin_family != AF_INET)
1067                         goto out;
1068                 break;
1069         default:
1070                 ret = -EINVAL;
1071                 goto out;
1072         }
1073
1074         rtnl_lock();
1075
1076         ret = -ENODEV;
1077         dev = __dev_get_by_name(net, ifr->ifr_name);
1078         if (!dev)
1079                 goto done;
1080
             /* Restore the full label so it can be compared with ifa_label. */
1081         if (colon)
1082                 *colon = ':';
1083
1084         in_dev = __in_dev_get_rtnl(dev);
1085         if (in_dev) {
1086                 if (tryaddrmatch) {
1087                         /* Matthias Andree */
1088                         /* compare label and address (4.4BSD style) */
1089                         /* note: we only do this for a limited set of ioctls
1090                            and only if the original address family was AF_INET.
1091                            This is checked above. */
1092
1093                         for (ifap = &in_dev->ifa_list;
1094                              (ifa = rtnl_dereference(*ifap)) != NULL;
1095                              ifap = &ifa->ifa_next) {
1096                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1097                                     sin_orig.sin_addr.s_addr ==
1098                                                         ifa->ifa_local) {
1099                                         break; /* found */
1100                                 }
1101                         }
1102                 }
1103                 /* we didn't get a match, maybe the application is
1104                    4.3BSD-style and passed in junk so we fall back to
1105                    comparing just the label */
1106                 if (!ifa) {
1107                         for (ifap = &in_dev->ifa_list;
1108                              (ifa = rtnl_dereference(*ifap)) != NULL;
1109                              ifap = &ifa->ifa_next)
1110                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1111                                         break;
1112                 }
1113         }
1114
             /* Only SIOCSIFADDR (which can create the address) and
              * SIOCSIFFLAGS (which handles a missing address itself) may
              * continue without a matching address.
              */
1115         ret = -EADDRNOTAVAIL;
1116         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1117                 goto done;
1118
1119         switch (cmd) {
1120         case SIOCGIFADDR:       /* Get interface address */
1121                 ret = 0;
1122                 sin->sin_addr.s_addr = ifa->ifa_local;
1123                 break;
1124
1125         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1126                 ret = 0;
1127                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1128                 break;
1129
1130         case SIOCGIFDSTADDR:    /* Get the destination address */
1131                 ret = 0;
1132                 sin->sin_addr.s_addr = ifa->ifa_address;
1133                 break;
1134
1135         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1136                 ret = 0;
1137                 sin->sin_addr.s_addr = ifa->ifa_mask;
1138                 break;
1139
1140         case SIOCSIFFLAGS:
                     /* With an alias label the request applies to that one
                      * address: clearing IFF_UP deletes it, anything else is
                      * accepted without further action.  Without a label the
                      * flags are applied to the device.
                      * NOTE(review): the last argument of inet_del_ifa()
                      * differs here (1) from the other callers (0), where
                      * the address is reused afterwards - presumably it
                      * selects destruction; confirm against inet_del_ifa().
                      */
1141                 if (colon) {
1142                         ret = -EADDRNOTAVAIL;
1143                         if (!ifa)
1144                                 break;
1145                         ret = 0;
1146                         if (!(ifr->ifr_flags & IFF_UP))
1147                                 inet_del_ifa(in_dev, ifap, 1);
1148                         break;
1149                 }
1150                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1151                 break;
1152
1153         case SIOCSIFADDR:       /* Set interface address (and family) */
1154                 ret = -EINVAL;
1155                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1156                         break;
1157
                     /* Either allocate a new address labelled after the
                      * request, or - if the address actually changes - unlink
                      * the existing one and reset its broadcast and scope so
                      * it can be refilled below.
                      */
1158                 if (!ifa) {
1159                         ret = -ENOBUFS;
1160                         ifa = inet_alloc_ifa();
1161                         if (!ifa)
1162                                 break;
1163                         INIT_HLIST_NODE(&ifa->hash);
1164                         if (colon)
1165                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1166                         else
1167                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1168                 } else {
1169                         ret = 0;
1170                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1171                                 break;
1172                         inet_del_ifa(in_dev, ifap, 0);
1173                         ifa->ifa_broadcast = 0;
1174                         ifa->ifa_scope = 0;
1175                 }
1176
1177                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1178
                     /* Non point-to-point devices get the classful default
                      * prefix (and a derived broadcast address when the
                      * device supports broadcast and the prefix is shorter
                      * than /31); point-to-point devices get a /32.
                      */
1179                 if (!(dev->flags & IFF_POINTOPOINT)) {
1180                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1181                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1182                         if ((dev->flags & IFF_BROADCAST) &&
1183                             ifa->ifa_prefixlen < 31)
1184                                 ifa->ifa_broadcast = ifa->ifa_address |
1185                                                      ~ifa->ifa_mask;
1186                 } else {
1187                         ifa->ifa_prefixlen = 32;
1188                         ifa->ifa_mask = inet_make_mask(32);
1189                 }
1190                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1191                 ret = inet_set_ifa(dev, ifa);
1192                 break;
1193
1194         case SIOCSIFBRDADDR:    /* Set the broadcast address */
                     /* On change: unlink, modify, insert again. */
1195                 ret = 0;
1196                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1197                         inet_del_ifa(in_dev, ifap, 0);
1198                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1199                         inet_insert_ifa(ifa);
1200                 }
1201                 break;
1202
1203         case SIOCSIFDSTADDR:    /* Set the destination address */
                     /* An unchanged address succeeds before it is validated. */
1204                 ret = 0;
1205                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1206                         break;
1207                 ret = -EINVAL;
1208                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1209                         break;
1210                 ret = 0;
1211                 inet_del_ifa(in_dev, ifap, 0);
1212                 ifa->ifa_address = sin->sin_addr.s_addr;
1213                 inet_insert_ifa(ifa);
1214                 break;
1215
1216         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1217
1218                 /*
1219                  *      The mask we set must be legal.
1220                  */
1221                 ret = -EINVAL;
1222                 if (bad_mask(sin->sin_addr.s_addr, 0))
1223                         break;
1224                 ret = 0;
1225                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1226                         __be32 old_mask = ifa->ifa_mask;
1227                         inet_del_ifa(in_dev, ifap, 0);
1228                         ifa->ifa_mask = sin->sin_addr.s_addr;
1229                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1230
1231                         /* See if current broadcast address matches
1232                          * with current netmask, then recalculate
1233                          * the broadcast address. Otherwise it's a
1234                          * funny address, so don't touch it since
1235                          * the user seems to know what (s)he's doing...
1236                          */
1237                         if ((dev->flags & IFF_BROADCAST) &&
1238                             (ifa->ifa_prefixlen < 31) &&
1239                             (ifa->ifa_broadcast ==
1240                              (ifa->ifa_local|~old_mask))) {
1241                                 ifa->ifa_broadcast = (ifa->ifa_local |
1242                                                       ~sin->sin_addr.s_addr);
1243                         }
1244                         inet_insert_ifa(ifa);
1245                 }
1246                 break;
1247         }
1248 done:
1249         rtnl_unlock();
1250 out:
1251         return ret;
1252 }
1253
1254 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1255 {
1256         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1257         const struct in_ifaddr *ifa;
1258         struct ifreq ifr;
1259         int done = 0;
1260
1261         if (WARN_ON(size > sizeof(struct ifreq)))
1262                 goto out;
1263
1264         if (!in_dev)
1265                 goto out;
1266
1267         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1268                 if (!buf) {
1269                         done += size;
1270                         continue;
1271                 }
1272                 if (len < size)
1273                         break;
1274                 memset(&ifr, 0, sizeof(struct ifreq));
1275                 strcpy(ifr.ifr_name, ifa->ifa_label);
1276
1277                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1278                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1279                                                                 ifa->ifa_local;
1280
1281                 if (copy_to_user(buf + done, &ifr, size)) {
1282                         done = -EFAULT;
1283                         break;
1284                 }
1285                 len  -= size;
1286                 done += size;
1287         }
1288 out:
1289         return done;
1290 }
1291
1292 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1293                                  int scope)
1294 {
1295         const struct in_ifaddr *ifa;
1296
1297         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1298                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1299                         continue;
1300                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1301                     ifa->ifa_scope <= scope)
1302                         return ifa->ifa_local;
1303         }
1304
1305         return 0;
1306 }
1307
1308 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1309 {
1310         const struct in_ifaddr *ifa;
1311         __be32 addr = 0;
1312         unsigned char localnet_scope = RT_SCOPE_HOST;
1313         struct in_device *in_dev;
1314         struct net *net = dev_net(dev);
1315         int master_idx;
1316
1317         rcu_read_lock();
1318         in_dev = __in_dev_get_rcu(dev);
1319         if (!in_dev)
1320                 goto no_in_dev;
1321
1322         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1323                 localnet_scope = RT_SCOPE_LINK;
1324
1325         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1326                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1327                         continue;
1328                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1329                         continue;
1330                 if (!dst || inet_ifa_match(dst, ifa)) {
1331                         addr = ifa->ifa_local;
1332                         break;
1333                 }
1334                 if (!addr)
1335                         addr = ifa->ifa_local;
1336         }
1337
1338         if (addr)
1339                 goto out_unlock;
1340 no_in_dev:
1341         master_idx = l3mdev_master_ifindex_rcu(dev);
1342
1343         /* For VRFs, the VRF device takes the place of the loopback device,
1344          * with addresses on it being preferred.  Note in such cases the
1345          * loopback device will be among the devices that fail the master_idx
1346          * equality check in the loop below.
1347          */
1348         if (master_idx &&
1349             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1350             (in_dev = __in_dev_get_rcu(dev))) {
1351                 addr = in_dev_select_addr(in_dev, scope);
1352                 if (addr)
1353                         goto out_unlock;
1354         }
1355
1356         /* Not loopback addresses on loopback should be preferred
1357            in this case. It is important that lo is the first interface
1358            in dev_base list.
1359          */
1360         for_each_netdev_rcu(net, dev) {
1361                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1362                         continue;
1363
1364                 in_dev = __in_dev_get_rcu(dev);
1365                 if (!in_dev)
1366                         continue;
1367
1368                 addr = in_dev_select_addr(in_dev, scope);
1369                 if (addr)
1370                         goto out_unlock;
1371         }
1372 out_unlock:
1373         rcu_read_unlock();
1374         return addr;
1375 }
1376 EXPORT_SYMBOL(inet_select_addr);
1377
/*
 * Look for a usable local address on @in_dev.
 * @in_dev: device whose address list is walked (in_dev_for_each_ifa_rcu)
 * @dst:    if non-zero, some entry's subnet must contain it
 * @local:  if non-zero, the local address that must be configured; 0 lets
 *          the function pick one
 * @scope:  upper bound for the address scope, after the scope has been
 *          capped by localnet_scope
 *
 * Two pieces of state are tracked while walking the list: 'addr', a
 * candidate local address that equals @local (or any address if @local is
 * 0) and lies within @scope, and 'same', whether an entry has been seen
 * whose subnet covers both @local and @dst (each only checked when
 * non-zero).
 *
 * Return: the candidate address if 'same' ended up set, otherwise 0.
 */
1378 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1379                               __be32 local, int scope)
1380 {
1381         unsigned char localnet_scope = RT_SCOPE_HOST;
1382         const struct in_ifaddr *ifa;
1383         __be32 addr = 0;
1384         int same = 0;
1385
1386         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1387                 localnet_scope = RT_SCOPE_LINK;
1388
1389         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1390                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1391
                     /* Take the first entry that satisfies @local and @scope
                      * as candidate; stop if the subnet was already confirmed.
                      */
1392                 if (!addr &&
1393                     (local == ifa->ifa_local || !local) &&
1394                     min_scope <= scope) {
1395                         addr = ifa->ifa_local;
1396                         if (same)
1397                                 break;
1398                 }
                     /* Subnet not confirmed yet: test this entry. */
1399                 if (!same) {
1400                         same = (!local || inet_ifa_match(local, ifa)) &&
1401                                 (!dst || inet_ifa_match(dst, ifa));
1402                         if (same && addr) {
                                     /* Only the "autoselect with a given
                                      * dst" case (local == 0, dst != 0)
                                      * needs the extra checks below.
                                      */
1403                                 if (local || !dst)
1404                                         break;
1405                                 /* Is the selected addr into dst subnet? */
1406                                 if (inet_ifa_match(addr, ifa))
1407                                         break;
1408                                 /* No, then can we use new local src? */
1409                                 if (min_scope <= scope) {
1410                                         addr = ifa->ifa_local;
1411                                         break;
1412                                 }
1413                                 /* search for large dst subnet for addr */
1414                                 same = 0;
1415                         }
1416                 }
1417         }
1418
1419         return same ? addr : 0;
1420 }
1421
1422 /*
1423  * Confirm that local IP address exists using wildcards:
1424  * - net: netns to check, cannot be NULL
1425  * - in_dev: only on this interface, NULL=any interface
1426  * - dst: only in the same subnet as dst, 0=any dst
1427  * - local: address, 0=autoselect the local address
1428  * - scope: maximum allowed scope value for the local address
1429  */
1430 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1431                          __be32 dst, __be32 local, int scope)
1432 {
1433         __be32 addr = 0;
1434         struct net_device *dev;
1435
1436         if (in_dev)
1437                 return confirm_addr_indev(in_dev, dst, local, scope);
1438
1439         rcu_read_lock();
1440         for_each_netdev_rcu(net, dev) {
1441                 in_dev = __in_dev_get_rcu(dev);
1442                 if (in_dev) {
1443                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1444                         if (addr)
1445                                 break;
1446                 }
1447         }
1448         rcu_read_unlock();
1449
1450         return addr;
1451 }
1452 EXPORT_SYMBOL(inet_confirm_addr);
1453
1454 /*
1455  *      Device notifier
1456  */
1457
1458 int register_inetaddr_notifier(struct notifier_block *nb)
1459 {
1460         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1461 }
1462 EXPORT_SYMBOL(register_inetaddr_notifier);
1463
1464 int unregister_inetaddr_notifier(struct notifier_block *nb)
1465 {
1466         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1467 }
1468 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1469
1470 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1471 {
1472         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1473 }
1474 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1475
1476 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1477 {
1478         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1479             nb);
1480 }
1481 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1482
1483 /* Rename ifa_labels for a device name change. Make some effort to preserve
1484  * existing alias numbering and to create unique labels if possible.
1485 */
1486 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1487 {
1488         struct in_ifaddr *ifa;
1489         int named = 0;
1490
1491         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1492                 char old[IFNAMSIZ], *dot;
1493
1494                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1495                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1496                 if (named++ == 0)
1497                         goto skip;
1498                 dot = strchr(old, ':');
1499                 if (!dot) {
1500                         sprintf(old, ":%d", named);
1501                         dot = old;
1502                 }
1503                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1504                         strcat(ifa->ifa_label, dot);
1505                 else
1506                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1507 skip:
1508                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1509         }
1510 }
1511
1512 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1513                                         struct in_device *in_dev)
1514
1515 {
1516         const struct in_ifaddr *ifa;
1517
1518         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1519                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1520                          ifa->ifa_local, dev,
1521                          ifa->ifa_local, NULL,
1522                          dev->dev_addr, NULL);
1523         }
1524 }
1525
1526 /* Called only under RTNL semaphore */
1527
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with device events.
 * @this:  notifier block (unused)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is obtained
 *
 * Return: NOTIFY_DONE, except when inetdev_init() fails on
 * NETDEV_REGISTER, in which case the error is returned via
 * notifier_from_errno().
 */
1528 static int inetdev_event(struct notifier_block *this, unsigned long event,
1529                          void *ptr)
1530 {
1531         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1532         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1533
1534         ASSERT_RTNL();
1535
             /* No in_device yet: only registration, or an MTU change to a
              * valid value, creates one; every other event is ignored.
              */
1536         if (!in_dev) {
1537                 if (event == NETDEV_REGISTER) {
1538                         in_dev = inetdev_init(dev);
1539                         if (IS_ERR(in_dev))
1540                                 return notifier_from_errno(PTR_ERR(in_dev));
1541                         if (dev->flags & IFF_LOOPBACK) {
1542                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1543                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1544                         }
1545                 } else if (event == NETDEV_CHANGEMTU) {
1546                         /* Re-enabling IP */
1547                         if (inetdev_valid_mtu(dev->mtu))
1548                                 in_dev = inetdev_init(dev);
1549                 }
1550                 goto out;
1551         }
1552
1553         switch (event) {
1554         case NETDEV_REGISTER:
                     /* Registration with an in_device already attached is
                      * unexpected: log it and clear dev->ip_ptr.
                      */
1555                 pr_debug("%s: bug\n", __func__);
1556                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1557                 break;
1558         case NETDEV_UP:
1559                 if (!inetdev_valid_mtu(dev->mtu))
1560                         break;
                     /* A loopback device coming up gets INADDR_LOOPBACK/8
                      * with host scope; an allocation failure is silently
                      * tolerated.
                      */
1561                 if (dev->flags & IFF_LOOPBACK) {
1562                         struct in_ifaddr *ifa = inet_alloc_ifa();
1563
1564                         if (ifa) {
1565                                 INIT_HLIST_NODE(&ifa->hash);
1566                                 ifa->ifa_local =
1567                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1568                                 ifa->ifa_prefixlen = 8;
1569                                 ifa->ifa_mask = inet_make_mask(8);
1570                                 in_dev_hold(in_dev);
1571                                 ifa->ifa_dev = in_dev;
1572                                 ifa->ifa_scope = RT_SCOPE_HOST;
1573                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1574                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1575                                                  INFINITY_LIFE_TIME);
1576                                 ipv4_devconf_setall(in_dev);
1577                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1578                                 inet_insert_ifa(ifa);
1579                         }
1580                 }
1581                 ip_mc_up(in_dev);
1582                 fallthrough;
1583         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR send gratuitous ARP only when
                      * IN_DEV_ARP_NOTIFY() is true; NOTIFY_PEERS always does.
                      */
1584                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1585                         break;
1586                 fallthrough;
1587         case NETDEV_NOTIFY_PEERS:
1588                 /* Send gratuitous ARP to notify of link change */
1589                 inetdev_send_gratuitous_arp(dev, in_dev);
1590                 break;
1591         case NETDEV_DOWN:
1592                 ip_mc_down(in_dev);
1593                 break;
1594         case NETDEV_PRE_TYPE_CHANGE:
1595                 ip_mc_unmap(in_dev);
1596                 break;
1597         case NETDEV_POST_TYPE_CHANGE:
1598                 ip_mc_remap(in_dev);
1599                 break;
1600         case NETDEV_CHANGEMTU:
1601                 if (inetdev_valid_mtu(dev->mtu))
1602                         break;
1603                 /* disable IP when MTU is not enough */
1604                 fallthrough;
1605         case NETDEV_UNREGISTER:
1606                 inetdev_destroy(in_dev);
1607                 break;
1608         case NETDEV_CHANGENAME:
1609                 /* Do not notify about label change, this event is
1610                  * not interesting to applications using netlink.
1611                  */
1612                 inetdev_changename(dev, in_dev);
1613
                     /* Re-register the sysctl entries after the rename.
                      * NOTE(review): presumably so they appear under the new
                      * device name - confirm in devinet_sysctl_register().
                      */
1614                 devinet_sysctl_unregister(in_dev);
1615                 devinet_sysctl_register(in_dev);
1616                 break;
1617         }
1618 out:
1619         return NOTIFY_DONE;
1620 }
1621
/* Notifier block whose callback is inetdev_event(). */
1622 static struct notifier_block ip_netdev_notifier = {
1623         .notifier_call = inetdev_event,
1624 };
1625
1626 static size_t inet_nlmsg_size(void)
1627 {
1628         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1629                + nla_total_size(4) /* IFA_ADDRESS */
1630                + nla_total_size(4) /* IFA_LOCAL */
1631                + nla_total_size(4) /* IFA_BROADCAST */
1632                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1633                + nla_total_size(4)  /* IFA_FLAGS */
1634                + nla_total_size(1)  /* IFA_PROTO */
1635                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1636                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1637 }
1638
1639 static inline u32 cstamp_delta(unsigned long cstamp)
1640 {
1641         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1642 }
1643
1644 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1645                          unsigned long tstamp, u32 preferred, u32 valid)
1646 {
1647         struct ifa_cacheinfo ci;
1648
1649         ci.cstamp = cstamp_delta(cstamp);
1650         ci.tstamp = cstamp_delta(tstamp);
1651         ci.ifa_prefered = preferred;
1652         ci.ifa_valid = valid;
1653
1654         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1655 }
1656
1657 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1658                             struct inet_fill_args *args)
1659 {
1660         struct ifaddrmsg *ifm;
1661         struct nlmsghdr  *nlh;
1662         u32 preferred, valid;
1663
1664         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1665                         args->flags);
1666         if (!nlh)
1667                 return -EMSGSIZE;
1668
1669         ifm = nlmsg_data(nlh);
1670         ifm->ifa_family = AF_INET;
1671         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1672         ifm->ifa_flags = ifa->ifa_flags;
1673         ifm->ifa_scope = ifa->ifa_scope;
1674         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1675
1676         if (args->netnsid >= 0 &&
1677             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1678                 goto nla_put_failure;
1679
1680         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1681                 preferred = ifa->ifa_preferred_lft;
1682                 valid = ifa->ifa_valid_lft;
1683                 if (preferred != INFINITY_LIFE_TIME) {
1684                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1685
1686                         if (preferred > tval)
1687                                 preferred -= tval;
1688                         else
1689                                 preferred = 0;
1690                         if (valid != INFINITY_LIFE_TIME) {
1691                                 if (valid > tval)
1692                                         valid -= tval;
1693                                 else
1694                                         valid = 0;
1695                         }
1696                 }
1697         } else {
1698                 preferred = INFINITY_LIFE_TIME;
1699                 valid = INFINITY_LIFE_TIME;
1700         }
1701         if ((ifa->ifa_address &&
1702              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1703             (ifa->ifa_local &&
1704              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1705             (ifa->ifa_broadcast &&
1706              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1707             (ifa->ifa_label[0] &&
1708              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1709             (ifa->ifa_proto &&
1710              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1711             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1712             (ifa->ifa_rt_priority &&
1713              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1714             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1715                           preferred, valid))
1716                 goto nla_put_failure;
1717
1718         nlmsg_end(skb, nlh);
1719         return 0;
1720
1721 nla_put_failure:
1722         nlmsg_cancel(skb, nlh);
1723         return -EMSGSIZE;
1724 }
1725
1726 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1727                                       struct inet_fill_args *fillargs,
1728                                       struct net **tgt_net, struct sock *sk,
1729                                       struct netlink_callback *cb)
1730 {
1731         struct netlink_ext_ack *extack = cb->extack;
1732         struct nlattr *tb[IFA_MAX+1];
1733         struct ifaddrmsg *ifm;
1734         int err, i;
1735
1736         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1737                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1738                 return -EINVAL;
1739         }
1740
1741         ifm = nlmsg_data(nlh);
1742         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1743                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1744                 return -EINVAL;
1745         }
1746
1747         fillargs->ifindex = ifm->ifa_index;
1748         if (fillargs->ifindex) {
1749                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1750                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1751         }
1752
1753         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1754                                             ifa_ipv4_policy, extack);
1755         if (err < 0)
1756                 return err;
1757
1758         for (i = 0; i <= IFA_MAX; ++i) {
1759                 if (!tb[i])
1760                         continue;
1761
1762                 if (i == IFA_TARGET_NETNSID) {
1763                         struct net *net;
1764
1765                         fillargs->netnsid = nla_get_s32(tb[i]);
1766
1767                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1768                         if (IS_ERR(net)) {
1769                                 fillargs->netnsid = -1;
1770                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1771                                 return PTR_ERR(net);
1772                         }
1773                         *tgt_net = net;
1774                 } else {
1775                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1776                         return -EINVAL;
1777                 }
1778         }
1779
1780         return 0;
1781 }
1782
1783 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1784                             struct netlink_callback *cb, int s_ip_idx,
1785                             struct inet_fill_args *fillargs)
1786 {
1787         struct in_ifaddr *ifa;
1788         int ip_idx = 0;
1789         int err;
1790
1791         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1792                 if (ip_idx < s_ip_idx) {
1793                         ip_idx++;
1794                         continue;
1795                 }
1796                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1797                 if (err < 0)
1798                         goto done;
1799
1800                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1801                 ip_idx++;
1802         }
1803         err = 0;
1804
1805 done:
1806         cb->args[2] = ip_idx;
1807
1808         return err;
1809 }
1810
/*
 * Netlink dump callback emitting RTM_NEWADDR messages for IPv4 addresses.
 *
 * Resume state lives in cb->args[]: [0] is the device hash bucket, [1] the
 * device position inside that bucket and [2] the address position inside
 * that device (written by in_dev_dump_addr()).
 *
 * Returns skb->len when anything was written to @skb, otherwise the last
 * error code (0 if there was none).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		/* May replace tgt_net and set fillargs.netnsid / .ifindex. */
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		if (fillargs.ifindex) {
			/*
			 * Single-device dump: the bucket walk and the
			 * cb->args[0]/[1] update at "done" are bypassed.
			 */
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* s_idx only applies to the first bucket visited in this call. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/*
			 * The saved address offset is only meaningful for the
			 * device the previous call stopped on.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				/* Leave with h/idx still naming this device. */
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/*
	 * netnsid is non-negative only when inet_valid_dump_ifaddr_req()
	 * accepted an IFA_TARGET_NETNSID attribute and stored the looked-up
	 * namespace in tgt_net.
	 */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1894
1895 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1896                       u32 portid)
1897 {
1898         struct inet_fill_args fillargs = {
1899                 .portid = portid,
1900                 .seq = nlh ? nlh->nlmsg_seq : 0,
1901                 .event = event,
1902                 .flags = 0,
1903                 .netnsid = -1,
1904         };
1905         struct sk_buff *skb;
1906         int err = -ENOBUFS;
1907         struct net *net;
1908
1909         net = dev_net(ifa->ifa_dev->dev);
1910         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1911         if (!skb)
1912                 goto errout;
1913
1914         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1915         if (err < 0) {
1916                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1917                 WARN_ON(err == -EMSGSIZE);
1918                 kfree_skb(skb);
1919                 goto errout;
1920         }
1921         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1922         return;
1923 errout:
1924         if (err < 0)
1925                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1926 }
1927
1928 static size_t inet_get_link_af_size(const struct net_device *dev,
1929                                     u32 ext_filter_mask)
1930 {
1931         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1932
1933         if (!in_dev)
1934                 return 0;
1935
1936         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1937 }
1938
1939 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1940                              u32 ext_filter_mask)
1941 {
1942         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1943         struct nlattr *nla;
1944         int i;
1945
1946         if (!in_dev)
1947                 return -ENODATA;
1948
1949         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1950         if (!nla)
1951                 return -EMSGSIZE;
1952
1953         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1954                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1955
1956         return 0;
1957 }
1958
/*
 * Attribute policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute. Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1962
1963 static int inet_validate_link_af(const struct net_device *dev,
1964                                  const struct nlattr *nla,
1965                                  struct netlink_ext_ack *extack)
1966 {
1967         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1968         int err, rem;
1969
1970         if (dev && !__in_dev_get_rtnl(dev))
1971                 return -EAFNOSUPPORT;
1972
1973         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1974                                           inet_af_policy, extack);
1975         if (err < 0)
1976                 return err;
1977
1978         if (tb[IFLA_INET_CONF]) {
1979                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1980                         int cfgid = nla_type(a);
1981
1982                         if (nla_len(a) < 4)
1983                                 return -EINVAL;
1984
1985                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1986                                 return -EINVAL;
1987                 }
1988         }
1989
1990         return 0;
1991 }
1992
1993 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1994                             struct netlink_ext_ack *extack)
1995 {
1996         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1997         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1998         int rem;
1999
2000         if (!in_dev)
2001                 return -EAFNOSUPPORT;
2002
2003         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2004                 return -EINVAL;
2005
2006         if (tb[IFLA_INET_CONF]) {
2007                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2008                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2009         }
2010
2011         return 0;
2012 }
2013
2014 static int inet_netconf_msgsize_devconf(int type)
2015 {
2016         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2017                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2018         bool all = false;
2019
2020         if (type == NETCONFA_ALL)
2021                 all = true;
2022
2023         if (all || type == NETCONFA_FORWARDING)
2024                 size += nla_total_size(4);
2025         if (all || type == NETCONFA_RP_FILTER)
2026                 size += nla_total_size(4);
2027         if (all || type == NETCONFA_MC_FORWARDING)
2028                 size += nla_total_size(4);
2029         if (all || type == NETCONFA_BC_FORWARDING)
2030                 size += nla_total_size(4);
2031         if (all || type == NETCONFA_PROXY_NEIGH)
2032                 size += nla_total_size(4);
2033         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2034                 size += nla_total_size(4);
2035
2036         return size;
2037 }
2038
2039 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2040                                      struct ipv4_devconf *devconf, u32 portid,
2041                                      u32 seq, int event, unsigned int flags,
2042                                      int type)
2043 {
2044         struct nlmsghdr  *nlh;
2045         struct netconfmsg *ncm;
2046         bool all = false;
2047
2048         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2049                         flags);
2050         if (!nlh)
2051                 return -EMSGSIZE;
2052
2053         if (type == NETCONFA_ALL)
2054                 all = true;
2055
2056         ncm = nlmsg_data(nlh);
2057         ncm->ncm_family = AF_INET;
2058
2059         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2060                 goto nla_put_failure;
2061
2062         if (!devconf)
2063                 goto out;
2064
2065         if ((all || type == NETCONFA_FORWARDING) &&
2066             nla_put_s32(skb, NETCONFA_FORWARDING,
2067                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2068                 goto nla_put_failure;
2069         if ((all || type == NETCONFA_RP_FILTER) &&
2070             nla_put_s32(skb, NETCONFA_RP_FILTER,
2071                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2072                 goto nla_put_failure;
2073         if ((all || type == NETCONFA_MC_FORWARDING) &&
2074             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2075                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2076                 goto nla_put_failure;
2077         if ((all || type == NETCONFA_BC_FORWARDING) &&
2078             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2079                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2080                 goto nla_put_failure;
2081         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2082             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2083                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2084                 goto nla_put_failure;
2085         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2086             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2087                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2088                 goto nla_put_failure;
2089
2090 out:
2091         nlmsg_end(skb, nlh);
2092         return 0;
2093
2094 nla_put_failure:
2095         nlmsg_cancel(skb, nlh);
2096         return -EMSGSIZE;
2097 }
2098
2099 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2100                                  int ifindex, struct ipv4_devconf *devconf)
2101 {
2102         struct sk_buff *skb;
2103         int err = -ENOBUFS;
2104
2105         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2106         if (!skb)
2107                 goto errout;
2108
2109         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2110                                         event, 0, type);
2111         if (err < 0) {
2112                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2113                 WARN_ON(err == -EMSGSIZE);
2114                 kfree_skb(skb);
2115                 goto errout;
2116         }
2117         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2118         return;
2119 errout:
2120         if (err < 0)
2121                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2122 }
2123
/*
 * Attribute policy used when parsing netconf get requests; each listed
 * attribute has .len set to sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]      = { .len = sizeof(int) },
	[NETCONFA_FORWARDING]   = { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
2131
2132 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2133                                       const struct nlmsghdr *nlh,
2134                                       struct nlattr **tb,
2135                                       struct netlink_ext_ack *extack)
2136 {
2137         int i, err;
2138
2139         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2140                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2141                 return -EINVAL;
2142         }
2143
2144         if (!netlink_strict_get_check(skb))
2145                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2146                                               tb, NETCONFA_MAX,
2147                                               devconf_ipv4_policy, extack);
2148
2149         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2150                                             tb, NETCONFA_MAX,
2151                                             devconf_ipv4_policy, extack);
2152         if (err)
2153                 return err;
2154
2155         for (i = 0; i <= NETCONFA_MAX; i++) {
2156                 if (!tb[i])
2157                         continue;
2158
2159                 switch (i) {
2160                 case NETCONFA_IFINDEX:
2161                         break;
2162                 default:
2163                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2164                         return -EINVAL;
2165                 }
2166         }
2167
2168         return 0;
2169 }
2170
2171 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2172                                     struct nlmsghdr *nlh,
2173                                     struct netlink_ext_ack *extack)
2174 {
2175         struct net *net = sock_net(in_skb->sk);
2176         struct nlattr *tb[NETCONFA_MAX+1];
2177         struct sk_buff *skb;
2178         struct ipv4_devconf *devconf;
2179         struct in_device *in_dev;
2180         struct net_device *dev;
2181         int ifindex;
2182         int err;
2183
2184         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2185         if (err)
2186                 goto errout;
2187
2188         err = -EINVAL;
2189         if (!tb[NETCONFA_IFINDEX])
2190                 goto errout;
2191
2192         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2193         switch (ifindex) {
2194         case NETCONFA_IFINDEX_ALL:
2195                 devconf = net->ipv4.devconf_all;
2196                 break;
2197         case NETCONFA_IFINDEX_DEFAULT:
2198                 devconf = net->ipv4.devconf_dflt;
2199                 break;
2200         default:
2201                 dev = __dev_get_by_index(net, ifindex);
2202                 if (!dev)
2203                         goto errout;
2204                 in_dev = __in_dev_get_rtnl(dev);
2205                 if (!in_dev)
2206                         goto errout;
2207                 devconf = &in_dev->cnf;
2208                 break;
2209         }
2210
2211         err = -ENOBUFS;
2212         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2213         if (!skb)
2214                 goto errout;
2215
2216         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2217                                         NETLINK_CB(in_skb).portid,
2218                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2219                                         NETCONFA_ALL);
2220         if (err < 0) {
2221                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2222                 WARN_ON(err == -EMSGSIZE);
2223                 kfree_skb(skb);
2224                 goto errout;
2225         }
2226         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2227 errout:
2228         return err;
2229 }
2230
2231 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2232                                      struct netlink_callback *cb)
2233 {
2234         const struct nlmsghdr *nlh = cb->nlh;
2235         struct net *net = sock_net(skb->sk);
2236         int h, s_h;
2237         int idx, s_idx;
2238         struct net_device *dev;
2239         struct in_device *in_dev;
2240         struct hlist_head *head;
2241
2242         if (cb->strict_check) {
2243                 struct netlink_ext_ack *extack = cb->extack;
2244                 struct netconfmsg *ncm;
2245
2246                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2247                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2248                         return -EINVAL;
2249                 }
2250
2251                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2252                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2253                         return -EINVAL;
2254                 }
2255         }
2256
2257         s_h = cb->args[0];
2258         s_idx = idx = cb->args[1];
2259
2260         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2261                 idx = 0;
2262                 head = &net->dev_index_head[h];
2263                 rcu_read_lock();
2264                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2265                           net->dev_base_seq;
2266                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2267                         if (idx < s_idx)
2268                                 goto cont;
2269                         in_dev = __in_dev_get_rcu(dev);
2270                         if (!in_dev)
2271                                 goto cont;
2272
2273                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2274                                                       &in_dev->cnf,
2275                                                       NETLINK_CB(cb->skb).portid,
2276                                                       nlh->nlmsg_seq,
2277                                                       RTM_NEWNETCONF,
2278                                                       NLM_F_MULTI,
2279                                                       NETCONFA_ALL) < 0) {
2280                                 rcu_read_unlock();
2281                                 goto done;
2282                         }
2283                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2284 cont:
2285                         idx++;
2286                 }
2287                 rcu_read_unlock();
2288         }
2289         if (h == NETDEV_HASHENTRIES) {
2290                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2291                                               net->ipv4.devconf_all,
2292                                               NETLINK_CB(cb->skb).portid,
2293                                               nlh->nlmsg_seq,
2294                                               RTM_NEWNETCONF, NLM_F_MULTI,
2295                                               NETCONFA_ALL) < 0)
2296                         goto done;
2297                 else
2298                         h++;
2299         }
2300         if (h == NETDEV_HASHENTRIES + 1) {
2301                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2302                                               net->ipv4.devconf_dflt,
2303                                               NETLINK_CB(cb->skb).portid,
2304                                               nlh->nlmsg_seq,
2305                                               RTM_NEWNETCONF, NLM_F_MULTI,
2306                                               NETCONFA_ALL) < 0)
2307                         goto done;
2308                 else
2309                         h++;
2310         }
2311 done:
2312         cb->args[0] = h;
2313         cb->args[1] = idx;
2314
2315         return skb->len;
2316 }
2317
2318 #ifdef CONFIG_SYSCTL
2319
2320 static void devinet_copy_dflt_conf(struct net *net, int i)
2321 {
2322         struct net_device *dev;
2323
2324         rcu_read_lock();
2325         for_each_netdev_rcu(net, dev) {
2326                 struct in_device *in_dev;
2327
2328                 in_dev = __in_dev_get_rcu(dev);
2329                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2330                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2331         }
2332         rcu_read_unlock();
2333 }
2334
2335 /* called with RTNL locked */
2336 static void inet_forward_change(struct net *net)
2337 {
2338         struct net_device *dev;
2339         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2340
2341         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2342         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2343         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344                                     NETCONFA_FORWARDING,
2345                                     NETCONFA_IFINDEX_ALL,
2346                                     net->ipv4.devconf_all);
2347         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2348                                     NETCONFA_FORWARDING,
2349                                     NETCONFA_IFINDEX_DEFAULT,
2350                                     net->ipv4.devconf_dflt);
2351
2352         for_each_netdev(net, dev) {
2353                 struct in_device *in_dev;
2354
2355                 if (on)
2356                         dev_disable_lro(dev);
2357
2358                 in_dev = __in_dev_get_rtnl(dev);
2359                 if (in_dev) {
2360                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2361                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2362                                                     NETCONFA_FORWARDING,
2363                                                     dev->ifindex, &in_dev->cnf);
2364                 }
2365         }
2366 }
2367
2368 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2369 {
2370         if (cnf == net->ipv4.devconf_dflt)
2371                 return NETCONFA_IFINDEX_DEFAULT;
2372         else if (cnf == net->ipv4.devconf_all)
2373                 return NETCONFA_IFINDEX_ALL;
2374         else {
2375                 struct in_device *idev
2376                         = container_of(cnf, struct in_device, cnf);
2377                 return idev->dev->ifindex;
2378         }
2379 }
2380
2381 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2382                              void *buffer, size_t *lenp, loff_t *ppos)
2383 {
2384         int old_value = *(int *)ctl->data;
2385         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2386         int new_value = *(int *)ctl->data;
2387
2388         if (write) {
2389                 struct ipv4_devconf *cnf = ctl->extra1;
2390                 struct net *net = ctl->extra2;
2391                 int i = (int *)ctl->data - cnf->data;
2392                 int ifindex;
2393
2394                 set_bit(i, cnf->state);
2395
2396                 if (cnf == net->ipv4.devconf_dflt)
2397                         devinet_copy_dflt_conf(net, i);
2398                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2399                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2400                         if ((new_value == 0) && (old_value != 0))
2401                                 rt_cache_flush(net);
2402
2403                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2404                     new_value != old_value)
2405                         rt_cache_flush(net);
2406
2407                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2408                     new_value != old_value) {
2409                         ifindex = devinet_conf_ifindex(net, cnf);
2410                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2411                                                     NETCONFA_RP_FILTER,
2412                                                     ifindex, cnf);
2413                 }
2414                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2415                     new_value != old_value) {
2416                         ifindex = devinet_conf_ifindex(net, cnf);
2417                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2418                                                     NETCONFA_PROXY_NEIGH,
2419                                                     ifindex, cnf);
2420                 }
2421                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2422                     new_value != old_value) {
2423                         ifindex = devinet_conf_ifindex(net, cnf);
2424                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2425                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2426                                                     ifindex, cnf);
2427                 }
2428         }
2429
2430         return ret;
2431 }
2432
2433 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2434                                   void *buffer, size_t *lenp, loff_t *ppos)
2435 {
2436         int *valp = ctl->data;
2437         int val = *valp;
2438         loff_t pos = *ppos;
2439         struct net *net = ctl->extra2;
2440         int ret;
2441
2442         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2443                 return -EPERM;
2444
2445         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2446
2447         if (write && *valp != val) {
2448                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2449                         if (!rtnl_trylock()) {
2450                                 /* Restore the original values before restarting */
2451                                 *valp = val;
2452                                 *ppos = pos;
2453                                 return restart_syscall();
2454                         }
2455                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2456                                 inet_forward_change(net);
2457                         } else {
2458                                 struct ipv4_devconf *cnf = ctl->extra1;
2459                                 struct in_device *idev =
2460                                         container_of(cnf, struct in_device, cnf);
2461                                 if (*valp)
2462                                         dev_disable_lro(idev->dev);
2463                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2464                                                             NETCONFA_FORWARDING,
2465                                                             idev->dev->ifindex,
2466                                                             cnf);
2467                         }
2468                         rtnl_unlock();
2469                         rt_cache_flush(net);
2470                 } else
2471                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2472                                                     NETCONFA_FORWARDING,
2473                                                     NETCONFA_IFINDEX_DEFAULT,
2474                                                     net->ipv4.devconf_dflt);
2475         }
2476
2477         return ret;
2478 }
2479
2480 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2481                                 void *buffer, size_t *lenp, loff_t *ppos)
2482 {
2483         int *valp = ctl->data;
2484         int val = *valp;
2485         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2486         struct net *net = ctl->extra2;
2487
2488         if (write && *valp != val)
2489                 rt_cache_flush(net);
2490
2491         return ret;
2492 }
2493
/*
 * Build one ctl_table initialiser for the IPv4 devconf attribute
 * IPV4_DEVCONF_<attr>.  Both .data and .extra1 refer to the static
 * ipv4_devconf template; attribute numbers are 1-based, hence the "- 1"
 * when indexing the data array.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perms, handler) \
        { \
                .procname       = name, \
                .data           = &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
                .maxlen         = sizeof(int), \
                .mode           = perms, \
                .proc_handler   = handler, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write (0644) entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler)

/* Read-write (0644) entry served by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2516
2517 static struct devinet_sysctl_table {
2518         struct ctl_table_header *sysctl_header;
2519         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2520 } devinet_sysctl = {
2521         .devinet_vars = {
2522                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2523                                              devinet_sysctl_forward),
2524                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2525                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2526
2527                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2528                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2529                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2530                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2531                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2532                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2533                                         "accept_source_route"),
2534                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2535                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2536                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2537                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2538                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2539                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2540                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2541                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2542                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2543                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2544                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2545                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2546                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2547                                         "arp_evict_nocarrier"),
2548                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2549                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2550                                         "force_igmp_version"),
2551                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2552                                         "igmpv2_unsolicited_report_interval"),
2553                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2554                                         "igmpv3_unsolicited_report_interval"),
2555                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2556                                         "ignore_routes_with_linkdown"),
2557                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2558                                         "drop_gratuitous_arp"),
2559
2560                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2561                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2562                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2563                                               "promote_secondaries"),
2564                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2565                                               "route_localnet"),
2566                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2567                                               "drop_unicast_in_l2_multicast"),
2568         },
2569 };
2570
2571 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2572                                      int ifindex, struct ipv4_devconf *p)
2573 {
2574         int i;
2575         struct devinet_sysctl_table *t;
2576         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2577
2578         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2579         if (!t)
2580                 goto out;
2581
2582         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2583                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2584                 t->devinet_vars[i].extra1 = p;
2585                 t->devinet_vars[i].extra2 = net;
2586         }
2587
2588         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2589
2590         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2591         if (!t->sysctl_header)
2592                 goto free;
2593
2594         p->sysctl = t;
2595
2596         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2597                                     ifindex, p);
2598         return 0;
2599
2600 free:
2601         kfree(t);
2602 out:
2603         return -ENOMEM;
2604 }
2605
2606 static void __devinet_sysctl_unregister(struct net *net,
2607                                         struct ipv4_devconf *cnf, int ifindex)
2608 {
2609         struct devinet_sysctl_table *t = cnf->sysctl;
2610
2611         if (t) {
2612                 cnf->sysctl = NULL;
2613                 unregister_net_sysctl_table(t->sysctl_header);
2614                 kfree(t);
2615         }
2616
2617         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2618 }
2619
2620 static int devinet_sysctl_register(struct in_device *idev)
2621 {
2622         int err;
2623
2624         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2625                 return -EINVAL;
2626
2627         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2628         if (err)
2629                 return err;
2630         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2631                                         idev->dev->ifindex, &idev->cnf);
2632         if (err)
2633                 neigh_sysctl_unregister(idev->arp_parms);
2634         return err;
2635 }
2636
2637 static void devinet_sysctl_unregister(struct in_device *idev)
2638 {
2639         struct net *net = dev_net(idev->dev);
2640
2641         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2642         neigh_sysctl_unregister(idev->arp_parms);
2643 }
2644
2645 static struct ctl_table ctl_forward_entry[] = {
2646         {
2647                 .procname       = "ip_forward",
2648                 .data           = &ipv4_devconf.data[
2649                                         IPV4_DEVCONF_FORWARDING - 1],
2650                 .maxlen         = sizeof(int),
2651                 .mode           = 0644,
2652                 .proc_handler   = devinet_sysctl_forward,
2653                 .extra1         = &ipv4_devconf,
2654                 .extra2         = &init_net,
2655         },
2656         { },
2657 };
2658 #endif
2659
2660 static __net_init int devinet_init_net(struct net *net)
2661 {
2662         int err;
2663         struct ipv4_devconf *all, *dflt;
2664 #ifdef CONFIG_SYSCTL
2665         struct ctl_table *tbl;
2666         struct ctl_table_header *forw_hdr;
2667 #endif
2668
2669         err = -ENOMEM;
2670         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2671         if (!all)
2672                 goto err_alloc_all;
2673
2674         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2675         if (!dflt)
2676                 goto err_alloc_dflt;
2677
2678 #ifdef CONFIG_SYSCTL
2679         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2680         if (!tbl)
2681                 goto err_alloc_ctl;
2682
2683         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2684         tbl[0].extra1 = all;
2685         tbl[0].extra2 = net;
2686 #endif
2687
2688         if (!net_eq(net, &init_net)) {
2689                 switch (net_inherit_devconf()) {
2690                 case 3:
2691                         /* copy from the current netns */
2692                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2693                                sizeof(ipv4_devconf));
2694                         memcpy(dflt,
2695                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2696                                sizeof(ipv4_devconf_dflt));
2697                         break;
2698                 case 0:
2699                 case 1:
2700                         /* copy from init_net */
2701                         memcpy(all, init_net.ipv4.devconf_all,
2702                                sizeof(ipv4_devconf));
2703                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2704                                sizeof(ipv4_devconf_dflt));
2705                         break;
2706                 case 2:
2707                         /* use compiled values */
2708                         break;
2709                 }
2710         }
2711
2712 #ifdef CONFIG_SYSCTL
2713         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2714         if (err < 0)
2715                 goto err_reg_all;
2716
2717         err = __devinet_sysctl_register(net, "default",
2718                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2719         if (err < 0)
2720                 goto err_reg_dflt;
2721
2722         err = -ENOMEM;
2723         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2724         if (!forw_hdr)
2725                 goto err_reg_ctl;
2726         net->ipv4.forw_hdr = forw_hdr;
2727 #endif
2728
2729         net->ipv4.devconf_all = all;
2730         net->ipv4.devconf_dflt = dflt;
2731         return 0;
2732
2733 #ifdef CONFIG_SYSCTL
2734 err_reg_ctl:
2735         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2736 err_reg_dflt:
2737         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2738 err_reg_all:
2739         kfree(tbl);
2740 err_alloc_ctl:
2741 #endif
2742         kfree(dflt);
2743 err_alloc_dflt:
2744         kfree(all);
2745 err_alloc_all:
2746         return err;
2747 }
2748
2749 static __net_exit void devinet_exit_net(struct net *net)
2750 {
2751 #ifdef CONFIG_SYSCTL
2752         struct ctl_table *tbl;
2753
2754         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757                                     NETCONFA_IFINDEX_DEFAULT);
2758         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759                                     NETCONFA_IFINDEX_ALL);
2760         kfree(tbl);
2761 #endif
2762         kfree(net->ipv4.devconf_dflt);
2763         kfree(net->ipv4.devconf_all);
2764 }
2765
2766 static __net_initdata struct pernet_operations devinet_ops = {
2767         .init = devinet_init_net,
2768         .exit = devinet_exit_net,
2769 };
2770
2771 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2772         .family           = AF_INET,
2773         .fill_link_af     = inet_fill_link_af,
2774         .get_link_af_size = inet_get_link_af_size,
2775         .validate_link_af = inet_validate_link_af,
2776         .set_link_af      = inet_set_link_af,
2777 };
2778
2779 void __init devinet_init(void)
2780 {
2781         int i;
2782
2783         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2785
2786         register_pernet_subsys(&devinet_ops);
2787         register_netdevice_notifier(&ip_netdev_notifier);
2788
2789         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2790
2791         rtnl_af_register(&inet_af_ops);
2792
2793         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2796         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2797                       inet_netconf_dump_devconf, 0);
2798 }