ipv4: don't set IPv6 only flags to IPv4 addresses
[linux-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/* Address flags that are cleared from every IPv4 ifaddr on insertion. */
#define IPV6ONLY_FLAGS	\
		(IFA_F_TENTATIVE | IFA_F_DADFAILED | IFA_F_NODAD | \
		 IFA_F_OPTIMISTIC | IFA_F_HOMEADDRESS | \
		 IFA_F_STABLE_PRIVACY | IFA_F_MANAGETEMPADDR)
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Read attribute @attr from the devconf_dflt block of namespace @net.
 * @net is parenthesised so that any pointer-valued expression can be passed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
104         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
105 };
106
/*
 * Bundle of parameters for building an address netlink message.
 * NOTE(review): the consumers of this struct are not visible in this part
 * of the file; the per-field notes below are inferred from the field names
 * and should be confirmed against the fill/dump helpers.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the netlink port id of the requester */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably the RTM_* message type */
	unsigned int flags;	/* presumably NLM_F_* message flags */
	int netnsid;		/* presumably the target netns id */
	int ifindex;		/* presumably the interface index */
};
115
116 #define IN4_ADDR_HSIZE_SHIFT    8
117 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
118
119 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
120
121 static u32 inet_addr_hash(const struct net *net, __be32 addr)
122 {
123         u32 val = (__force u32) addr ^ net_hash_mix(net);
124
125         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
126 }
127
128 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
129 {
130         u32 hash = inet_addr_hash(net, ifa->ifa_local);
131
132         ASSERT_RTNL();
133         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
134 }
135
136 static void inet_hash_remove(struct in_ifaddr *ifa)
137 {
138         ASSERT_RTNL();
139         hlist_del_init_rcu(&ifa->hash);
140 }
141
142 /**
143  * __ip_dev_find - find the first device with a given source address.
144  * @net: the net namespace
145  * @addr: the source address
146  * @devref: if true, take a reference on the found device
147  *
148  * If a caller uses devref=false, it should be protected by RCU, or RTNL
149  */
150 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
151 {
152         struct net_device *result = NULL;
153         struct in_ifaddr *ifa;
154
155         rcu_read_lock();
156         ifa = inet_lookup_ifaddr_rcu(net, addr);
157         if (!ifa) {
158                 struct flowi4 fl4 = { .daddr = addr };
159                 struct fib_result res = { 0 };
160                 struct fib_table *local;
161
162                 /* Fallback to FIB local table so that communication
163                  * over loopback subnets work.
164                  */
165                 local = fib_get_table(net, RT_TABLE_LOCAL);
166                 if (local &&
167                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
168                     res.type == RTN_LOCAL)
169                         result = FIB_RES_DEV(res);
170         } else {
171                 result = ifa->ifa_dev->dev;
172         }
173         if (result && devref)
174                 dev_hold(result);
175         rcu_read_unlock();
176         return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179
180 /* called under RCU lock */
181 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
182 {
183         u32 hash = inet_addr_hash(net, addr);
184         struct in_ifaddr *ifa;
185
186         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
187                 if (ifa->ifa_local == addr &&
188                     net_eq(dev_net(ifa->ifa_dev->dev), net))
189                         return ifa;
190
191         return NULL;
192 }
193
194 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
195
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
197 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
198 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
199                          int destroy);
200 #ifdef CONFIG_SYSCTL
201 static int devinet_sysctl_register(struct in_device *idev);
202 static void devinet_sysctl_unregister(struct in_device *idev);
203 #else
204 static int devinet_sysctl_register(struct in_device *idev)
205 {
206         return 0;
207 }
208 static void devinet_sysctl_unregister(struct in_device *idev)
209 {
210 }
211 #endif
212
213 /* Locks all the inet devices. */
214
215 static struct in_ifaddr *inet_alloc_ifa(void)
216 {
217         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
218 }
219
220 static void inet_rcu_free_ifa(struct rcu_head *head)
221 {
222         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
223         if (ifa->ifa_dev)
224                 in_dev_put(ifa->ifa_dev);
225         kfree(ifa);
226 }
227
228 static void inet_free_ifa(struct in_ifaddr *ifa)
229 {
230         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
231 }
232
233 void in_dev_finish_destroy(struct in_device *idev)
234 {
235         struct net_device *dev = idev->dev;
236
237         WARN_ON(idev->ifa_list);
238         WARN_ON(idev->mc_list);
239         kfree(rcu_dereference_protected(idev->mc_hash, 1));
240 #ifdef NET_REFCNT_DEBUG
241         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
242 #endif
243         dev_put(dev);
244         if (!idev->dead)
245                 pr_err("Freeing alive in_device %p\n", idev);
246         else
247                 kfree(idev);
248 }
249 EXPORT_SYMBOL(in_dev_finish_destroy);
250
251 static struct in_device *inetdev_init(struct net_device *dev)
252 {
253         struct in_device *in_dev;
254         int err = -ENOMEM;
255
256         ASSERT_RTNL();
257
258         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
259         if (!in_dev)
260                 goto out;
261         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
262                         sizeof(in_dev->cnf));
263         in_dev->cnf.sysctl = NULL;
264         in_dev->dev = dev;
265         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
266         if (!in_dev->arp_parms)
267                 goto out_kfree;
268         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
269                 dev_disable_lro(dev);
270         /* Reference in_dev->dev */
271         dev_hold(dev);
272         /* Account for reference dev->ip_ptr (below) */
273         refcount_set(&in_dev->refcnt, 1);
274
275         err = devinet_sysctl_register(in_dev);
276         if (err) {
277                 in_dev->dead = 1;
278                 in_dev_put(in_dev);
279                 in_dev = NULL;
280                 goto out;
281         }
282         ip_mc_init_dev(in_dev);
283         if (dev->flags & IFF_UP)
284                 ip_mc_up(in_dev);
285
286         /* we can receive as soon as ip_ptr is set -- do this last */
287         rcu_assign_pointer(dev->ip_ptr, in_dev);
288 out:
289         return in_dev ?: ERR_PTR(err);
290 out_kfree:
291         kfree(in_dev);
292         in_dev = NULL;
293         goto out;
294 }
295
296 static void in_dev_rcu_put(struct rcu_head *head)
297 {
298         struct in_device *idev = container_of(head, struct in_device, rcu_head);
299         in_dev_put(idev);
300 }
301
302 static void inetdev_destroy(struct in_device *in_dev)
303 {
304         struct in_ifaddr *ifa;
305         struct net_device *dev;
306
307         ASSERT_RTNL();
308
309         dev = in_dev->dev;
310
311         in_dev->dead = 1;
312
313         ip_mc_destroy_dev(in_dev);
314
315         while ((ifa = in_dev->ifa_list) != NULL) {
316                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
317                 inet_free_ifa(ifa);
318         }
319
320         RCU_INIT_POINTER(dev->ip_ptr, NULL);
321
322         devinet_sysctl_unregister(in_dev);
323         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
324         arp_ifdown(dev);
325
326         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
327 }
328
329 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
330 {
331         rcu_read_lock();
332         for_primary_ifa(in_dev) {
333                 if (inet_ifa_match(a, ifa)) {
334                         if (!b || inet_ifa_match(b, ifa)) {
335                                 rcu_read_unlock();
336                                 return 1;
337                         }
338                 }
339         } endfor_ifa(in_dev);
340         rcu_read_unlock();
341         return 0;
342 }
343
344 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
345                          int destroy, struct nlmsghdr *nlh, u32 portid)
346 {
347         struct in_ifaddr *promote = NULL;
348         struct in_ifaddr *ifa, *ifa1 = *ifap;
349         struct in_ifaddr *last_prim = in_dev->ifa_list;
350         struct in_ifaddr *prev_prom = NULL;
351         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
352
353         ASSERT_RTNL();
354
355         if (in_dev->dead)
356                 goto no_promotions;
357
358         /* 1. Deleting primary ifaddr forces deletion all secondaries
359          * unless alias promotion is set
360          **/
361
362         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
363                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
364
365                 while ((ifa = *ifap1) != NULL) {
366                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
367                             ifa1->ifa_scope <= ifa->ifa_scope)
368                                 last_prim = ifa;
369
370                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
371                             ifa1->ifa_mask != ifa->ifa_mask ||
372                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
373                                 ifap1 = &ifa->ifa_next;
374                                 prev_prom = ifa;
375                                 continue;
376                         }
377
378                         if (!do_promote) {
379                                 inet_hash_remove(ifa);
380                                 *ifap1 = ifa->ifa_next;
381
382                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
383                                 blocking_notifier_call_chain(&inetaddr_chain,
384                                                 NETDEV_DOWN, ifa);
385                                 inet_free_ifa(ifa);
386                         } else {
387                                 promote = ifa;
388                                 break;
389                         }
390                 }
391         }
392
393         /* On promotion all secondaries from subnet are changing
394          * the primary IP, we must remove all their routes silently
395          * and later to add them back with new prefsrc. Do this
396          * while all addresses are on the device list.
397          */
398         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
399                 if (ifa1->ifa_mask == ifa->ifa_mask &&
400                     inet_ifa_match(ifa1->ifa_address, ifa))
401                         fib_del_ifaddr(ifa, ifa1);
402         }
403
404 no_promotions:
405         /* 2. Unlink it */
406
407         *ifap = ifa1->ifa_next;
408         inet_hash_remove(ifa1);
409
410         /* 3. Announce address deletion */
411
412         /* Send message first, then call notifier.
413            At first sight, FIB update triggered by notifier
414            will refer to already deleted ifaddr, that could confuse
415            netlink listeners. It is not true: look, gated sees
416            that route deleted and if it still thinks that ifaddr
417            is valid, it will try to restore deleted routes... Grr.
418            So that, this order is correct.
419          */
420         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
421         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
422
423         if (promote) {
424                 struct in_ifaddr *next_sec = promote->ifa_next;
425
426                 if (prev_prom) {
427                         prev_prom->ifa_next = promote->ifa_next;
428                         promote->ifa_next = last_prim->ifa_next;
429                         last_prim->ifa_next = promote;
430                 }
431
432                 promote->ifa_flags &= ~IFA_F_SECONDARY;
433                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
434                 blocking_notifier_call_chain(&inetaddr_chain,
435                                 NETDEV_UP, promote);
436                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
437                         if (ifa1->ifa_mask != ifa->ifa_mask ||
438                             !inet_ifa_match(ifa1->ifa_address, ifa))
439                                         continue;
440                         fib_add_ifaddr(ifa);
441                 }
442
443         }
444         if (destroy)
445                 inet_free_ifa(ifa1);
446 }
447
448 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
449                          int destroy)
450 {
451         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
452 }
453
454 static void check_lifetime(struct work_struct *work);
455
456 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
457
458 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
459                              u32 portid, struct netlink_ext_ack *extack)
460 {
461         struct in_device *in_dev = ifa->ifa_dev;
462         struct in_ifaddr *ifa1, **ifap, **last_primary;
463         struct in_validator_info ivi;
464         int ret;
465
466         ASSERT_RTNL();
467
468         if (!ifa->ifa_local) {
469                 inet_free_ifa(ifa);
470                 return 0;
471         }
472
473         ifa->ifa_flags &= ~IFA_F_SECONDARY;
474         last_primary = &in_dev->ifa_list;
475
476         /* Don't set IPv6 only flags to IPv4 addresses */
477         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
478
479         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
480              ifap = &ifa1->ifa_next) {
481                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
482                     ifa->ifa_scope <= ifa1->ifa_scope)
483                         last_primary = &ifa1->ifa_next;
484                 if (ifa1->ifa_mask == ifa->ifa_mask &&
485                     inet_ifa_match(ifa1->ifa_address, ifa)) {
486                         if (ifa1->ifa_local == ifa->ifa_local) {
487                                 inet_free_ifa(ifa);
488                                 return -EEXIST;
489                         }
490                         if (ifa1->ifa_scope != ifa->ifa_scope) {
491                                 inet_free_ifa(ifa);
492                                 return -EINVAL;
493                         }
494                         ifa->ifa_flags |= IFA_F_SECONDARY;
495                 }
496         }
497
498         /* Allow any devices that wish to register ifaddr validtors to weigh
499          * in now, before changes are committed.  The rntl lock is serializing
500          * access here, so the state should not change between a validator call
501          * and a final notify on commit.  This isn't invoked on promotion under
502          * the assumption that validators are checking the address itself, and
503          * not the flags.
504          */
505         ivi.ivi_addr = ifa->ifa_address;
506         ivi.ivi_dev = ifa->ifa_dev;
507         ivi.extack = extack;
508         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
509                                            NETDEV_UP, &ivi);
510         ret = notifier_to_errno(ret);
511         if (ret) {
512                 inet_free_ifa(ifa);
513                 return ret;
514         }
515
516         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
517                 prandom_seed((__force u32) ifa->ifa_local);
518                 ifap = last_primary;
519         }
520
521         ifa->ifa_next = *ifap;
522         *ifap = ifa;
523
524         inet_hash_insert(dev_net(in_dev->dev), ifa);
525
526         cancel_delayed_work(&check_lifetime_work);
527         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
528
529         /* Send message first, then call notifier.
530            Notifier will trigger FIB update, so that
531            listeners of netlink will know about new ifaddr */
532         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
533         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
534
535         return 0;
536 }
537
538 static int inet_insert_ifa(struct in_ifaddr *ifa)
539 {
540         return __inet_insert_ifa(ifa, NULL, 0, NULL);
541 }
542
543 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
544 {
545         struct in_device *in_dev = __in_dev_get_rtnl(dev);
546
547         ASSERT_RTNL();
548
549         if (!in_dev) {
550                 inet_free_ifa(ifa);
551                 return -ENOBUFS;
552         }
553         ipv4_devconf_setall(in_dev);
554         neigh_parms_data_state_setall(in_dev->arp_parms);
555         if (ifa->ifa_dev != in_dev) {
556                 WARN_ON(ifa->ifa_dev);
557                 in_dev_hold(in_dev);
558                 ifa->ifa_dev = in_dev;
559         }
560         if (ipv4_is_loopback(ifa->ifa_local))
561                 ifa->ifa_scope = RT_SCOPE_HOST;
562         return inet_insert_ifa(ifa);
563 }
564
565 /* Caller must hold RCU or RTNL :
566  * We dont take a reference on found in_device
567  */
568 struct in_device *inetdev_by_index(struct net *net, int ifindex)
569 {
570         struct net_device *dev;
571         struct in_device *in_dev = NULL;
572
573         rcu_read_lock();
574         dev = dev_get_by_index_rcu(net, ifindex);
575         if (dev)
576                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
577         rcu_read_unlock();
578         return in_dev;
579 }
580 EXPORT_SYMBOL(inetdev_by_index);
581
582 /* Called only from RTNL semaphored context. No locks. */
583
584 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
585                                     __be32 mask)
586 {
587         ASSERT_RTNL();
588
589         for_primary_ifa(in_dev) {
590                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
591                         return ifa;
592         } endfor_ifa(in_dev);
593         return NULL;
594 }
595
596 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
597 {
598         struct ip_mreqn mreq = {
599                 .imr_multiaddr.s_addr = ifa->ifa_address,
600                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
601         };
602         int ret;
603
604         ASSERT_RTNL();
605
606         lock_sock(sk);
607         if (join)
608                 ret = ip_mc_join_group(sk, &mreq);
609         else
610                 ret = ip_mc_leave_group(sk, &mreq);
611         release_sock(sk);
612
613         return ret;
614 }
615
616 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
617                             struct netlink_ext_ack *extack)
618 {
619         struct net *net = sock_net(skb->sk);
620         struct nlattr *tb[IFA_MAX+1];
621         struct in_device *in_dev;
622         struct ifaddrmsg *ifm;
623         struct in_ifaddr *ifa, **ifap;
624         int err = -EINVAL;
625
626         ASSERT_RTNL();
627
628         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
629                                      ifa_ipv4_policy, extack);
630         if (err < 0)
631                 goto errout;
632
633         ifm = nlmsg_data(nlh);
634         in_dev = inetdev_by_index(net, ifm->ifa_index);
635         if (!in_dev) {
636                 err = -ENODEV;
637                 goto errout;
638         }
639
640         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
641              ifap = &ifa->ifa_next) {
642                 if (tb[IFA_LOCAL] &&
643                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
644                         continue;
645
646                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
647                         continue;
648
649                 if (tb[IFA_ADDRESS] &&
650                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
651                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
652                         continue;
653
654                 if (ipv4_is_multicast(ifa->ifa_address))
655                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
656                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
657                 return 0;
658         }
659
660         err = -EADDRNOTAVAIL;
661 errout:
662         return err;
663 }
664
665 #define INFINITY_LIFE_TIME      0xFFFFFFFF
666
667 static void check_lifetime(struct work_struct *work)
668 {
669         unsigned long now, next, next_sec, next_sched;
670         struct in_ifaddr *ifa;
671         struct hlist_node *n;
672         int i;
673
674         now = jiffies;
675         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
676
677         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
678                 bool change_needed = false;
679
680                 rcu_read_lock();
681                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
682                         unsigned long age;
683
684                         if (ifa->ifa_flags & IFA_F_PERMANENT)
685                                 continue;
686
687                         /* We try to batch several events at once. */
688                         age = (now - ifa->ifa_tstamp +
689                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
690
691                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
692                             age >= ifa->ifa_valid_lft) {
693                                 change_needed = true;
694                         } else if (ifa->ifa_preferred_lft ==
695                                    INFINITY_LIFE_TIME) {
696                                 continue;
697                         } else if (age >= ifa->ifa_preferred_lft) {
698                                 if (time_before(ifa->ifa_tstamp +
699                                                 ifa->ifa_valid_lft * HZ, next))
700                                         next = ifa->ifa_tstamp +
701                                                ifa->ifa_valid_lft * HZ;
702
703                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
704                                         change_needed = true;
705                         } else if (time_before(ifa->ifa_tstamp +
706                                                ifa->ifa_preferred_lft * HZ,
707                                                next)) {
708                                 next = ifa->ifa_tstamp +
709                                        ifa->ifa_preferred_lft * HZ;
710                         }
711                 }
712                 rcu_read_unlock();
713                 if (!change_needed)
714                         continue;
715                 rtnl_lock();
716                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
717                         unsigned long age;
718
719                         if (ifa->ifa_flags & IFA_F_PERMANENT)
720                                 continue;
721
722                         /* We try to batch several events at once. */
723                         age = (now - ifa->ifa_tstamp +
724                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
725
726                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
727                             age >= ifa->ifa_valid_lft) {
728                                 struct in_ifaddr **ifap;
729
730                                 for (ifap = &ifa->ifa_dev->ifa_list;
731                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
732                                         if (*ifap == ifa) {
733                                                 inet_del_ifa(ifa->ifa_dev,
734                                                              ifap, 1);
735                                                 break;
736                                         }
737                                 }
738                         } else if (ifa->ifa_preferred_lft !=
739                                    INFINITY_LIFE_TIME &&
740                                    age >= ifa->ifa_preferred_lft &&
741                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
742                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
743                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
744                         }
745                 }
746                 rtnl_unlock();
747         }
748
749         next_sec = round_jiffies_up(next);
750         next_sched = next;
751
752         /* If rounded timeout is accurate enough, accept it. */
753         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
754                 next_sched = next_sec;
755
756         now = jiffies;
757         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
758         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
759                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
760
761         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
762                         next_sched - now);
763 }
764
765 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
766                              __u32 prefered_lft)
767 {
768         unsigned long timeout;
769
770         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
771
772         timeout = addrconf_timeout_fixup(valid_lft, HZ);
773         if (addrconf_finite_timeout(timeout))
774                 ifa->ifa_valid_lft = timeout;
775         else
776                 ifa->ifa_flags |= IFA_F_PERMANENT;
777
778         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
779         if (addrconf_finite_timeout(timeout)) {
780                 if (timeout == 0)
781                         ifa->ifa_flags |= IFA_F_DEPRECATED;
782                 ifa->ifa_preferred_lft = timeout;
783         }
784         ifa->ifa_tstamp = jiffies;
785         if (!ifa->ifa_cstamp)
786                 ifa->ifa_cstamp = ifa->ifa_tstamp;
787 }
788
789 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
790                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
791                                        struct netlink_ext_ack *extack)
792 {
793         struct nlattr *tb[IFA_MAX+1];
794         struct in_ifaddr *ifa;
795         struct ifaddrmsg *ifm;
796         struct net_device *dev;
797         struct in_device *in_dev;
798         int err;
799
800         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
801                                      ifa_ipv4_policy, extack);
802         if (err < 0)
803                 goto errout;
804
805         ifm = nlmsg_data(nlh);
806         err = -EINVAL;
807         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
808                 goto errout;
809
810         dev = __dev_get_by_index(net, ifm->ifa_index);
811         err = -ENODEV;
812         if (!dev)
813                 goto errout;
814
815         in_dev = __in_dev_get_rtnl(dev);
816         err = -ENOBUFS;
817         if (!in_dev)
818                 goto errout;
819
820         ifa = inet_alloc_ifa();
821         if (!ifa)
822                 /*
823                  * A potential indev allocation can be left alive, it stays
824                  * assigned to its device and is destroy with it.
825                  */
826                 goto errout;
827
828         ipv4_devconf_setall(in_dev);
829         neigh_parms_data_state_setall(in_dev->arp_parms);
830         in_dev_hold(in_dev);
831
832         if (!tb[IFA_ADDRESS])
833                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
834
835         INIT_HLIST_NODE(&ifa->hash);
836         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
837         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
838         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
839                                          ifm->ifa_flags;
840         ifa->ifa_scope = ifm->ifa_scope;
841         ifa->ifa_dev = in_dev;
842
843         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
844         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
845
846         if (tb[IFA_BROADCAST])
847                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
848
849         if (tb[IFA_LABEL])
850                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
851         else
852                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
853
854         if (tb[IFA_RT_PRIORITY])
855                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
856
857         if (tb[IFA_CACHEINFO]) {
858                 struct ifa_cacheinfo *ci;
859
860                 ci = nla_data(tb[IFA_CACHEINFO]);
861                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
862                         err = -EINVAL;
863                         goto errout_free;
864                 }
865                 *pvalid_lft = ci->ifa_valid;
866                 *pprefered_lft = ci->ifa_prefered;
867         }
868
869         return ifa;
870
871 errout_free:
872         inet_free_ifa(ifa);
873 errout:
874         return ERR_PTR(err);
875 }
876
877 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
878 {
879         struct in_device *in_dev = ifa->ifa_dev;
880         struct in_ifaddr *ifa1, **ifap;
881
882         if (!ifa->ifa_local)
883                 return NULL;
884
885         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
886              ifap = &ifa1->ifa_next) {
887                 if (ifa1->ifa_mask == ifa->ifa_mask &&
888                     inet_ifa_match(ifa1->ifa_address, ifa) &&
889                     ifa1->ifa_local == ifa->ifa_local)
890                         return ifa1;
891         }
892         return NULL;
893 }
894
895 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
896                             struct netlink_ext_ack *extack)
897 {
898         struct net *net = sock_net(skb->sk);
899         struct in_ifaddr *ifa;
900         struct in_ifaddr *ifa_existing;
901         __u32 valid_lft = INFINITY_LIFE_TIME;
902         __u32 prefered_lft = INFINITY_LIFE_TIME;
903
904         ASSERT_RTNL();
905
906         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
907         if (IS_ERR(ifa))
908                 return PTR_ERR(ifa);
909
910         ifa_existing = find_matching_ifa(ifa);
911         if (!ifa_existing) {
912                 /* It would be best to check for !NLM_F_CREATE here but
913                  * userspace already relies on not having to provide this.
914                  */
915                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
917                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
918                                                true, ifa);
919
920                         if (ret < 0) {
921                                 inet_free_ifa(ifa);
922                                 return ret;
923                         }
924                 }
925                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
926                                          extack);
927         } else {
928                 u32 new_metric = ifa->ifa_rt_priority;
929
930                 inet_free_ifa(ifa);
931
932                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
933                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
934                         return -EEXIST;
935                 ifa = ifa_existing;
936
937                 if (ifa->ifa_rt_priority != new_metric) {
938                         fib_modify_prefix_metric(ifa, new_metric);
939                         ifa->ifa_rt_priority = new_metric;
940                 }
941
942                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
943                 cancel_delayed_work(&check_lifetime_work);
944                 queue_delayed_work(system_power_efficient_wq,
945                                 &check_lifetime_work, 0);
946                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
947         }
948         return 0;
949 }
950
951 /*
952  *      Determine a default network mask, based on the IP address.
953  */
954
955 static int inet_abc_len(__be32 addr)
956 {
957         int rc = -1;    /* Something else, probably a multicast. */
958
959         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
960                 rc = 0;
961         else {
962                 __u32 haddr = ntohl(addr);
963                 if (IN_CLASSA(haddr))
964                         rc = 8;
965                 else if (IN_CLASSB(haddr))
966                         rc = 16;
967                 else if (IN_CLASSC(haddr))
968                         rc = 24;
969                 else if (IN_CLASSE(haddr))
970                         rc = 32;
971         }
972
973         return rc;
974 }
975
976
/*
 * Handle the IPv4 interface address ioctls.
 *
 * Supported commands:
 *   SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK
 *      - copy one field of the selected address into ifr->ifr_addr.
 *   SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR, SIOCSIFNETMASK
 *      - change that field; the caller needs CAP_NET_ADMIN in the netns'
 *        user namespace and ifr->ifr_addr must be AF_INET.
 *   SIOCSIFFLAGS
 *      - needs CAP_NET_ADMIN; for an alias name ("dev:label") clearing
 *        IFF_UP deletes the alias address, for a plain device name the
 *        flags are handed to dev_change_flags().
 *
 * The address is selected by ifr->ifr_name, which is compared against each
 * address label; for the get commands with an AF_INET address supplied, a
 * label + local address match is tried first.
 *
 * Takes and releases RTNL itself.
 *
 * Returns 0 on success or a negative errno: -EPERM (missing capability),
 * -EINVAL (unknown command, wrong family, illegal address or mask),
 * -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS (allocation failure), or whatever dev_change_flags() /
 * inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /* Force NUL termination before treating ifr_name as a string. */
        ifr->ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /* Temporarily cut an alias label ("dev:label") down to the device
         * name so that the device lookups below see the bare name.
         */
        colon = strchr(ifr->ifr_name, ':');
        if (colon)
                *colon = 0;

        dev_load(net, ifr->ifr_name);

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* The reply is built in place; the request copy lives on
                 * in sin_orig.
                 */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (!dev)
                goto done;

        /* Restore the full alias name for the label comparisons below. */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /* Only SIOCSIFADDR (may create the address) and SIOCSIFFLAGS (may
         * act on the device itself) can proceed without a matching address.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_local;
                break;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                break;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_address;
                break;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_mask;
                break;

        case SIOCSIFFLAGS:
                if (colon) {
                        /* Alias name: "down" removes the alias address,
                         * "up" is accepted as a no-op.
                         */
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr->ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* No address under this label yet: create one. */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /* Unlink the entry and reuse the same object for
                         * the new address.
                         * NOTE(review): the last argument (0 here, 1 in
                         * the SIOCSIFFLAGS case) presumably selects whether
                         * inet_del_ifa() also frees the entry - confirm
                         * against its definition.
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                if (!(dev->flags & IFF_POINTOPOINT)) {
                        /* Classful default mask; broadcast only when the
                         * prefix is shorter than /31.
                         */
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Remove, modify, re-insert. */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
}
1211
1212 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1213 {
1214         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1215         struct in_ifaddr *ifa;
1216         struct ifreq ifr;
1217         int done = 0;
1218
1219         if (WARN_ON(size > sizeof(struct ifreq)))
1220                 goto out;
1221
1222         if (!in_dev)
1223                 goto out;
1224
1225         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1226                 if (!buf) {
1227                         done += size;
1228                         continue;
1229                 }
1230                 if (len < size)
1231                         break;
1232                 memset(&ifr, 0, sizeof(struct ifreq));
1233                 strcpy(ifr.ifr_name, ifa->ifa_label);
1234
1235                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1236                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1237                                                                 ifa->ifa_local;
1238
1239                 if (copy_to_user(buf + done, &ifr, size)) {
1240                         done = -EFAULT;
1241                         break;
1242                 }
1243                 len  -= size;
1244                 done += size;
1245         }
1246 out:
1247         return done;
1248 }
1249
1250 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1251                                  int scope)
1252 {
1253         for_primary_ifa(in_dev) {
1254                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1255                     ifa->ifa_scope <= scope)
1256                         return ifa->ifa_local;
1257         } endfor_ifa(in_dev);
1258
1259         return 0;
1260 }
1261
1262 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1263 {
1264         __be32 addr = 0;
1265         struct in_device *in_dev;
1266         struct net *net = dev_net(dev);
1267         int master_idx;
1268
1269         rcu_read_lock();
1270         in_dev = __in_dev_get_rcu(dev);
1271         if (!in_dev)
1272                 goto no_in_dev;
1273
1274         for_primary_ifa(in_dev) {
1275                 if (ifa->ifa_scope > scope)
1276                         continue;
1277                 if (!dst || inet_ifa_match(dst, ifa)) {
1278                         addr = ifa->ifa_local;
1279                         break;
1280                 }
1281                 if (!addr)
1282                         addr = ifa->ifa_local;
1283         } endfor_ifa(in_dev);
1284
1285         if (addr)
1286                 goto out_unlock;
1287 no_in_dev:
1288         master_idx = l3mdev_master_ifindex_rcu(dev);
1289
1290         /* For VRFs, the VRF device takes the place of the loopback device,
1291          * with addresses on it being preferred.  Note in such cases the
1292          * loopback device will be among the devices that fail the master_idx
1293          * equality check in the loop below.
1294          */
1295         if (master_idx &&
1296             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1297             (in_dev = __in_dev_get_rcu(dev))) {
1298                 addr = in_dev_select_addr(in_dev, scope);
1299                 if (addr)
1300                         goto out_unlock;
1301         }
1302
1303         /* Not loopback addresses on loopback should be preferred
1304            in this case. It is important that lo is the first interface
1305            in dev_base list.
1306          */
1307         for_each_netdev_rcu(net, dev) {
1308                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1309                         continue;
1310
1311                 in_dev = __in_dev_get_rcu(dev);
1312                 if (!in_dev)
1313                         continue;
1314
1315                 addr = in_dev_select_addr(in_dev, scope);
1316                 if (addr)
1317                         goto out_unlock;
1318         }
1319 out_unlock:
1320         rcu_read_unlock();
1321         return addr;
1322 }
1323 EXPORT_SYMBOL(inet_select_addr);
1324
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *   addr - the first local address that equals @local (any address when
 *          @local is 0) and whose scope is <= @scope;
 *   same - whether some address on the device satisfies inet_ifa_match()
 *          for both @local and @dst (a zero value acts as a wildcard).
 *
 * Returns addr when a "same" entry was found, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Candidate source address: first acceptable one wins. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet condition already met earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* The extra checks below only apply when
                                 * the address was auto-selected for a
                                 * specific dst.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1361
1362 /*
1363  * Confirm that local IP address exists using wildcards:
1364  * - net: netns to check, cannot be NULL
1365  * - in_dev: only on this interface, NULL=any interface
1366  * - dst: only in the same subnet as dst, 0=any dst
1367  * - local: address, 0=autoselect the local address
1368  * - scope: maximum allowed scope value for the local address
1369  */
1370 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1371                          __be32 dst, __be32 local, int scope)
1372 {
1373         __be32 addr = 0;
1374         struct net_device *dev;
1375
1376         if (in_dev)
1377                 return confirm_addr_indev(in_dev, dst, local, scope);
1378
1379         rcu_read_lock();
1380         for_each_netdev_rcu(net, dev) {
1381                 in_dev = __in_dev_get_rcu(dev);
1382                 if (in_dev) {
1383                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1384                         if (addr)
1385                                 break;
1386                 }
1387         }
1388         rcu_read_unlock();
1389
1390         return addr;
1391 }
1392 EXPORT_SYMBOL(inet_confirm_addr);
1393
1394 /*
1395  *      Device notifier
1396  */
1397
/*
 * Add @nb to the blocking notifier chain inetaddr_chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1403
/*
 * Remove @nb from the blocking notifier chain inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1409
/*
 * Add @nb to the blocking notifier chain inetaddr_validator_chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1415
/*
 * Remove @nb from the blocking notifier chain inetaddr_validator_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
            nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1422
1423 /* Rename ifa_labels for a device name change. Make some effort to preserve
1424  * existing alias numbering and to create unique labels if possible.
1425 */
1426 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1427 {
1428         struct in_ifaddr *ifa;
1429         int named = 0;
1430
1431         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1432                 char old[IFNAMSIZ], *dot;
1433
1434                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1435                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1436                 if (named++ == 0)
1437                         goto skip;
1438                 dot = strchr(old, ':');
1439                 if (!dot) {
1440                         sprintf(old, ":%d", named);
1441                         dot = old;
1442                 }
1443                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1444                         strcat(ifa->ifa_label, dot);
1445                 else
1446                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1447 skip:
1448                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1449         }
1450 }
1451
/* Return true when @mtu is at least IPV4_MIN_MTU. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        return mtu >= IPV4_MIN_MTU;
}
1456
1457 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1458                                         struct in_device *in_dev)
1459
1460 {
1461         struct in_ifaddr *ifa;
1462
1463         for (ifa = in_dev->ifa_list; ifa;
1464              ifa = ifa->ifa_next) {
1465                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1466                          ifa->ifa_local, dev,
1467                          ifa->ifa_local, NULL,
1468                          dev->dev_addr, NULL);
1469         }
1470 }
1471
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback: keeps the IPv4 per-device state (in_device)
 * in step with device lifecycle events.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, in which case the error is converted with
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: only REGISTER and CHANGEMTU can create it. */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (IS_ERR(in_dev))
                                return notifier_from_errno(PTR_ERR(in_dev));
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* Not expected: the device already has an in_device. */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        /* Give the loopback device INADDR_LOOPBACK/8 with
                         * host scope and infinite lifetime.  Allocation
                         * failure is silently tolerated.
                         */
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                ipv4_devconf_setall(in_dev);
                                neigh_parms_data_state_setall(in_dev->arp_parms);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce only if arp_notify is set. */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 */
                /* NOTE(review): inetdev_changename() does call
                 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which
                 * looks at odds with the comment above - confirm intent.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register so the sysctl entries follow the new name. */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1567
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1571
1572 static size_t inet_nlmsg_size(void)
1573 {
1574         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1575                + nla_total_size(4) /* IFA_ADDRESS */
1576                + nla_total_size(4) /* IFA_LOCAL */
1577                + nla_total_size(4) /* IFA_BROADCAST */
1578                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1579                + nla_total_size(4)  /* IFA_FLAGS */
1580                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1581                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1582 }
1583
1584 static inline u32 cstamp_delta(unsigned long cstamp)
1585 {
1586         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1587 }
1588
1589 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1590                          unsigned long tstamp, u32 preferred, u32 valid)
1591 {
1592         struct ifa_cacheinfo ci;
1593
1594         ci.cstamp = cstamp_delta(cstamp);
1595         ci.tstamp = cstamp_delta(tstamp);
1596         ci.ifa_prefered = preferred;
1597         ci.ifa_valid = valid;
1598
1599         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1600 }
1601
1602 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1603                             struct inet_fill_args *args)
1604 {
1605         struct ifaddrmsg *ifm;
1606         struct nlmsghdr  *nlh;
1607         u32 preferred, valid;
1608
1609         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1610                         args->flags);
1611         if (!nlh)
1612                 return -EMSGSIZE;
1613
1614         ifm = nlmsg_data(nlh);
1615         ifm->ifa_family = AF_INET;
1616         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1617         ifm->ifa_flags = ifa->ifa_flags;
1618         ifm->ifa_scope = ifa->ifa_scope;
1619         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1620
1621         if (args->netnsid >= 0 &&
1622             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1623                 goto nla_put_failure;
1624
1625         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1626                 preferred = ifa->ifa_preferred_lft;
1627                 valid = ifa->ifa_valid_lft;
1628                 if (preferred != INFINITY_LIFE_TIME) {
1629                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1630
1631                         if (preferred > tval)
1632                                 preferred -= tval;
1633                         else
1634                                 preferred = 0;
1635                         if (valid != INFINITY_LIFE_TIME) {
1636                                 if (valid > tval)
1637                                         valid -= tval;
1638                                 else
1639                                         valid = 0;
1640                         }
1641                 }
1642         } else {
1643                 preferred = INFINITY_LIFE_TIME;
1644                 valid = INFINITY_LIFE_TIME;
1645         }
1646         if ((ifa->ifa_address &&
1647              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1648             (ifa->ifa_local &&
1649              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1650             (ifa->ifa_broadcast &&
1651              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1652             (ifa->ifa_label[0] &&
1653              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1654             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1655             (ifa->ifa_rt_priority &&
1656              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1657             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1658                           preferred, valid))
1659                 goto nla_put_failure;
1660
1661         nlmsg_end(skb, nlh);
1662         return 0;
1663
1664 nla_put_failure:
1665         nlmsg_cancel(skb, nlh);
1666         return -EMSGSIZE;
1667 }
1668
1669 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1670                                       struct inet_fill_args *fillargs,
1671                                       struct net **tgt_net, struct sock *sk,
1672                                       struct netlink_callback *cb)
1673 {
1674         struct netlink_ext_ack *extack = cb->extack;
1675         struct nlattr *tb[IFA_MAX+1];
1676         struct ifaddrmsg *ifm;
1677         int err, i;
1678
1679         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1680                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1681                 return -EINVAL;
1682         }
1683
1684         ifm = nlmsg_data(nlh);
1685         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1686                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1687                 return -EINVAL;
1688         }
1689
1690         fillargs->ifindex = ifm->ifa_index;
1691         if (fillargs->ifindex) {
1692                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1693                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1694         }
1695
1696         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1697                                             ifa_ipv4_policy, extack);
1698         if (err < 0)
1699                 return err;
1700
1701         for (i = 0; i <= IFA_MAX; ++i) {
1702                 if (!tb[i])
1703                         continue;
1704
1705                 if (i == IFA_TARGET_NETNSID) {
1706                         struct net *net;
1707
1708                         fillargs->netnsid = nla_get_s32(tb[i]);
1709
1710                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1711                         if (IS_ERR(net)) {
1712                                 fillargs->netnsid = -1;
1713                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1714                                 return PTR_ERR(net);
1715                         }
1716                         *tgt_net = net;
1717                 } else {
1718                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1719                         return -EINVAL;
1720                 }
1721         }
1722
1723         return 0;
1724 }
1725
1726 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1727                             struct netlink_callback *cb, int s_ip_idx,
1728                             struct inet_fill_args *fillargs)
1729 {
1730         struct in_ifaddr *ifa;
1731         int ip_idx = 0;
1732         int err;
1733
1734         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1735                 if (ip_idx < s_ip_idx)
1736                         continue;
1737
1738                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1739                 if (err < 0)
1740                         goto done;
1741
1742                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1743         }
1744         err = 0;
1745
1746 done:
1747         cb->args[2] = ip_idx;
1748
1749         return err;
1750 }
1751
1752 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1753 {
1754         const struct nlmsghdr *nlh = cb->nlh;
1755         struct inet_fill_args fillargs = {
1756                 .portid = NETLINK_CB(cb->skb).portid,
1757                 .seq = nlh->nlmsg_seq,
1758                 .event = RTM_NEWADDR,
1759                 .flags = NLM_F_MULTI,
1760                 .netnsid = -1,
1761         };
1762         struct net *net = sock_net(skb->sk);
1763         struct net *tgt_net = net;
1764         int h, s_h;
1765         int idx, s_idx;
1766         int s_ip_idx;
1767         struct net_device *dev;
1768         struct in_device *in_dev;
1769         struct hlist_head *head;
1770         int err = 0;
1771
1772         s_h = cb->args[0];
1773         s_idx = idx = cb->args[1];
1774         s_ip_idx = cb->args[2];
1775
1776         if (cb->strict_check) {
1777                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1778                                                  skb->sk, cb);
1779                 if (err < 0)
1780                         goto put_tgt_net;
1781
1782                 err = 0;
1783                 if (fillargs.ifindex) {
1784                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1785                         if (!dev) {
1786                                 err = -ENODEV;
1787                                 goto put_tgt_net;
1788                         }
1789
1790                         in_dev = __in_dev_get_rtnl(dev);
1791                         if (in_dev) {
1792                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1793                                                        &fillargs);
1794                         }
1795                         goto put_tgt_net;
1796                 }
1797         }
1798
1799         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1800                 idx = 0;
1801                 head = &tgt_net->dev_index_head[h];
1802                 rcu_read_lock();
1803                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1804                           tgt_net->dev_base_seq;
1805                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1806                         if (idx < s_idx)
1807                                 goto cont;
1808                         if (h > s_h || idx > s_idx)
1809                                 s_ip_idx = 0;
1810                         in_dev = __in_dev_get_rcu(dev);
1811                         if (!in_dev)
1812                                 goto cont;
1813
1814                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1815                                                &fillargs);
1816                         if (err < 0) {
1817                                 rcu_read_unlock();
1818                                 goto done;
1819                         }
1820 cont:
1821                         idx++;
1822                 }
1823                 rcu_read_unlock();
1824         }
1825
1826 done:
1827         cb->args[0] = h;
1828         cb->args[1] = idx;
1829 put_tgt_net:
1830         if (fillargs.netnsid >= 0)
1831                 put_net(tgt_net);
1832
1833         return skb->len ? : err;
1834 }
1835
1836 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1837                       u32 portid)
1838 {
1839         struct inet_fill_args fillargs = {
1840                 .portid = portid,
1841                 .seq = nlh ? nlh->nlmsg_seq : 0,
1842                 .event = event,
1843                 .flags = 0,
1844                 .netnsid = -1,
1845         };
1846         struct sk_buff *skb;
1847         int err = -ENOBUFS;
1848         struct net *net;
1849
1850         net = dev_net(ifa->ifa_dev->dev);
1851         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1852         if (!skb)
1853                 goto errout;
1854
1855         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1856         if (err < 0) {
1857                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1858                 WARN_ON(err == -EMSGSIZE);
1859                 kfree_skb(skb);
1860                 goto errout;
1861         }
1862         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1863         return;
1864 errout:
1865         if (err < 0)
1866                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1867 }
1868
1869 static size_t inet_get_link_af_size(const struct net_device *dev,
1870                                     u32 ext_filter_mask)
1871 {
1872         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1873
1874         if (!in_dev)
1875                 return 0;
1876
1877         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1878 }
1879
1880 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1881                              u32 ext_filter_mask)
1882 {
1883         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1884         struct nlattr *nla;
1885         int i;
1886
1887         if (!in_dev)
1888                 return -ENODATA;
1889
1890         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1891         if (!nla)
1892                 return -EMSGSIZE;
1893
1894         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1895                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1896
1897         return 0;
1898 }
1899
1900 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1901         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1902 };
1903
1904 static int inet_validate_link_af(const struct net_device *dev,
1905                                  const struct nlattr *nla)
1906 {
1907         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1908         int err, rem;
1909
1910         if (dev && !__in_dev_get_rcu(dev))
1911                 return -EAFNOSUPPORT;
1912
1913         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1914                                           inet_af_policy, NULL);
1915         if (err < 0)
1916                 return err;
1917
1918         if (tb[IFLA_INET_CONF]) {
1919                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1920                         int cfgid = nla_type(a);
1921
1922                         if (nla_len(a) < 4)
1923                                 return -EINVAL;
1924
1925                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1926                                 return -EINVAL;
1927                 }
1928         }
1929
1930         return 0;
1931 }
1932
1933 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1934 {
1935         struct in_device *in_dev = __in_dev_get_rcu(dev);
1936         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1937         int rem;
1938
1939         if (!in_dev)
1940                 return -EAFNOSUPPORT;
1941
1942         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1943                 BUG();
1944
1945         if (tb[IFLA_INET_CONF]) {
1946                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1947                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1948         }
1949
1950         return 0;
1951 }
1952
1953 static int inet_netconf_msgsize_devconf(int type)
1954 {
1955         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1956                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1957         bool all = false;
1958
1959         if (type == NETCONFA_ALL)
1960                 all = true;
1961
1962         if (all || type == NETCONFA_FORWARDING)
1963                 size += nla_total_size(4);
1964         if (all || type == NETCONFA_RP_FILTER)
1965                 size += nla_total_size(4);
1966         if (all || type == NETCONFA_MC_FORWARDING)
1967                 size += nla_total_size(4);
1968         if (all || type == NETCONFA_BC_FORWARDING)
1969                 size += nla_total_size(4);
1970         if (all || type == NETCONFA_PROXY_NEIGH)
1971                 size += nla_total_size(4);
1972         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1973                 size += nla_total_size(4);
1974
1975         return size;
1976 }
1977
1978 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1979                                      struct ipv4_devconf *devconf, u32 portid,
1980                                      u32 seq, int event, unsigned int flags,
1981                                      int type)
1982 {
1983         struct nlmsghdr  *nlh;
1984         struct netconfmsg *ncm;
1985         bool all = false;
1986
1987         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1988                         flags);
1989         if (!nlh)
1990                 return -EMSGSIZE;
1991
1992         if (type == NETCONFA_ALL)
1993                 all = true;
1994
1995         ncm = nlmsg_data(nlh);
1996         ncm->ncm_family = AF_INET;
1997
1998         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1999                 goto nla_put_failure;
2000
2001         if (!devconf)
2002                 goto out;
2003
2004         if ((all || type == NETCONFA_FORWARDING) &&
2005             nla_put_s32(skb, NETCONFA_FORWARDING,
2006                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2007                 goto nla_put_failure;
2008         if ((all || type == NETCONFA_RP_FILTER) &&
2009             nla_put_s32(skb, NETCONFA_RP_FILTER,
2010                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2011                 goto nla_put_failure;
2012         if ((all || type == NETCONFA_MC_FORWARDING) &&
2013             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2014                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2015                 goto nla_put_failure;
2016         if ((all || type == NETCONFA_BC_FORWARDING) &&
2017             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2018                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2019                 goto nla_put_failure;
2020         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2021             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2022                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2023                 goto nla_put_failure;
2024         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2025             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2026                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2027                 goto nla_put_failure;
2028
2029 out:
2030         nlmsg_end(skb, nlh);
2031         return 0;
2032
2033 nla_put_failure:
2034         nlmsg_cancel(skb, nlh);
2035         return -EMSGSIZE;
2036 }
2037
2038 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2039                                  int ifindex, struct ipv4_devconf *devconf)
2040 {
2041         struct sk_buff *skb;
2042         int err = -ENOBUFS;
2043
2044         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2045         if (!skb)
2046                 goto errout;
2047
2048         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2049                                         event, 0, type);
2050         if (err < 0) {
2051                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2052                 WARN_ON(err == -EMSGSIZE);
2053                 kfree_skb(skb);
2054                 goto errout;
2055         }
2056         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2057         return;
2058 errout:
2059         if (err < 0)
2060                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2061 }
2062
2063 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2064         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2065         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2066         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2067         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2068         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2069 };
2070
2071 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2072                                       const struct nlmsghdr *nlh,
2073                                       struct nlattr **tb,
2074                                       struct netlink_ext_ack *extack)
2075 {
2076         int i, err;
2077
2078         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2079                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2080                 return -EINVAL;
2081         }
2082
2083         if (!netlink_strict_get_check(skb))
2084                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2085                                               tb, NETCONFA_MAX,
2086                                               devconf_ipv4_policy, extack);
2087
2088         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2089                                             tb, NETCONFA_MAX,
2090                                             devconf_ipv4_policy, extack);
2091         if (err)
2092                 return err;
2093
2094         for (i = 0; i <= NETCONFA_MAX; i++) {
2095                 if (!tb[i])
2096                         continue;
2097
2098                 switch (i) {
2099                 case NETCONFA_IFINDEX:
2100                         break;
2101                 default:
2102                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2103                         return -EINVAL;
2104                 }
2105         }
2106
2107         return 0;
2108 }
2109
2110 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2111                                     struct nlmsghdr *nlh,
2112                                     struct netlink_ext_ack *extack)
2113 {
2114         struct net *net = sock_net(in_skb->sk);
2115         struct nlattr *tb[NETCONFA_MAX+1];
2116         struct sk_buff *skb;
2117         struct ipv4_devconf *devconf;
2118         struct in_device *in_dev;
2119         struct net_device *dev;
2120         int ifindex;
2121         int err;
2122
2123         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2124         if (err)
2125                 goto errout;
2126
2127         err = -EINVAL;
2128         if (!tb[NETCONFA_IFINDEX])
2129                 goto errout;
2130
2131         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2132         switch (ifindex) {
2133         case NETCONFA_IFINDEX_ALL:
2134                 devconf = net->ipv4.devconf_all;
2135                 break;
2136         case NETCONFA_IFINDEX_DEFAULT:
2137                 devconf = net->ipv4.devconf_dflt;
2138                 break;
2139         default:
2140                 dev = __dev_get_by_index(net, ifindex);
2141                 if (!dev)
2142                         goto errout;
2143                 in_dev = __in_dev_get_rtnl(dev);
2144                 if (!in_dev)
2145                         goto errout;
2146                 devconf = &in_dev->cnf;
2147                 break;
2148         }
2149
2150         err = -ENOBUFS;
2151         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2152         if (!skb)
2153                 goto errout;
2154
2155         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2156                                         NETLINK_CB(in_skb).portid,
2157                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2158                                         NETCONFA_ALL);
2159         if (err < 0) {
2160                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2161                 WARN_ON(err == -EMSGSIZE);
2162                 kfree_skb(skb);
2163                 goto errout;
2164         }
2165         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2166 errout:
2167         return err;
2168 }
2169
2170 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2171                                      struct netlink_callback *cb)
2172 {
2173         const struct nlmsghdr *nlh = cb->nlh;
2174         struct net *net = sock_net(skb->sk);
2175         int h, s_h;
2176         int idx, s_idx;
2177         struct net_device *dev;
2178         struct in_device *in_dev;
2179         struct hlist_head *head;
2180
2181         if (cb->strict_check) {
2182                 struct netlink_ext_ack *extack = cb->extack;
2183                 struct netconfmsg *ncm;
2184
2185                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2186                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2187                         return -EINVAL;
2188                 }
2189
2190                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2191                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2192                         return -EINVAL;
2193                 }
2194         }
2195
2196         s_h = cb->args[0];
2197         s_idx = idx = cb->args[1];
2198
2199         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2200                 idx = 0;
2201                 head = &net->dev_index_head[h];
2202                 rcu_read_lock();
2203                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2204                           net->dev_base_seq;
2205                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2206                         if (idx < s_idx)
2207                                 goto cont;
2208                         in_dev = __in_dev_get_rcu(dev);
2209                         if (!in_dev)
2210                                 goto cont;
2211
2212                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2213                                                       &in_dev->cnf,
2214                                                       NETLINK_CB(cb->skb).portid,
2215                                                       nlh->nlmsg_seq,
2216                                                       RTM_NEWNETCONF,
2217                                                       NLM_F_MULTI,
2218                                                       NETCONFA_ALL) < 0) {
2219                                 rcu_read_unlock();
2220                                 goto done;
2221                         }
2222                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2223 cont:
2224                         idx++;
2225                 }
2226                 rcu_read_unlock();
2227         }
2228         if (h == NETDEV_HASHENTRIES) {
2229                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2230                                               net->ipv4.devconf_all,
2231                                               NETLINK_CB(cb->skb).portid,
2232                                               nlh->nlmsg_seq,
2233                                               RTM_NEWNETCONF, NLM_F_MULTI,
2234                                               NETCONFA_ALL) < 0)
2235                         goto done;
2236                 else
2237                         h++;
2238         }
2239         if (h == NETDEV_HASHENTRIES + 1) {
2240                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2241                                               net->ipv4.devconf_dflt,
2242                                               NETLINK_CB(cb->skb).portid,
2243                                               nlh->nlmsg_seq,
2244                                               RTM_NEWNETCONF, NLM_F_MULTI,
2245                                               NETCONFA_ALL) < 0)
2246                         goto done;
2247                 else
2248                         h++;
2249         }
2250 done:
2251         cb->args[0] = h;
2252         cb->args[1] = idx;
2253
2254         return skb->len;
2255 }
2256
2257 #ifdef CONFIG_SYSCTL
2258
2259 static void devinet_copy_dflt_conf(struct net *net, int i)
2260 {
2261         struct net_device *dev;
2262
2263         rcu_read_lock();
2264         for_each_netdev_rcu(net, dev) {
2265                 struct in_device *in_dev;
2266
2267                 in_dev = __in_dev_get_rcu(dev);
2268                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2269                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2270         }
2271         rcu_read_unlock();
2272 }
2273
2274 /* called with RTNL locked */
2275 static void inet_forward_change(struct net *net)
2276 {
2277         struct net_device *dev;
2278         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2279
2280         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2281         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2282         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2283                                     NETCONFA_FORWARDING,
2284                                     NETCONFA_IFINDEX_ALL,
2285                                     net->ipv4.devconf_all);
2286         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2287                                     NETCONFA_FORWARDING,
2288                                     NETCONFA_IFINDEX_DEFAULT,
2289                                     net->ipv4.devconf_dflt);
2290
2291         for_each_netdev(net, dev) {
2292                 struct in_device *in_dev;
2293
2294                 if (on)
2295                         dev_disable_lro(dev);
2296
2297                 in_dev = __in_dev_get_rtnl(dev);
2298                 if (in_dev) {
2299                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2300                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2301                                                     NETCONFA_FORWARDING,
2302                                                     dev->ifindex, &in_dev->cnf);
2303                 }
2304         }
2305 }
2306
2307 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2308 {
2309         if (cnf == net->ipv4.devconf_dflt)
2310                 return NETCONFA_IFINDEX_DEFAULT;
2311         else if (cnf == net->ipv4.devconf_all)
2312                 return NETCONFA_IFINDEX_ALL;
2313         else {
2314                 struct in_device *idev
2315                         = container_of(cnf, struct in_device, cnf);
2316                 return idev->dev->ifindex;
2317         }
2318 }
2319
2320 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2321                              void __user *buffer,
2322                              size_t *lenp, loff_t *ppos)
2323 {
2324         int old_value = *(int *)ctl->data;
2325         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2326         int new_value = *(int *)ctl->data;
2327
2328         if (write) {
2329                 struct ipv4_devconf *cnf = ctl->extra1;
2330                 struct net *net = ctl->extra2;
2331                 int i = (int *)ctl->data - cnf->data;
2332                 int ifindex;
2333
2334                 set_bit(i, cnf->state);
2335
2336                 if (cnf == net->ipv4.devconf_dflt)
2337                         devinet_copy_dflt_conf(net, i);
2338                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2339                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2340                         if ((new_value == 0) && (old_value != 0))
2341                                 rt_cache_flush(net);
2342
2343                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2344                     new_value != old_value)
2345                         rt_cache_flush(net);
2346
2347                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2348                     new_value != old_value) {
2349                         ifindex = devinet_conf_ifindex(net, cnf);
2350                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2351                                                     NETCONFA_RP_FILTER,
2352                                                     ifindex, cnf);
2353                 }
2354                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2355                     new_value != old_value) {
2356                         ifindex = devinet_conf_ifindex(net, cnf);
2357                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358                                                     NETCONFA_PROXY_NEIGH,
2359                                                     ifindex, cnf);
2360                 }
2361                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2362                     new_value != old_value) {
2363                         ifindex = devinet_conf_ifindex(net, cnf);
2364                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2365                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2366                                                     ifindex, cnf);
2367                 }
2368         }
2369
2370         return ret;
2371 }
2372
2373 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2374                                   void __user *buffer,
2375                                   size_t *lenp, loff_t *ppos)
2376 {
2377         int *valp = ctl->data;
2378         int val = *valp;
2379         loff_t pos = *ppos;
2380         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2381
2382         if (write && *valp != val) {
2383                 struct net *net = ctl->extra2;
2384
2385                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2386                         if (!rtnl_trylock()) {
2387                                 /* Restore the original values before restarting */
2388                                 *valp = val;
2389                                 *ppos = pos;
2390                                 return restart_syscall();
2391                         }
2392                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2393                                 inet_forward_change(net);
2394                         } else {
2395                                 struct ipv4_devconf *cnf = ctl->extra1;
2396                                 struct in_device *idev =
2397                                         container_of(cnf, struct in_device, cnf);
2398                                 if (*valp)
2399                                         dev_disable_lro(idev->dev);
2400                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2401                                                             NETCONFA_FORWARDING,
2402                                                             idev->dev->ifindex,
2403                                                             cnf);
2404                         }
2405                         rtnl_unlock();
2406                         rt_cache_flush(net);
2407                 } else
2408                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2409                                                     NETCONFA_FORWARDING,
2410                                                     NETCONFA_IFINDEX_DEFAULT,
2411                                                     net->ipv4.devconf_dflt);
2412         }
2413
2414         return ret;
2415 }
2416
2417 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2418                                 void __user *buffer,
2419                                 size_t *lenp, loff_t *ppos)
2420 {
2421         int *valp = ctl->data;
2422         int val = *valp;
2423         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2424         struct net *net = ctl->extra2;
2425
2426         if (write && *valp != val)
2427                 rt_cache_flush(net);
2428
2429         return ret;
2430 }
2431
/*
 * Builders for the entries of the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY() emits one ctl_table initializer:
 *   attr  - suffix of the IPV4_DEVCONF_* index; the entry's data pointer
 *           addresses slot (IPV4_DEVCONF_<attr> - 1) of ipv4_devconf.data
 *   name  - procfs file name
 *   mval  - file mode
 *   proc  - proc handler
 * extra1 is preset to the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2454
2455 static struct devinet_sysctl_table {
2456         struct ctl_table_header *sysctl_header;
2457         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2458 } devinet_sysctl = {
2459         .devinet_vars = {
2460                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2461                                              devinet_sysctl_forward),
2462                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2463                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2464
2465                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2466                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2467                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2468                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2469                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2470                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2471                                         "accept_source_route"),
2472                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2473                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2474                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2475                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2476                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2477                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2478                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2479                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2480                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2481                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2482                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2483                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2484                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2485                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2486                                         "force_igmp_version"),
2487                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2488                                         "igmpv2_unsolicited_report_interval"),
2489                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2490                                         "igmpv3_unsolicited_report_interval"),
2491                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2492                                         "ignore_routes_with_linkdown"),
2493                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2494                                         "drop_gratuitous_arp"),
2495
2496                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2497                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2498                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2499                                               "promote_secondaries"),
2500                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2501                                               "route_localnet"),
2502                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2503                                               "drop_unicast_in_l2_multicast"),
2504         },
2505 };
2506
2507 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2508                                      int ifindex, struct ipv4_devconf *p)
2509 {
2510         int i;
2511         struct devinet_sysctl_table *t;
2512         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2513
2514         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2515         if (!t)
2516                 goto out;
2517
2518         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2519                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2520                 t->devinet_vars[i].extra1 = p;
2521                 t->devinet_vars[i].extra2 = net;
2522         }
2523
2524         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2525
2526         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2527         if (!t->sysctl_header)
2528                 goto free;
2529
2530         p->sysctl = t;
2531
2532         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2533                                     ifindex, p);
2534         return 0;
2535
2536 free:
2537         kfree(t);
2538 out:
2539         return -ENOBUFS;
2540 }
2541
2542 static void __devinet_sysctl_unregister(struct net *net,
2543                                         struct ipv4_devconf *cnf, int ifindex)
2544 {
2545         struct devinet_sysctl_table *t = cnf->sysctl;
2546
2547         if (t) {
2548                 cnf->sysctl = NULL;
2549                 unregister_net_sysctl_table(t->sysctl_header);
2550                 kfree(t);
2551         }
2552
2553         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2554 }
2555
2556 static int devinet_sysctl_register(struct in_device *idev)
2557 {
2558         int err;
2559
2560         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2561                 return -EINVAL;
2562
2563         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2564         if (err)
2565                 return err;
2566         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2567                                         idev->dev->ifindex, &idev->cnf);
2568         if (err)
2569                 neigh_sysctl_unregister(idev->arp_parms);
2570         return err;
2571 }
2572
2573 static void devinet_sysctl_unregister(struct in_device *idev)
2574 {
2575         struct net *net = dev_net(idev->dev);
2576
2577         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2578         neigh_sysctl_unregister(idev->arp_parms);
2579 }
2580
2581 static struct ctl_table ctl_forward_entry[] = {
2582         {
2583                 .procname       = "ip_forward",
2584                 .data           = &ipv4_devconf.data[
2585                                         IPV4_DEVCONF_FORWARDING - 1],
2586                 .maxlen         = sizeof(int),
2587                 .mode           = 0644,
2588                 .proc_handler   = devinet_sysctl_forward,
2589                 .extra1         = &ipv4_devconf,
2590                 .extra2         = &init_net,
2591         },
2592         { },
2593 };
2594 #endif
2595
2596 static __net_init int devinet_init_net(struct net *net)
2597 {
2598         int err;
2599         struct ipv4_devconf *all, *dflt;
2600 #ifdef CONFIG_SYSCTL
2601         struct ctl_table *tbl;
2602         struct ctl_table_header *forw_hdr;
2603 #endif
2604
2605         err = -ENOMEM;
2606         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2607         if (!all)
2608                 goto err_alloc_all;
2609
2610         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2611         if (!dflt)
2612                 goto err_alloc_dflt;
2613
2614 #ifdef CONFIG_SYSCTL
2615         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2616         if (!tbl)
2617                 goto err_alloc_ctl;
2618
2619         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2620         tbl[0].extra1 = all;
2621         tbl[0].extra2 = net;
2622 #endif
2623
2624         if ((!IS_ENABLED(CONFIG_SYSCTL) ||
2625              sysctl_devconf_inherit_init_net != 2) &&
2626             !net_eq(net, &init_net)) {
2627                 memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
2628                 memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
2629         }
2630
2631 #ifdef CONFIG_SYSCTL
2632         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2633         if (err < 0)
2634                 goto err_reg_all;
2635
2636         err = __devinet_sysctl_register(net, "default",
2637                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2638         if (err < 0)
2639                 goto err_reg_dflt;
2640
2641         err = -ENOMEM;
2642         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2643         if (!forw_hdr)
2644                 goto err_reg_ctl;
2645         net->ipv4.forw_hdr = forw_hdr;
2646 #endif
2647
2648         net->ipv4.devconf_all = all;
2649         net->ipv4.devconf_dflt = dflt;
2650         return 0;
2651
2652 #ifdef CONFIG_SYSCTL
2653 err_reg_ctl:
2654         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2655 err_reg_dflt:
2656         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2657 err_reg_all:
2658         kfree(tbl);
2659 err_alloc_ctl:
2660 #endif
2661         kfree(dflt);
2662 err_alloc_dflt:
2663         kfree(all);
2664 err_alloc_all:
2665         return err;
2666 }
2667
2668 static __net_exit void devinet_exit_net(struct net *net)
2669 {
2670 #ifdef CONFIG_SYSCTL
2671         struct ctl_table *tbl;
2672
2673         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2674         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2675         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2676                                     NETCONFA_IFINDEX_DEFAULT);
2677         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2678                                     NETCONFA_IFINDEX_ALL);
2679         kfree(tbl);
2680 #endif
2681         kfree(net->ipv4.devconf_dflt);
2682         kfree(net->ipv4.devconf_all);
2683 }
2684
2685 static __net_initdata struct pernet_operations devinet_ops = {
2686         .init = devinet_init_net,
2687         .exit = devinet_exit_net,
2688 };
2689
2690 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2691         .family           = AF_INET,
2692         .fill_link_af     = inet_fill_link_af,
2693         .get_link_af_size = inet_get_link_af_size,
2694         .validate_link_af = inet_validate_link_af,
2695         .set_link_af      = inet_set_link_af,
2696 };
2697
2698 void __init devinet_init(void)
2699 {
2700         int i;
2701
2702         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2703                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2704
2705         register_pernet_subsys(&devinet_ops);
2706
2707         register_gifconf(PF_INET, inet_gifconf);
2708         register_netdevice_notifier(&ip_netdev_notifier);
2709
2710         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2711
2712         rtnl_af_register(&inet_af_ops);
2713
2714         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2715         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2716         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2717         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2718                       inet_netconf_dump_devconf, 0);
2719 }