c47d3828d4f6563346619c9b1ac7aec437bd086d
[linux-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include "igmp_internal.h"
50 #include <linux/slab.h>
51 #include <linux/hash.h>
52 #ifdef CONFIG_SYSCTL
53 #include <linux/sysctl.h>
54 #endif
55 #include <linux/kmod.h>
56 #include <linux/netconf.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 #include <net/addrconf.h>
65
/* Address flags that are cleared from every ifaddr passed to
 * __inet_insert_ifa(); they are treated as IPv6-only there.
 */
#define IPV6ONLY_FLAGS							\
	(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |		\
	 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE |				\
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
70
71 static struct ipv4_devconf ipv4_devconf = {
72         .data = {
73                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
76                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
78                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
79                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
80         },
81 };
82
83 static struct ipv4_devconf ipv4_devconf_dflt = {
84         .data = {
85                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
88                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
89                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
90                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
91                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
92                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
93         },
94 };
95
/* Access attribute @attr of the default devconf that @net points to
 * (net->ipv4.devconf_dflt).  @net is parenthesized so that any pointer
 * expression may be passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
98
99 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
100         [IFA_LOCAL]             = { .type = NLA_U32 },
101         [IFA_ADDRESS]           = { .type = NLA_U32 },
102         [IFA_BROADCAST]         = { .type = NLA_U32 },
103         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
104         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
105         [IFA_FLAGS]             = { .type = NLA_U32 },
106         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
107         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
108         [IFA_PROTO]             = { .type = NLA_U8 },
109 };
110
/* Size of the address hash table walked by check_lifetime():
 * 1 << 8 = 256 buckets.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
113
114 static u32 inet_addr_hash(const struct net *net, __be32 addr)
115 {
116         u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
117
118         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
119 }
120
121 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
122 {
123         u32 hash = inet_addr_hash(net, ifa->ifa_local);
124
125         ASSERT_RTNL();
126         hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
127 }
128
129 static void inet_hash_remove(struct in_ifaddr *ifa)
130 {
131         ASSERT_RTNL();
132         hlist_del_init_rcu(&ifa->addr_lst);
133 }
134
135 /**
136  * __ip_dev_find - find the first device with a given source address.
137  * @net: the net namespace
138  * @addr: the source address
139  * @devref: if true, take a reference on the found device
140  *
141  * If a caller uses devref=false, it should be protected by RCU, or RTNL
142  */
143 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
144 {
145         struct net_device *result = NULL;
146         struct in_ifaddr *ifa;
147
148         rcu_read_lock();
149         ifa = inet_lookup_ifaddr_rcu(net, addr);
150         if (!ifa) {
151                 struct flowi4 fl4 = { .daddr = addr };
152                 struct fib_result res = { 0 };
153                 struct fib_table *local;
154
155                 /* Fallback to FIB local table so that communication
156                  * over loopback subnets work.
157                  */
158                 local = fib_get_table(net, RT_TABLE_LOCAL);
159                 if (local &&
160                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
161                     res.type == RTN_LOCAL)
162                         result = FIB_RES_DEV(res);
163         } else {
164                 result = ifa->ifa_dev->dev;
165         }
166         if (result && devref)
167                 dev_hold(result);
168         rcu_read_unlock();
169         return result;
170 }
171 EXPORT_SYMBOL(__ip_dev_find);
172
173 /* called under RCU lock */
174 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
175 {
176         u32 hash = inet_addr_hash(net, addr);
177         struct in_ifaddr *ifa;
178
179         hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
180                 if (ifa->ifa_local == addr)
181                         return ifa;
182
183         return NULL;
184 }
185
186 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
187
188 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
189 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
190 static void inet_del_ifa(struct in_device *in_dev,
191                          struct in_ifaddr __rcu **ifap,
192                          int destroy);
193 #ifdef CONFIG_SYSCTL
194 static int devinet_sysctl_register(struct in_device *idev);
195 static void devinet_sysctl_unregister(struct in_device *idev);
196 #else
197 static int devinet_sysctl_register(struct in_device *idev)
198 {
199         return 0;
200 }
201 static void devinet_sysctl_unregister(struct in_device *idev)
202 {
203 }
204 #endif
205
206 /* Locks all the inet devices. */
207
208 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
209 {
210         struct in_ifaddr *ifa;
211
212         ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
213         if (!ifa)
214                 return NULL;
215
216         in_dev_hold(in_dev);
217         ifa->ifa_dev = in_dev;
218
219         INIT_HLIST_NODE(&ifa->addr_lst);
220
221         return ifa;
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227
228         in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         /* Our reference to ifa->ifa_dev must be freed ASAP
235          * to release the reference to the netdev the same way.
236          * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
237          */
238         call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
239 }
240
241 static void in_dev_free_rcu(struct rcu_head *head)
242 {
243         struct in_device *idev = container_of(head, struct in_device, rcu_head);
244
245         kfree(rcu_dereference_protected(idev->mc_hash, 1));
246         kfree(idev);
247 }
248
249 void in_dev_finish_destroy(struct in_device *idev)
250 {
251         struct net_device *dev = idev->dev;
252
253         WARN_ON(idev->ifa_list);
254         WARN_ON(idev->mc_list);
255 #ifdef NET_REFCNT_DEBUG
256         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
257 #endif
258         netdev_put(dev, &idev->dev_tracker);
259         if (!idev->dead)
260                 pr_err("Freeing alive in_device %p\n", idev);
261         else
262                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
263 }
264 EXPORT_SYMBOL(in_dev_finish_destroy);
265
266 static struct in_device *inetdev_init(struct net_device *dev)
267 {
268         struct in_device *in_dev;
269         int err = -ENOMEM;
270
271         ASSERT_RTNL();
272
273         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
274         if (!in_dev)
275                 goto out;
276         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
277                         sizeof(in_dev->cnf));
278         in_dev->cnf.sysctl = NULL;
279         in_dev->dev = dev;
280         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
281         if (!in_dev->arp_parms)
282                 goto out_kfree;
283         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
284                 netif_disable_lro(dev);
285         /* Reference in_dev->dev */
286         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
287         /* Account for reference dev->ip_ptr (below) */
288         refcount_set(&in_dev->refcnt, 1);
289
290         if (dev != blackhole_netdev) {
291                 err = devinet_sysctl_register(in_dev);
292                 if (err) {
293                         in_dev->dead = 1;
294                         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295                         in_dev_put(in_dev);
296                         in_dev = NULL;
297                         goto out;
298                 }
299                 ip_mc_init_dev(in_dev);
300                 if (dev->flags & IFF_UP)
301                         ip_mc_up(in_dev);
302         }
303
304         /* we can receive as soon as ip_ptr is set -- do this last */
305         rcu_assign_pointer(dev->ip_ptr, in_dev);
306 out:
307         return in_dev ?: ERR_PTR(err);
308 out_kfree:
309         kfree(in_dev);
310         in_dev = NULL;
311         goto out;
312 }
313
314 static void inetdev_destroy(struct in_device *in_dev)
315 {
316         struct net_device *dev;
317         struct in_ifaddr *ifa;
318
319         ASSERT_RTNL();
320
321         dev = in_dev->dev;
322
323         in_dev->dead = 1;
324
325         ip_mc_destroy_dev(in_dev);
326
327         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
328                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
329                 inet_free_ifa(ifa);
330         }
331
332         RCU_INIT_POINTER(dev->ip_ptr, NULL);
333
334         devinet_sysctl_unregister(in_dev);
335         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
336         arp_ifdown(dev);
337
338         in_dev_put(in_dev);
339 }
340
341 static int __init inet_blackhole_dev_init(void)
342 {
343         int err = 0;
344
345         rtnl_lock();
346         if (!inetdev_init(blackhole_netdev))
347                 err = -ENOMEM;
348         rtnl_unlock();
349
350         return err;
351 }
352 late_initcall(inet_blackhole_dev_init);
353
354 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
355 {
356         const struct in_ifaddr *ifa;
357
358         rcu_read_lock();
359         in_dev_for_each_ifa_rcu(ifa, in_dev) {
360                 if (inet_ifa_match(a, ifa)) {
361                         if (!b || inet_ifa_match(b, ifa)) {
362                                 rcu_read_unlock();
363                                 return 1;
364                         }
365                 }
366         }
367         rcu_read_unlock();
368         return 0;
369 }
370
371 static void __inet_del_ifa(struct in_device *in_dev,
372                            struct in_ifaddr __rcu **ifap,
373                            int destroy, struct nlmsghdr *nlh, u32 portid)
374 {
375         struct in_ifaddr *promote = NULL;
376         struct in_ifaddr *ifa, *ifa1;
377         struct in_ifaddr __rcu **last_prim;
378         struct in_ifaddr *prev_prom = NULL;
379         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
380
381         ASSERT_RTNL();
382
383         ifa1 = rtnl_dereference(*ifap);
384         last_prim = ifap;
385         if (in_dev->dead)
386                 goto no_promotions;
387
388         /* 1. Deleting primary ifaddr forces deletion all secondaries
389          * unless alias promotion is set
390          **/
391
392         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
393                 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
394
395                 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
396                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
397                             ifa1->ifa_scope <= ifa->ifa_scope)
398                                 last_prim = &ifa->ifa_next;
399
400                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
401                             ifa1->ifa_mask != ifa->ifa_mask ||
402                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
403                                 ifap1 = &ifa->ifa_next;
404                                 prev_prom = ifa;
405                                 continue;
406                         }
407
408                         if (!do_promote) {
409                                 inet_hash_remove(ifa);
410                                 *ifap1 = ifa->ifa_next;
411
412                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
413                                 blocking_notifier_call_chain(&inetaddr_chain,
414                                                 NETDEV_DOWN, ifa);
415                                 inet_free_ifa(ifa);
416                         } else {
417                                 promote = ifa;
418                                 break;
419                         }
420                 }
421         }
422
423         /* On promotion all secondaries from subnet are changing
424          * the primary IP, we must remove all their routes silently
425          * and later to add them back with new prefsrc. Do this
426          * while all addresses are on the device list.
427          */
428         for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
429                 if (ifa1->ifa_mask == ifa->ifa_mask &&
430                     inet_ifa_match(ifa1->ifa_address, ifa))
431                         fib_del_ifaddr(ifa, ifa1);
432         }
433
434 no_promotions:
435         /* 2. Unlink it */
436
437         *ifap = ifa1->ifa_next;
438         inet_hash_remove(ifa1);
439
440         /* 3. Announce address deletion */
441
442         /* Send message first, then call notifier.
443            At first sight, FIB update triggered by notifier
444            will refer to already deleted ifaddr, that could confuse
445            netlink listeners. It is not true: look, gated sees
446            that route deleted and if it still thinks that ifaddr
447            is valid, it will try to restore deleted routes... Grr.
448            So that, this order is correct.
449          */
450         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
451         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
452
453         if (promote) {
454                 struct in_ifaddr *next_sec;
455
456                 next_sec = rtnl_dereference(promote->ifa_next);
457                 if (prev_prom) {
458                         struct in_ifaddr *last_sec;
459
460                         rcu_assign_pointer(prev_prom->ifa_next, next_sec);
461
462                         last_sec = rtnl_dereference(*last_prim);
463                         rcu_assign_pointer(promote->ifa_next, last_sec);
464                         rcu_assign_pointer(*last_prim, promote);
465                 }
466
467                 promote->ifa_flags &= ~IFA_F_SECONDARY;
468                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
469                 blocking_notifier_call_chain(&inetaddr_chain,
470                                 NETDEV_UP, promote);
471                 for (ifa = next_sec; ifa;
472                      ifa = rtnl_dereference(ifa->ifa_next)) {
473                         if (ifa1->ifa_mask != ifa->ifa_mask ||
474                             !inet_ifa_match(ifa1->ifa_address, ifa))
475                                         continue;
476                         fib_add_ifaddr(ifa);
477                 }
478
479         }
480         if (destroy)
481                 inet_free_ifa(ifa1);
482 }
483
484 static void inet_del_ifa(struct in_device *in_dev,
485                          struct in_ifaddr __rcu **ifap,
486                          int destroy)
487 {
488         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
489 }
490
491 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
492                              u32 portid, struct netlink_ext_ack *extack)
493 {
494         struct in_ifaddr __rcu **last_primary, **ifap;
495         struct in_device *in_dev = ifa->ifa_dev;
496         struct net *net = dev_net(in_dev->dev);
497         struct in_validator_info ivi;
498         struct in_ifaddr *ifa1;
499         int ret;
500
501         ASSERT_RTNL();
502
503         ifa->ifa_flags &= ~IFA_F_SECONDARY;
504         last_primary = &in_dev->ifa_list;
505
506         /* Don't set IPv6 only flags to IPv4 addresses */
507         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
508
509         ifap = &in_dev->ifa_list;
510         ifa1 = rtnl_dereference(*ifap);
511
512         while (ifa1) {
513                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
514                     ifa->ifa_scope <= ifa1->ifa_scope)
515                         last_primary = &ifa1->ifa_next;
516                 if (ifa1->ifa_mask == ifa->ifa_mask &&
517                     inet_ifa_match(ifa1->ifa_address, ifa)) {
518                         if (ifa1->ifa_local == ifa->ifa_local) {
519                                 inet_free_ifa(ifa);
520                                 return -EEXIST;
521                         }
522                         if (ifa1->ifa_scope != ifa->ifa_scope) {
523                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
524                                 inet_free_ifa(ifa);
525                                 return -EINVAL;
526                         }
527                         ifa->ifa_flags |= IFA_F_SECONDARY;
528                 }
529
530                 ifap = &ifa1->ifa_next;
531                 ifa1 = rtnl_dereference(*ifap);
532         }
533
534         /* Allow any devices that wish to register ifaddr validtors to weigh
535          * in now, before changes are committed.  The rntl lock is serializing
536          * access here, so the state should not change between a validator call
537          * and a final notify on commit.  This isn't invoked on promotion under
538          * the assumption that validators are checking the address itself, and
539          * not the flags.
540          */
541         ivi.ivi_addr = ifa->ifa_address;
542         ivi.ivi_dev = ifa->ifa_dev;
543         ivi.extack = extack;
544         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
545                                            NETDEV_UP, &ivi);
546         ret = notifier_to_errno(ret);
547         if (ret) {
548                 inet_free_ifa(ifa);
549                 return ret;
550         }
551
552         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
553                 ifap = last_primary;
554
555         rcu_assign_pointer(ifa->ifa_next, *ifap);
556         rcu_assign_pointer(*ifap, ifa);
557
558         inet_hash_insert(dev_net(in_dev->dev), ifa);
559
560         cancel_delayed_work(&net->ipv4.addr_chk_work);
561         queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
562
563         /* Send message first, then call notifier.
564            Notifier will trigger FIB update, so that
565            listeners of netlink will know about new ifaddr */
566         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
567         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
568
569         return 0;
570 }
571
572 static int inet_insert_ifa(struct in_ifaddr *ifa)
573 {
574         if (!ifa->ifa_local) {
575                 inet_free_ifa(ifa);
576                 return 0;
577         }
578
579         return __inet_insert_ifa(ifa, NULL, 0, NULL);
580 }
581
582 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
583 {
584         struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
585
586         ipv4_devconf_setall(in_dev);
587         neigh_parms_data_state_setall(in_dev->arp_parms);
588
589         if (ipv4_is_loopback(ifa->ifa_local))
590                 ifa->ifa_scope = RT_SCOPE_HOST;
591         return inet_insert_ifa(ifa);
592 }
593
594 /* Caller must hold RCU or RTNL :
595  * We dont take a reference on found in_device
596  */
597 struct in_device *inetdev_by_index(struct net *net, int ifindex)
598 {
599         struct net_device *dev;
600         struct in_device *in_dev = NULL;
601
602         rcu_read_lock();
603         dev = dev_get_by_index_rcu(net, ifindex);
604         if (dev)
605                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
606         rcu_read_unlock();
607         return in_dev;
608 }
609 EXPORT_SYMBOL(inetdev_by_index);
610
611 /* Called only from RTNL semaphored context. No locks. */
612
613 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
614                                     __be32 mask)
615 {
616         struct in_ifaddr *ifa;
617
618         ASSERT_RTNL();
619
620         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
621                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
622                         return ifa;
623         }
624         return NULL;
625 }
626
627 static int ip_mc_autojoin_config(struct net *net, bool join,
628                                  const struct in_ifaddr *ifa)
629 {
630 #if defined(CONFIG_IP_MULTICAST)
631         struct ip_mreqn mreq = {
632                 .imr_multiaddr.s_addr = ifa->ifa_address,
633                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
634         };
635         struct sock *sk = net->ipv4.mc_autojoin_sk;
636         int ret;
637
638         ASSERT_RTNL_NET(net);
639
640         lock_sock(sk);
641         if (join)
642                 ret = ip_mc_join_group(sk, &mreq);
643         else
644                 ret = ip_mc_leave_group(sk, &mreq);
645         release_sock(sk);
646
647         return ret;
648 #else
649         return -EOPNOTSUPP;
650 #endif
651 }
652
653 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
654                             struct netlink_ext_ack *extack)
655 {
656         struct net *net = sock_net(skb->sk);
657         struct in_ifaddr __rcu **ifap;
658         struct nlattr *tb[IFA_MAX+1];
659         struct in_device *in_dev;
660         struct ifaddrmsg *ifm;
661         struct in_ifaddr *ifa;
662         int err;
663
664         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
665                                      ifa_ipv4_policy, extack);
666         if (err < 0)
667                 goto out;
668
669         ifm = nlmsg_data(nlh);
670
671         rtnl_net_lock(net);
672
673         in_dev = inetdev_by_index(net, ifm->ifa_index);
674         if (!in_dev) {
675                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
676                 err = -ENODEV;
677                 goto unlock;
678         }
679
680         for (ifap = &in_dev->ifa_list;
681              (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
682              ifap = &ifa->ifa_next) {
683                 if (tb[IFA_LOCAL] &&
684                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
685                         continue;
686
687                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
688                         continue;
689
690                 if (tb[IFA_ADDRESS] &&
691                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
692                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
693                         continue;
694
695                 if (ipv4_is_multicast(ifa->ifa_address))
696                         ip_mc_autojoin_config(net, false, ifa);
697
698                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
699                 goto unlock;
700         }
701
702         NL_SET_ERR_MSG(extack, "ipv4: Address not found");
703         err = -EADDRNOTAVAIL;
704 unlock:
705         rtnl_net_unlock(net);
706 out:
707         return err;
708 }
709
710 static void check_lifetime(struct work_struct *work)
711 {
712         unsigned long now, next, next_sec, next_sched;
713         struct in_ifaddr *ifa;
714         struct hlist_node *n;
715         struct net *net;
716         int i;
717
718         net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
719         now = jiffies;
720         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
721
722         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
723                 struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
724                 bool change_needed = false;
725
726                 rcu_read_lock();
727                 hlist_for_each_entry_rcu(ifa, head, addr_lst) {
728                         unsigned long age, tstamp;
729                         u32 preferred_lft;
730                         u32 valid_lft;
731                         u32 flags;
732
733                         flags = READ_ONCE(ifa->ifa_flags);
734                         if (flags & IFA_F_PERMANENT)
735                                 continue;
736
737                         preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
738                         valid_lft = READ_ONCE(ifa->ifa_valid_lft);
739                         tstamp = READ_ONCE(ifa->ifa_tstamp);
740                         /* We try to batch several events at once. */
741                         age = (now - tstamp +
742                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
743
744                         if (valid_lft != INFINITY_LIFE_TIME &&
745                             age >= valid_lft) {
746                                 change_needed = true;
747                         } else if (preferred_lft ==
748                                    INFINITY_LIFE_TIME) {
749                                 continue;
750                         } else if (age >= preferred_lft) {
751                                 if (time_before(tstamp + valid_lft * HZ, next))
752                                         next = tstamp + valid_lft * HZ;
753
754                                 if (!(flags & IFA_F_DEPRECATED))
755                                         change_needed = true;
756                         } else if (time_before(tstamp + preferred_lft * HZ,
757                                                next)) {
758                                 next = tstamp + preferred_lft * HZ;
759                         }
760                 }
761                 rcu_read_unlock();
762                 if (!change_needed)
763                         continue;
764
765                 rtnl_net_lock(net);
766                 hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
767                         unsigned long age;
768
769                         if (ifa->ifa_flags & IFA_F_PERMANENT)
770                                 continue;
771
772                         /* We try to batch several events at once. */
773                         age = (now - ifa->ifa_tstamp +
774                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
775
776                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
777                             age >= ifa->ifa_valid_lft) {
778                                 struct in_ifaddr __rcu **ifap;
779                                 struct in_ifaddr *tmp;
780
781                                 ifap = &ifa->ifa_dev->ifa_list;
782                                 tmp = rtnl_net_dereference(net, *ifap);
783                                 while (tmp) {
784                                         if (tmp == ifa) {
785                                                 inet_del_ifa(ifa->ifa_dev,
786                                                              ifap, 1);
787                                                 break;
788                                         }
789                                         ifap = &tmp->ifa_next;
790                                         tmp = rtnl_net_dereference(net, *ifap);
791                                 }
792                         } else if (ifa->ifa_preferred_lft !=
793                                    INFINITY_LIFE_TIME &&
794                                    age >= ifa->ifa_preferred_lft &&
795                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
796                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
797                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
798                         }
799                 }
800                 rtnl_net_unlock(net);
801         }
802
803         next_sec = round_jiffies_up(next);
804         next_sched = next;
805
806         /* If rounded timeout is accurate enough, accept it. */
807         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
808                 next_sched = next_sec;
809
810         now = jiffies;
811         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
812         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
813                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
814
815         queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
816                            next_sched - now);
817 }
818
819 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
820                              __u32 prefered_lft)
821 {
822         unsigned long timeout;
823         u32 flags;
824
825         flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
826
827         timeout = addrconf_timeout_fixup(valid_lft, HZ);
828         if (addrconf_finite_timeout(timeout))
829                 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
830         else
831                 flags |= IFA_F_PERMANENT;
832
833         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
834         if (addrconf_finite_timeout(timeout)) {
835                 if (timeout == 0)
836                         flags |= IFA_F_DEPRECATED;
837                 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
838         }
839         WRITE_ONCE(ifa->ifa_flags, flags);
840         WRITE_ONCE(ifa->ifa_tstamp, jiffies);
841         if (!ifa->ifa_cstamp)
842                 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
843 }
844
845 static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
846                              struct netlink_ext_ack *extack,
847                              __u32 *valid_lft, __u32 *prefered_lft)
848 {
849         struct ifaddrmsg *ifm = nlmsg_data(nlh);
850         int err;
851
852         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
853                                      ifa_ipv4_policy, extack);
854         if (err < 0)
855                 return err;
856
857         if (ifm->ifa_prefixlen > 32) {
858                 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
859                 return -EINVAL;
860         }
861
862         if (!tb[IFA_LOCAL]) {
863                 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
864                 return -EINVAL;
865         }
866
867         if (tb[IFA_CACHEINFO]) {
868                 struct ifa_cacheinfo *ci;
869
870                 ci = nla_data(tb[IFA_CACHEINFO]);
871                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
872                         NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
873                         return -EINVAL;
874                 }
875
876                 *valid_lft = ci->ifa_valid;
877                 *prefered_lft = ci->ifa_prefered;
878         }
879
880         return 0;
881 }
882
883 static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
884                                          struct nlattr **tb,
885                                          struct netlink_ext_ack *extack)
886 {
887         struct ifaddrmsg *ifm = nlmsg_data(nlh);
888         struct in_device *in_dev;
889         struct net_device *dev;
890         struct in_ifaddr *ifa;
891         int err;
892
893         dev = __dev_get_by_index(net, ifm->ifa_index);
894         err = -ENODEV;
895         if (!dev) {
896                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
897                 goto errout;
898         }
899
900         in_dev = __in_dev_get_rtnl_net(dev);
901         err = -ENOBUFS;
902         if (!in_dev)
903                 goto errout;
904
905         ifa = inet_alloc_ifa(in_dev);
906         if (!ifa)
907                 /*
908                  * A potential indev allocation can be left alive, it stays
909                  * assigned to its device and is destroy with it.
910                  */
911                 goto errout;
912
913         ipv4_devconf_setall(in_dev);
914         neigh_parms_data_state_setall(in_dev->arp_parms);
915
916         if (!tb[IFA_ADDRESS])
917                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
918
919         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
920         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
921         ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
922         ifa->ifa_scope = ifm->ifa_scope;
923         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
924         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
925
926         if (tb[IFA_BROADCAST])
927                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
928
929         if (tb[IFA_LABEL])
930                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
931         else
932                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
933
934         if (tb[IFA_RT_PRIORITY])
935                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
936
937         if (tb[IFA_PROTO])
938                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
939
940         return ifa;
941
942 errout:
943         return ERR_PTR(err);
944 }
945
946 static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
947 {
948         struct in_device *in_dev = ifa->ifa_dev;
949         struct in_ifaddr *ifa1;
950
951         in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
952                 if (ifa1->ifa_mask == ifa->ifa_mask &&
953                     inet_ifa_match(ifa1->ifa_address, ifa) &&
954                     ifa1->ifa_local == ifa->ifa_local)
955                         return ifa1;
956         }
957
958         return NULL;
959 }
960
961 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
962                             struct netlink_ext_ack *extack)
963 {
964         __u32 prefered_lft = INFINITY_LIFE_TIME;
965         __u32 valid_lft = INFINITY_LIFE_TIME;
966         struct net *net = sock_net(skb->sk);
967         struct in_ifaddr *ifa_existing;
968         struct nlattr *tb[IFA_MAX + 1];
969         struct in_ifaddr *ifa;
970         int ret;
971
972         ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
973         if (ret < 0)
974                 return ret;
975
976         if (!nla_get_in_addr(tb[IFA_LOCAL]))
977                 return 0;
978
979         rtnl_net_lock(net);
980
981         ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
982         if (IS_ERR(ifa)) {
983                 ret = PTR_ERR(ifa);
984                 goto unlock;
985         }
986
987         ifa_existing = find_matching_ifa(net, ifa);
988         if (!ifa_existing) {
989                 /* It would be best to check for !NLM_F_CREATE here but
990                  * userspace already relies on not having to provide this.
991                  */
992                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
993                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
994                         ret = ip_mc_autojoin_config(net, true, ifa);
995                         if (ret < 0) {
996                                 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
997                                 inet_free_ifa(ifa);
998                                 goto unlock;
999                         }
1000                 }
1001
1002                 ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
1003         } else {
1004                 u32 new_metric = ifa->ifa_rt_priority;
1005                 u8 new_proto = ifa->ifa_proto;
1006
1007                 inet_free_ifa(ifa);
1008
1009                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
1010                     !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
1011                         NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1012                         ret = -EEXIST;
1013                         goto unlock;
1014                 }
1015                 ifa = ifa_existing;
1016
1017                 if (ifa->ifa_rt_priority != new_metric) {
1018                         fib_modify_prefix_metric(ifa, new_metric);
1019                         ifa->ifa_rt_priority = new_metric;
1020                 }
1021
1022                 ifa->ifa_proto = new_proto;
1023
1024                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1025                 cancel_delayed_work(&net->ipv4.addr_chk_work);
1026                 queue_delayed_work(system_power_efficient_wq,
1027                                    &net->ipv4.addr_chk_work, 0);
1028                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1029         }
1030
1031 unlock:
1032         rtnl_net_unlock(net);
1033
1034         return ret;
1035 }
1036
1037 /*
1038  *      Determine a default network mask, based on the IP address.
1039  */
1040
1041 static int inet_abc_len(__be32 addr)
1042 {
1043         int rc = -1;    /* Something else, probably a multicast. */
1044
1045         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1046                 rc = 0;
1047         else {
1048                 __u32 haddr = ntohl(addr);
1049                 if (IN_CLASSA(haddr))
1050                         rc = 8;
1051                 else if (IN_CLASSB(haddr))
1052                         rc = 16;
1053                 else if (IN_CLASSC(haddr))
1054                         rc = 24;
1055                 else if (IN_CLASSE(haddr))
1056                         rc = 32;
1057         }
1058
1059         return rc;
1060 }
1061
1062
1063 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1064 {
1065         struct sockaddr_in sin_orig;
1066         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1067         struct in_ifaddr __rcu **ifap = NULL;
1068         struct in_device *in_dev;
1069         struct in_ifaddr *ifa = NULL;
1070         struct net_device *dev;
1071         char *colon;
1072         int ret = -EFAULT;
1073         int tryaddrmatch = 0;
1074
1075         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1076
1077         /* save original address for comparison */
1078         memcpy(&sin_orig, sin, sizeof(*sin));
1079
1080         colon = strchr(ifr->ifr_name, ':');
1081         if (colon)
1082                 *colon = 0;
1083
1084         dev_load(net, ifr->ifr_name);
1085
1086         switch (cmd) {
1087         case SIOCGIFADDR:       /* Get interface address */
1088         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1089         case SIOCGIFDSTADDR:    /* Get the destination address */
1090         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1091                 /* Note that these ioctls will not sleep,
1092                    so that we do not impose a lock.
1093                    One day we will be forced to put shlock here (I mean SMP)
1094                  */
1095                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1096                 memset(sin, 0, sizeof(*sin));
1097                 sin->sin_family = AF_INET;
1098                 break;
1099
1100         case SIOCSIFFLAGS:
1101                 ret = -EPERM;
1102                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1103                         goto out;
1104                 break;
1105         case SIOCSIFADDR:       /* Set interface address (and family) */
1106         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1107         case SIOCSIFDSTADDR:    /* Set the destination address */
1108         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1109                 ret = -EPERM;
1110                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1111                         goto out;
1112                 ret = -EINVAL;
1113                 if (sin->sin_family != AF_INET)
1114                         goto out;
1115                 break;
1116         default:
1117                 ret = -EINVAL;
1118                 goto out;
1119         }
1120
1121         rtnl_net_lock(net);
1122
1123         ret = -ENODEV;
1124         dev = __dev_get_by_name(net, ifr->ifr_name);
1125         if (!dev)
1126                 goto done;
1127
1128         if (colon)
1129                 *colon = ':';
1130
1131         in_dev = __in_dev_get_rtnl_net(dev);
1132         if (in_dev) {
1133                 if (tryaddrmatch) {
1134                         /* Matthias Andree */
1135                         /* compare label and address (4.4BSD style) */
1136                         /* note: we only do this for a limited set of ioctls
1137                            and only if the original address family was AF_INET.
1138                            This is checked above. */
1139
1140                         for (ifap = &in_dev->ifa_list;
1141                              (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1142                              ifap = &ifa->ifa_next) {
1143                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1144                                     sin_orig.sin_addr.s_addr ==
1145                                                         ifa->ifa_local) {
1146                                         break; /* found */
1147                                 }
1148                         }
1149                 }
1150                 /* we didn't get a match, maybe the application is
1151                    4.3BSD-style and passed in junk so we fall back to
1152                    comparing just the label */
1153                 if (!ifa) {
1154                         for (ifap = &in_dev->ifa_list;
1155                              (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1156                              ifap = &ifa->ifa_next)
1157                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1158                                         break;
1159                 }
1160         }
1161
1162         ret = -EADDRNOTAVAIL;
1163         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1164                 goto done;
1165
1166         switch (cmd) {
1167         case SIOCGIFADDR:       /* Get interface address */
1168                 ret = 0;
1169                 sin->sin_addr.s_addr = ifa->ifa_local;
1170                 break;
1171
1172         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1173                 ret = 0;
1174                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1175                 break;
1176
1177         case SIOCGIFDSTADDR:    /* Get the destination address */
1178                 ret = 0;
1179                 sin->sin_addr.s_addr = ifa->ifa_address;
1180                 break;
1181
1182         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1183                 ret = 0;
1184                 sin->sin_addr.s_addr = ifa->ifa_mask;
1185                 break;
1186
1187         case SIOCSIFFLAGS:
1188                 if (colon) {
1189                         ret = -EADDRNOTAVAIL;
1190                         if (!ifa)
1191                                 break;
1192                         ret = 0;
1193                         if (!(ifr->ifr_flags & IFF_UP))
1194                                 inet_del_ifa(in_dev, ifap, 1);
1195                         break;
1196                 }
1197
1198                 /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
1199                 ASSERT_RTNL();
1200                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1201                 break;
1202
1203         case SIOCSIFADDR:       /* Set interface address (and family) */
1204                 ret = -EINVAL;
1205                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1206                         break;
1207
1208                 if (!ifa) {
1209                         ret = -ENOBUFS;
1210                         if (!in_dev)
1211                                 break;
1212                         ifa = inet_alloc_ifa(in_dev);
1213                         if (!ifa)
1214                                 break;
1215
1216                         if (colon)
1217                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1218                         else
1219                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1220                 } else {
1221                         ret = 0;
1222                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1223                                 break;
1224                         inet_del_ifa(in_dev, ifap, 0);
1225                         ifa->ifa_broadcast = 0;
1226                         ifa->ifa_scope = 0;
1227                 }
1228
1229                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1230
1231                 if (!(dev->flags & IFF_POINTOPOINT)) {
1232                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1233                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1234                         if ((dev->flags & IFF_BROADCAST) &&
1235                             ifa->ifa_prefixlen < 31)
1236                                 ifa->ifa_broadcast = ifa->ifa_address |
1237                                                      ~ifa->ifa_mask;
1238                 } else {
1239                         ifa->ifa_prefixlen = 32;
1240                         ifa->ifa_mask = inet_make_mask(32);
1241                 }
1242                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1243                 ret = inet_set_ifa(dev, ifa);
1244                 break;
1245
1246         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1247                 ret = 0;
1248                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1249                         inet_del_ifa(in_dev, ifap, 0);
1250                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1251                         inet_insert_ifa(ifa);
1252                 }
1253                 break;
1254
1255         case SIOCSIFDSTADDR:    /* Set the destination address */
1256                 ret = 0;
1257                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1258                         break;
1259                 ret = -EINVAL;
1260                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1261                         break;
1262                 ret = 0;
1263                 inet_del_ifa(in_dev, ifap, 0);
1264                 ifa->ifa_address = sin->sin_addr.s_addr;
1265                 inet_insert_ifa(ifa);
1266                 break;
1267
1268         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1269
1270                 /*
1271                  *      The mask we set must be legal.
1272                  */
1273                 ret = -EINVAL;
1274                 if (bad_mask(sin->sin_addr.s_addr, 0))
1275                         break;
1276                 ret = 0;
1277                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1278                         __be32 old_mask = ifa->ifa_mask;
1279                         inet_del_ifa(in_dev, ifap, 0);
1280                         ifa->ifa_mask = sin->sin_addr.s_addr;
1281                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1282
1283                         /* See if current broadcast address matches
1284                          * with current netmask, then recalculate
1285                          * the broadcast address. Otherwise it's a
1286                          * funny address, so don't touch it since
1287                          * the user seems to know what (s)he's doing...
1288                          */
1289                         if ((dev->flags & IFF_BROADCAST) &&
1290                             (ifa->ifa_prefixlen < 31) &&
1291                             (ifa->ifa_broadcast ==
1292                              (ifa->ifa_local|~old_mask))) {
1293                                 ifa->ifa_broadcast = (ifa->ifa_local |
1294                                                       ~sin->sin_addr.s_addr);
1295                         }
1296                         inet_insert_ifa(ifa);
1297                 }
1298                 break;
1299         }
1300 done:
1301         rtnl_net_unlock(net);
1302 out:
1303         return ret;
1304 }
1305
1306 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1307 {
1308         struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
1309         const struct in_ifaddr *ifa;
1310         struct ifreq ifr;
1311         int done = 0;
1312
1313         if (WARN_ON(size > sizeof(struct ifreq)))
1314                 goto out;
1315
1316         if (!in_dev)
1317                 goto out;
1318
1319         in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
1320                 if (!buf) {
1321                         done += size;
1322                         continue;
1323                 }
1324                 if (len < size)
1325                         break;
1326                 memset(&ifr, 0, sizeof(struct ifreq));
1327                 strcpy(ifr.ifr_name, ifa->ifa_label);
1328
1329                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1330                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1331                                                                 ifa->ifa_local;
1332
1333                 if (copy_to_user(buf + done, &ifr, size)) {
1334                         done = -EFAULT;
1335                         break;
1336                 }
1337                 len  -= size;
1338                 done += size;
1339         }
1340 out:
1341         return done;
1342 }
1343
1344 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1345                                  int scope)
1346 {
1347         const struct in_ifaddr *ifa;
1348
1349         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1350                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1351                         continue;
1352                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1353                     ifa->ifa_scope <= scope)
1354                         return ifa->ifa_local;
1355         }
1356
1357         return 0;
1358 }
1359
1360 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1361 {
1362         const struct in_ifaddr *ifa;
1363         __be32 addr = 0;
1364         unsigned char localnet_scope = RT_SCOPE_HOST;
1365         struct in_device *in_dev;
1366         struct net *net;
1367         int master_idx;
1368
1369         rcu_read_lock();
1370         net = dev_net_rcu(dev);
1371         in_dev = __in_dev_get_rcu(dev);
1372         if (!in_dev)
1373                 goto no_in_dev;
1374
1375         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1376                 localnet_scope = RT_SCOPE_LINK;
1377
1378         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1379                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1380                         continue;
1381                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1382                         continue;
1383                 if (!dst || inet_ifa_match(dst, ifa)) {
1384                         addr = ifa->ifa_local;
1385                         break;
1386                 }
1387                 if (!addr)
1388                         addr = ifa->ifa_local;
1389         }
1390
1391         if (addr)
1392                 goto out_unlock;
1393 no_in_dev:
1394         master_idx = l3mdev_master_ifindex_rcu(dev);
1395
1396         /* For VRFs, the VRF device takes the place of the loopback device,
1397          * with addresses on it being preferred.  Note in such cases the
1398          * loopback device will be among the devices that fail the master_idx
1399          * equality check in the loop below.
1400          */
1401         if (master_idx &&
1402             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1403             (in_dev = __in_dev_get_rcu(dev))) {
1404                 addr = in_dev_select_addr(in_dev, scope);
1405                 if (addr)
1406                         goto out_unlock;
1407         }
1408
1409         /* Not loopback addresses on loopback should be preferred
1410            in this case. It is important that lo is the first interface
1411            in dev_base list.
1412          */
1413         for_each_netdev_rcu(net, dev) {
1414                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1415                         continue;
1416
1417                 in_dev = __in_dev_get_rcu(dev);
1418                 if (!in_dev)
1419                         continue;
1420
1421                 addr = in_dev_select_addr(in_dev, scope);
1422                 if (addr)
1423                         goto out_unlock;
1424         }
1425 out_unlock:
1426         rcu_read_unlock();
1427         return addr;
1428 }
1429 EXPORT_SYMBOL(inet_select_addr);
1430
1431 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1432                               __be32 local, int scope)
1433 {
1434         unsigned char localnet_scope = RT_SCOPE_HOST;
1435         const struct in_ifaddr *ifa;
1436         __be32 addr = 0;
1437         int same = 0;
1438
1439         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1440                 localnet_scope = RT_SCOPE_LINK;
1441
1442         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1443                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1444
1445                 if (!addr &&
1446                     (local == ifa->ifa_local || !local) &&
1447                     min_scope <= scope) {
1448                         addr = ifa->ifa_local;
1449                         if (same)
1450                                 break;
1451                 }
1452                 if (!same) {
1453                         same = (!local || inet_ifa_match(local, ifa)) &&
1454                                 (!dst || inet_ifa_match(dst, ifa));
1455                         if (same && addr) {
1456                                 if (local || !dst)
1457                                         break;
1458                                 /* Is the selected addr into dst subnet? */
1459                                 if (inet_ifa_match(addr, ifa))
1460                                         break;
1461                                 /* No, then can we use new local src? */
1462                                 if (min_scope <= scope) {
1463                                         addr = ifa->ifa_local;
1464                                         break;
1465                                 }
1466                                 /* search for large dst subnet for addr */
1467                                 same = 0;
1468                         }
1469                 }
1470         }
1471
1472         return same ? addr : 0;
1473 }
1474
1475 /*
1476  * Confirm that local IP address exists using wildcards:
1477  * - net: netns to check, cannot be NULL
1478  * - in_dev: only on this interface, NULL=any interface
1479  * - dst: only in the same subnet as dst, 0=any dst
1480  * - local: address, 0=autoselect the local address
1481  * - scope: maximum allowed scope value for the local address
1482  */
1483 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1484                          __be32 dst, __be32 local, int scope)
1485 {
1486         __be32 addr = 0;
1487         struct net_device *dev;
1488
1489         if (in_dev)
1490                 return confirm_addr_indev(in_dev, dst, local, scope);
1491
1492         rcu_read_lock();
1493         for_each_netdev_rcu(net, dev) {
1494                 in_dev = __in_dev_get_rcu(dev);
1495                 if (in_dev) {
1496                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1497                         if (addr)
1498                                 break;
1499                 }
1500         }
1501         rcu_read_unlock();
1502
1503         return addr;
1504 }
1505 EXPORT_SYMBOL(inet_confirm_addr);
1506
1507 /*
1508  *      Device notifier
1509  */
1510
1511 int register_inetaddr_notifier(struct notifier_block *nb)
1512 {
1513         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1514 }
1515 EXPORT_SYMBOL(register_inetaddr_notifier);
1516
1517 int unregister_inetaddr_notifier(struct notifier_block *nb)
1518 {
1519         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1520 }
1521 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1522
1523 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1524 {
1525         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1526 }
1527 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1528
1529 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1530 {
1531         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1532             nb);
1533 }
1534 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1535
1536 /* Rename ifa_labels for a device name change. Make some effort to preserve
1537  * existing alias numbering and to create unique labels if possible.
1538 */
1539 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1540 {
1541         struct in_ifaddr *ifa;
1542         int named = 0;
1543
1544         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1545                 char old[IFNAMSIZ], *dot;
1546
1547                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1548                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1549                 if (named++ == 0)
1550                         goto skip;
1551                 dot = strchr(old, ':');
1552                 if (!dot) {
1553                         sprintf(old, ":%d", named);
1554                         dot = old;
1555                 }
1556                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1557                         strcat(ifa->ifa_label, dot);
1558                 else
1559                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1560 skip:
1561                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1562         }
1563 }
1564
1565 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1566                                         struct in_device *in_dev)
1567
1568 {
1569         const struct in_ifaddr *ifa;
1570
1571         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1572                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1573                          ifa->ifa_local, dev,
1574                          ifa->ifa_local, NULL,
1575                          dev->dev_addr, NULL);
1576         }
1577 }
1578
1579 /* Called only under RTNL semaphore */
1580
1581 static int inetdev_event(struct notifier_block *this, unsigned long event,
1582                          void *ptr)
1583 {
1584         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1585         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1586
1587         ASSERT_RTNL();
1588
1589         if (!in_dev) {
1590                 if (event == NETDEV_REGISTER) {
1591                         in_dev = inetdev_init(dev);
1592                         if (IS_ERR(in_dev))
1593                                 return notifier_from_errno(PTR_ERR(in_dev));
1594                         if (dev->flags & IFF_LOOPBACK) {
1595                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1596                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1597                         }
1598                 } else if (event == NETDEV_CHANGEMTU) {
1599                         /* Re-enabling IP */
1600                         if (inetdev_valid_mtu(dev->mtu))
1601                                 in_dev = inetdev_init(dev);
1602                 }
1603                 goto out;
1604         }
1605
1606         switch (event) {
1607         case NETDEV_REGISTER:
1608                 pr_debug("%s: bug\n", __func__);
1609                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1610                 break;
1611         case NETDEV_UP:
1612                 if (!inetdev_valid_mtu(dev->mtu))
1613                         break;
1614                 if (dev->flags & IFF_LOOPBACK) {
1615                         struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
1616
1617                         if (ifa) {
1618                                 ifa->ifa_local =
1619                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1620                                 ifa->ifa_prefixlen = 8;
1621                                 ifa->ifa_mask = inet_make_mask(8);
1622                                 ifa->ifa_scope = RT_SCOPE_HOST;
1623                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1624                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1625                                                  INFINITY_LIFE_TIME);
1626                                 ipv4_devconf_setall(in_dev);
1627                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1628                                 inet_insert_ifa(ifa);
1629                         }
1630                 }
1631                 ip_mc_up(in_dev);
1632                 fallthrough;
1633         case NETDEV_CHANGEADDR:
1634                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1635                         break;
1636                 fallthrough;
1637         case NETDEV_NOTIFY_PEERS:
1638                 /* Send gratuitous ARP to notify of link change */
1639                 inetdev_send_gratuitous_arp(dev, in_dev);
1640                 break;
1641         case NETDEV_DOWN:
1642                 ip_mc_down(in_dev);
1643                 break;
1644         case NETDEV_PRE_TYPE_CHANGE:
1645                 ip_mc_unmap(in_dev);
1646                 break;
1647         case NETDEV_POST_TYPE_CHANGE:
1648                 ip_mc_remap(in_dev);
1649                 break;
1650         case NETDEV_CHANGEMTU:
1651                 if (inetdev_valid_mtu(dev->mtu))
1652                         break;
1653                 /* disable IP when MTU is not enough */
1654                 fallthrough;
1655         case NETDEV_UNREGISTER:
1656                 inetdev_destroy(in_dev);
1657                 break;
1658         case NETDEV_CHANGENAME:
1659                 /* Do not notify about label change, this event is
1660                  * not interesting to applications using netlink.
1661                  */
1662                 inetdev_changename(dev, in_dev);
1663
1664                 devinet_sysctl_unregister(in_dev);
1665                 devinet_sysctl_register(in_dev);
1666                 break;
1667         }
1668 out:
1669         return NOTIFY_DONE;
1670 }
1671
/* Notifier block whose callback is inetdev_event(), the NETDEV_* event
 * handler defined above.
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1675
1676 static size_t inet_nlmsg_size(void)
1677 {
1678         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1679                + nla_total_size(4) /* IFA_ADDRESS */
1680                + nla_total_size(4) /* IFA_LOCAL */
1681                + nla_total_size(4) /* IFA_BROADCAST */
1682                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1683                + nla_total_size(4)  /* IFA_FLAGS */
1684                + nla_total_size(1)  /* IFA_PROTO */
1685                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1686                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1687 }
1688
1689 static inline u32 cstamp_delta(unsigned long cstamp)
1690 {
1691         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1692 }
1693
1694 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1695                          unsigned long tstamp, u32 preferred, u32 valid)
1696 {
1697         struct ifa_cacheinfo ci;
1698
1699         ci.cstamp = cstamp_delta(cstamp);
1700         ci.tstamp = cstamp_delta(tstamp);
1701         ci.ifa_prefered = preferred;
1702         ci.ifa_valid = valid;
1703
1704         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1705 }
1706
1707 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1708                             struct inet_fill_args *args)
1709 {
1710         struct ifaddrmsg *ifm;
1711         struct nlmsghdr  *nlh;
1712         unsigned long tstamp;
1713         u32 preferred, valid;
1714         u32 flags;
1715
1716         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1717                         args->flags);
1718         if (!nlh)
1719                 return -EMSGSIZE;
1720
1721         ifm = nlmsg_data(nlh);
1722         ifm->ifa_family = AF_INET;
1723         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1724
1725         flags = READ_ONCE(ifa->ifa_flags);
1726         /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1727          * The 32bit value is given in IFA_FLAGS attribute.
1728          */
1729         ifm->ifa_flags = (__u8)flags;
1730
1731         ifm->ifa_scope = ifa->ifa_scope;
1732         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1733
1734         if (args->netnsid >= 0 &&
1735             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1736                 goto nla_put_failure;
1737
1738         tstamp = READ_ONCE(ifa->ifa_tstamp);
1739         if (!(flags & IFA_F_PERMANENT)) {
1740                 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1741                 valid = READ_ONCE(ifa->ifa_valid_lft);
1742                 if (preferred != INFINITY_LIFE_TIME) {
1743                         long tval = (jiffies - tstamp) / HZ;
1744
1745                         if (preferred > tval)
1746                                 preferred -= tval;
1747                         else
1748                                 preferred = 0;
1749                         if (valid != INFINITY_LIFE_TIME) {
1750                                 if (valid > tval)
1751                                         valid -= tval;
1752                                 else
1753                                         valid = 0;
1754                         }
1755                 }
1756         } else {
1757                 preferred = INFINITY_LIFE_TIME;
1758                 valid = INFINITY_LIFE_TIME;
1759         }
1760         if ((ifa->ifa_address &&
1761              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1762             (ifa->ifa_local &&
1763              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1764             (ifa->ifa_broadcast &&
1765              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1766             (ifa->ifa_label[0] &&
1767              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1768             (ifa->ifa_proto &&
1769              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1770             nla_put_u32(skb, IFA_FLAGS, flags) ||
1771             (ifa->ifa_rt_priority &&
1772              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1773             put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1774                           preferred, valid))
1775                 goto nla_put_failure;
1776
1777         nlmsg_end(skb, nlh);
1778         return 0;
1779
1780 nla_put_failure:
1781         nlmsg_cancel(skb, nlh);
1782         return -EMSGSIZE;
1783 }
1784
1785 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1786                                       struct inet_fill_args *fillargs,
1787                                       struct net **tgt_net, struct sock *sk,
1788                                       struct netlink_callback *cb)
1789 {
1790         struct netlink_ext_ack *extack = cb->extack;
1791         struct nlattr *tb[IFA_MAX+1];
1792         struct ifaddrmsg *ifm;
1793         int err, i;
1794
1795         ifm = nlmsg_payload(nlh, sizeof(*ifm));
1796         if (!ifm) {
1797                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1798                 return -EINVAL;
1799         }
1800
1801         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1802                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1803                 return -EINVAL;
1804         }
1805
1806         fillargs->ifindex = ifm->ifa_index;
1807         if (fillargs->ifindex) {
1808                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1809                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1810         }
1811
1812         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1813                                             ifa_ipv4_policy, extack);
1814         if (err < 0)
1815                 return err;
1816
1817         for (i = 0; i <= IFA_MAX; ++i) {
1818                 if (!tb[i])
1819                         continue;
1820
1821                 if (i == IFA_TARGET_NETNSID) {
1822                         struct net *net;
1823
1824                         fillargs->netnsid = nla_get_s32(tb[i]);
1825
1826                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1827                         if (IS_ERR(net)) {
1828                                 fillargs->netnsid = -1;
1829                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1830                                 return PTR_ERR(net);
1831                         }
1832                         *tgt_net = net;
1833                 } else {
1834                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1835                         return -EINVAL;
1836                 }
1837         }
1838
1839         return 0;
1840 }
1841
1842 static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb,
1843                                 struct netlink_callback *cb, int *s_ip_idx,
1844                                 struct inet_fill_args *fillargs)
1845 {
1846         struct ip_mc_list *im;
1847         int ip_idx = 0;
1848         int err;
1849
1850         for (im = rcu_dereference(in_dev->mc_list);
1851              im;
1852              im = rcu_dereference(im->next_rcu)) {
1853                 if (ip_idx < *s_ip_idx) {
1854                         ip_idx++;
1855                         continue;
1856                 }
1857                 err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs);
1858                 if (err < 0)
1859                         goto done;
1860
1861                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1862                 ip_idx++;
1863         }
1864         err = 0;
1865         ip_idx = 0;
1866 done:
1867         *s_ip_idx = ip_idx;
1868         return err;
1869 }
1870
1871 static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb,
1872                               struct netlink_callback *cb, int *s_ip_idx,
1873                               struct inet_fill_args *fillargs)
1874 {
1875         struct in_ifaddr *ifa;
1876         int ip_idx = 0;
1877         int err;
1878
1879         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1880                 if (ip_idx < *s_ip_idx) {
1881                         ip_idx++;
1882                         continue;
1883                 }
1884                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1885                 if (err < 0)
1886                         goto done;
1887
1888                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1889                 ip_idx++;
1890         }
1891         err = 0;
1892         ip_idx = 0;
1893 done:
1894         *s_ip_idx = ip_idx;
1895
1896         return err;
1897 }
1898
1899 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1900                             struct netlink_callback *cb, int *s_ip_idx,
1901                             struct inet_fill_args *fillargs)
1902 {
1903         switch (fillargs->event) {
1904         case RTM_NEWADDR:
1905                 return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs);
1906         case RTM_GETMULTICAST:
1907                 return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx,
1908                                             fillargs);
1909         default:
1910                 return -EINVAL;
1911         }
1912 }
1913
1914 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1915  */
1916 static u32 inet_base_seq(const struct net *net)
1917 {
1918         u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1919                   READ_ONCE(net->dev_base_seq);
1920
1921         /* Must not return 0 (see nl_dump_check_consistent()).
1922          * Chose a value far away from 0.
1923          */
1924         if (!res)
1925                 res = 0x80000000;
1926         return res;
1927 }
1928
1929 static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
1930                           int event)
1931 {
1932         const struct nlmsghdr *nlh = cb->nlh;
1933         struct inet_fill_args fillargs = {
1934                 .portid = NETLINK_CB(cb->skb).portid,
1935                 .seq = nlh->nlmsg_seq,
1936                 .event = event,
1937                 .flags = NLM_F_MULTI,
1938                 .netnsid = -1,
1939         };
1940         struct net *net = sock_net(skb->sk);
1941         struct net *tgt_net = net;
1942         struct {
1943                 unsigned long ifindex;
1944                 int ip_idx;
1945         } *ctx = (void *)cb->ctx;
1946         struct in_device *in_dev;
1947         struct net_device *dev;
1948         int err = 0;
1949
1950         rcu_read_lock();
1951         if (cb->strict_check) {
1952                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1953                                                  skb->sk, cb);
1954                 if (err < 0)
1955                         goto done;
1956
1957                 if (fillargs.ifindex) {
1958                         dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1959                         if (!dev) {
1960                                 err = -ENODEV;
1961                                 goto done;
1962                         }
1963                         in_dev = __in_dev_get_rcu(dev);
1964                         if (!in_dev)
1965                                 goto done;
1966                         err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1967                                                &fillargs);
1968                         goto done;
1969                 }
1970         }
1971
1972         cb->seq = inet_base_seq(tgt_net);
1973
1974         for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1975                 in_dev = __in_dev_get_rcu(dev);
1976                 if (!in_dev)
1977                         continue;
1978                 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1979                                        &fillargs);
1980                 if (err < 0)
1981                         goto done;
1982         }
1983 done:
1984         if (fillargs.netnsid >= 0)
1985                 put_net(tgt_net);
1986         rcu_read_unlock();
1987         return err;
1988 }
1989
/* Dump callback for addresses: inet_dump_addr() with event RTM_NEWADDR. */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	return inet_dump_addr(skb, cb, RTM_NEWADDR);
}
1994
/* Dump callback for multicast addresses: inet_dump_addr() with event
 * RTM_GETMULTICAST.
 */
static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	return inet_dump_addr(skb, cb, RTM_GETMULTICAST);
}
1999
2000 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
2001                       u32 portid)
2002 {
2003         struct inet_fill_args fillargs = {
2004                 .portid = portid,
2005                 .seq = nlh ? nlh->nlmsg_seq : 0,
2006                 .event = event,
2007                 .flags = 0,
2008                 .netnsid = -1,
2009         };
2010         struct sk_buff *skb;
2011         int err = -ENOBUFS;
2012         struct net *net;
2013
2014         net = dev_net(ifa->ifa_dev->dev);
2015         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
2016         if (!skb)
2017                 goto errout;
2018
2019         err = inet_fill_ifaddr(skb, ifa, &fillargs);
2020         if (err < 0) {
2021                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
2022                 WARN_ON(err == -EMSGSIZE);
2023                 kfree_skb(skb);
2024                 goto errout;
2025         }
2026         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
2027         return;
2028 errout:
2029         rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
2030 }
2031
2032 static size_t inet_get_link_af_size(const struct net_device *dev,
2033                                     u32 ext_filter_mask)
2034 {
2035         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2036
2037         if (!in_dev)
2038                 return 0;
2039
2040         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
2041 }
2042
2043 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
2044                              u32 ext_filter_mask)
2045 {
2046         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2047         struct nlattr *nla;
2048         int i;
2049
2050         if (!in_dev)
2051                 return -ENODATA;
2052
2053         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
2054         if (!nla)
2055                 return -EMSGSIZE;
2056
2057         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
2058                 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
2059
2060         return 0;
2061 }
2062
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute. Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
2066
2067 static int inet_validate_link_af(const struct net_device *dev,
2068                                  const struct nlattr *nla,
2069                                  struct netlink_ext_ack *extack)
2070 {
2071         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2072         int err, rem;
2073
2074         if (dev && !__in_dev_get_rtnl(dev))
2075                 return -EAFNOSUPPORT;
2076
2077         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2078                                           inet_af_policy, extack);
2079         if (err < 0)
2080                 return err;
2081
2082         if (tb[IFLA_INET_CONF]) {
2083                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2084                         int cfgid = nla_type(a);
2085
2086                         if (nla_len(a) < 4)
2087                                 return -EINVAL;
2088
2089                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2090                                 return -EINVAL;
2091                 }
2092         }
2093
2094         return 0;
2095 }
2096
2097 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2098                             struct netlink_ext_ack *extack)
2099 {
2100         struct in_device *in_dev = __in_dev_get_rtnl(dev);
2101         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2102         int rem;
2103
2104         if (!in_dev)
2105                 return -EAFNOSUPPORT;
2106
2107         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2108                 return -EINVAL;
2109
2110         if (tb[IFLA_INET_CONF]) {
2111                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2112                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2113         }
2114
2115         return 0;
2116 }
2117
2118 static int inet_netconf_msgsize_devconf(int type)
2119 {
2120         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2121                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2122         bool all = false;
2123
2124         if (type == NETCONFA_ALL)
2125                 all = true;
2126
2127         if (all || type == NETCONFA_FORWARDING)
2128                 size += nla_total_size(4);
2129         if (all || type == NETCONFA_RP_FILTER)
2130                 size += nla_total_size(4);
2131         if (all || type == NETCONFA_MC_FORWARDING)
2132                 size += nla_total_size(4);
2133         if (all || type == NETCONFA_BC_FORWARDING)
2134                 size += nla_total_size(4);
2135         if (all || type == NETCONFA_PROXY_NEIGH)
2136                 size += nla_total_size(4);
2137         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2138                 size += nla_total_size(4);
2139
2140         return size;
2141 }
2142
2143 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2144                                      const struct ipv4_devconf *devconf,
2145                                      u32 portid, u32 seq, int event,
2146                                      unsigned int flags, int type)
2147 {
2148         struct nlmsghdr  *nlh;
2149         struct netconfmsg *ncm;
2150         bool all = false;
2151
2152         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2153                         flags);
2154         if (!nlh)
2155                 return -EMSGSIZE;
2156
2157         if (type == NETCONFA_ALL)
2158                 all = true;
2159
2160         ncm = nlmsg_data(nlh);
2161         ncm->ncm_family = AF_INET;
2162
2163         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2164                 goto nla_put_failure;
2165
2166         if (!devconf)
2167                 goto out;
2168
2169         if ((all || type == NETCONFA_FORWARDING) &&
2170             nla_put_s32(skb, NETCONFA_FORWARDING,
2171                         IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2172                 goto nla_put_failure;
2173         if ((all || type == NETCONFA_RP_FILTER) &&
2174             nla_put_s32(skb, NETCONFA_RP_FILTER,
2175                         IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2176                 goto nla_put_failure;
2177         if ((all || type == NETCONFA_MC_FORWARDING) &&
2178             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2179                         IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2180                 goto nla_put_failure;
2181         if ((all || type == NETCONFA_BC_FORWARDING) &&
2182             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2183                         IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2184                 goto nla_put_failure;
2185         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2186             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2187                         IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2188                 goto nla_put_failure;
2189         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2190             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2191                         IPV4_DEVCONF_RO(*devconf,
2192                                         IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2193                 goto nla_put_failure;
2194
2195 out:
2196         nlmsg_end(skb, nlh);
2197         return 0;
2198
2199 nla_put_failure:
2200         nlmsg_cancel(skb, nlh);
2201         return -EMSGSIZE;
2202 }
2203
2204 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2205                                  int ifindex, struct ipv4_devconf *devconf)
2206 {
2207         struct sk_buff *skb;
2208         int err = -ENOBUFS;
2209
2210         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2211         if (!skb)
2212                 goto errout;
2213
2214         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2215                                         event, 0, type);
2216         if (err < 0) {
2217                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218                 WARN_ON(err == -EMSGSIZE);
2219                 kfree_skb(skb);
2220                 goto errout;
2221         }
2222         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2223         return;
2224 errout:
2225         rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2226 }
2227
/* Attribute policy for netconf get requests: each listed attribute has a
 * .len of sizeof(int). Used by inet_netconf_valid_get_req().
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2235
2236 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2237                                       const struct nlmsghdr *nlh,
2238                                       struct nlattr **tb,
2239                                       struct netlink_ext_ack *extack)
2240 {
2241         int i, err;
2242
2243         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2244                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2245                 return -EINVAL;
2246         }
2247
2248         if (!netlink_strict_get_check(skb))
2249                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2250                                               tb, NETCONFA_MAX,
2251                                               devconf_ipv4_policy, extack);
2252
2253         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2254                                             tb, NETCONFA_MAX,
2255                                             devconf_ipv4_policy, extack);
2256         if (err)
2257                 return err;
2258
2259         for (i = 0; i <= NETCONFA_MAX; i++) {
2260                 if (!tb[i])
2261                         continue;
2262
2263                 switch (i) {
2264                 case NETCONFA_IFINDEX:
2265                         break;
2266                 default:
2267                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2268                         return -EINVAL;
2269                 }
2270         }
2271
2272         return 0;
2273 }
2274
2275 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2276                                     struct nlmsghdr *nlh,
2277                                     struct netlink_ext_ack *extack)
2278 {
2279         struct net *net = sock_net(in_skb->sk);
2280         struct nlattr *tb[NETCONFA_MAX + 1];
2281         const struct ipv4_devconf *devconf;
2282         struct in_device *in_dev = NULL;
2283         struct net_device *dev = NULL;
2284         struct sk_buff *skb;
2285         int ifindex;
2286         int err;
2287
2288         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2289         if (err)
2290                 return err;
2291
2292         if (!tb[NETCONFA_IFINDEX])
2293                 return -EINVAL;
2294
2295         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2296         switch (ifindex) {
2297         case NETCONFA_IFINDEX_ALL:
2298                 devconf = net->ipv4.devconf_all;
2299                 break;
2300         case NETCONFA_IFINDEX_DEFAULT:
2301                 devconf = net->ipv4.devconf_dflt;
2302                 break;
2303         default:
2304                 err = -ENODEV;
2305                 dev = dev_get_by_index(net, ifindex);
2306                 if (dev)
2307                         in_dev = in_dev_get(dev);
2308                 if (!in_dev)
2309                         goto errout;
2310                 devconf = &in_dev->cnf;
2311                 break;
2312         }
2313
2314         err = -ENOBUFS;
2315         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2316         if (!skb)
2317                 goto errout;
2318
2319         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2320                                         NETLINK_CB(in_skb).portid,
2321                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2322                                         NETCONFA_ALL);
2323         if (err < 0) {
2324                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2325                 WARN_ON(err == -EMSGSIZE);
2326                 kfree_skb(skb);
2327                 goto errout;
2328         }
2329         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2330 errout:
2331         if (in_dev)
2332                 in_dev_put(in_dev);
2333         dev_put(dev);
2334         return err;
2335 }
2336
2337 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2338                                      struct netlink_callback *cb)
2339 {
2340         const struct nlmsghdr *nlh = cb->nlh;
2341         struct net *net = sock_net(skb->sk);
2342         struct {
2343                 unsigned long ifindex;
2344                 unsigned int all_default;
2345         } *ctx = (void *)cb->ctx;
2346         const struct in_device *in_dev;
2347         struct net_device *dev;
2348         int err = 0;
2349
2350         if (cb->strict_check) {
2351                 struct netlink_ext_ack *extack = cb->extack;
2352                 struct netconfmsg *ncm;
2353
2354                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2355                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2356                         return -EINVAL;
2357                 }
2358
2359                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2360                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2361                         return -EINVAL;
2362                 }
2363         }
2364
2365         rcu_read_lock();
2366         for_each_netdev_dump(net, dev, ctx->ifindex) {
2367                 in_dev = __in_dev_get_rcu(dev);
2368                 if (!in_dev)
2369                         continue;
2370                 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2371                                                 &in_dev->cnf,
2372                                                 NETLINK_CB(cb->skb).portid,
2373                                                 nlh->nlmsg_seq,
2374                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2375                                                 NETCONFA_ALL);
2376                 if (err < 0)
2377                         goto done;
2378         }
2379         if (ctx->all_default == 0) {
2380                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2381                                                 net->ipv4.devconf_all,
2382                                                 NETLINK_CB(cb->skb).portid,
2383                                                 nlh->nlmsg_seq,
2384                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2385                                                 NETCONFA_ALL);
2386                 if (err < 0)
2387                         goto done;
2388                 ctx->all_default++;
2389         }
2390         if (ctx->all_default == 1) {
2391                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2392                                                 net->ipv4.devconf_dflt,
2393                                                 NETLINK_CB(cb->skb).portid,
2394                                                 nlh->nlmsg_seq,
2395                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2396                                                 NETCONFA_ALL);
2397                 if (err < 0)
2398                         goto done;
2399                 ctx->all_default++;
2400         }
2401 done:
2402         rcu_read_unlock();
2403         return err;
2404 }
2405
2406 #ifdef CONFIG_SYSCTL
2407
2408 static void devinet_copy_dflt_conf(struct net *net, int i)
2409 {
2410         struct net_device *dev;
2411
2412         rcu_read_lock();
2413         for_each_netdev_rcu(net, dev) {
2414                 struct in_device *in_dev;
2415
2416                 in_dev = __in_dev_get_rcu(dev);
2417                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2418                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2419         }
2420         rcu_read_unlock();
2421 }
2422
2423 /* called with RTNL locked */
2424 static void inet_forward_change(struct net *net)
2425 {
2426         struct net_device *dev;
2427         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2428
2429         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2430         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2431         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2432                                     NETCONFA_FORWARDING,
2433                                     NETCONFA_IFINDEX_ALL,
2434                                     net->ipv4.devconf_all);
2435         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2436                                     NETCONFA_FORWARDING,
2437                                     NETCONFA_IFINDEX_DEFAULT,
2438                                     net->ipv4.devconf_dflt);
2439
2440         for_each_netdev(net, dev) {
2441                 struct in_device *in_dev;
2442
2443                 if (on)
2444                         dev_disable_lro(dev);
2445
2446                 in_dev = __in_dev_get_rtnl_net(dev);
2447                 if (in_dev) {
2448                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2449                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2450                                                     NETCONFA_FORWARDING,
2451                                                     dev->ifindex, &in_dev->cnf);
2452                 }
2453         }
2454 }
2455
2456 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2457 {
2458         if (cnf == net->ipv4.devconf_dflt)
2459                 return NETCONFA_IFINDEX_DEFAULT;
2460         else if (cnf == net->ipv4.devconf_all)
2461                 return NETCONFA_IFINDEX_ALL;
2462         else {
2463                 struct in_device *idev
2464                         = container_of(cnf, struct in_device, cnf);
2465                 return idev->dev->ifindex;
2466         }
2467 }
2468
2469 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2470                              void *buffer, size_t *lenp, loff_t *ppos)
2471 {
2472         int old_value = *(int *)ctl->data;
2473         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2474         int new_value = *(int *)ctl->data;
2475
2476         if (write) {
2477                 struct ipv4_devconf *cnf = ctl->extra1;
2478                 struct net *net = ctl->extra2;
2479                 int i = (int *)ctl->data - cnf->data;
2480                 int ifindex;
2481
2482                 set_bit(i, cnf->state);
2483
2484                 if (cnf == net->ipv4.devconf_dflt)
2485                         devinet_copy_dflt_conf(net, i);
2486                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2487                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2488                         if ((new_value == 0) && (old_value != 0))
2489                                 rt_cache_flush(net);
2490
2491                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2492                     new_value != old_value)
2493                         rt_cache_flush(net);
2494
2495                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2496                     new_value != old_value) {
2497                         ifindex = devinet_conf_ifindex(net, cnf);
2498                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2499                                                     NETCONFA_RP_FILTER,
2500                                                     ifindex, cnf);
2501                 }
2502                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2503                     new_value != old_value) {
2504                         ifindex = devinet_conf_ifindex(net, cnf);
2505                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2506                                                     NETCONFA_PROXY_NEIGH,
2507                                                     ifindex, cnf);
2508                 }
2509                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2510                     new_value != old_value) {
2511                         ifindex = devinet_conf_ifindex(net, cnf);
2512                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2513                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2514                                                     ifindex, cnf);
2515                 }
2516         }
2517
2518         return ret;
2519 }
2520
2521 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2522                                   void *buffer, size_t *lenp, loff_t *ppos)
2523 {
2524         int *valp = ctl->data;
2525         int val = *valp;
2526         loff_t pos = *ppos;
2527         struct net *net = ctl->extra2;
2528         int ret;
2529
2530         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2531                 return -EPERM;
2532
2533         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2534
2535         if (write && *valp != val) {
2536                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2537                         if (!rtnl_net_trylock(net)) {
2538                                 /* Restore the original values before restarting */
2539                                 *valp = val;
2540                                 *ppos = pos;
2541                                 return restart_syscall();
2542                         }
2543                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2544                                 inet_forward_change(net);
2545                         } else {
2546                                 struct ipv4_devconf *cnf = ctl->extra1;
2547                                 struct in_device *idev =
2548                                         container_of(cnf, struct in_device, cnf);
2549                                 if (*valp)
2550                                         dev_disable_lro(idev->dev);
2551                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2552                                                             NETCONFA_FORWARDING,
2553                                                             idev->dev->ifindex,
2554                                                             cnf);
2555                         }
2556                         rtnl_net_unlock(net);
2557                         rt_cache_flush(net);
2558                 } else
2559                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2560                                                     NETCONFA_FORWARDING,
2561                                                     NETCONFA_IFINDEX_DEFAULT,
2562                                                     net->ipv4.devconf_dflt);
2563         }
2564
2565         return ret;
2566 }
2567
2568 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2569                                 void *buffer, size_t *lenp, loff_t *ppos)
2570 {
2571         int *valp = ctl->data;
2572         int val = *valp;
2573         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2574         struct net *net = ctl->extra2;
2575
2576         if (write && *valp != val)
2577                 rt_cache_flush(net);
2578
2579         return ret;
2580 }
2581
/*
 * Initializer for one ctl_table entry bound to slot IPV4_DEVCONF_<attr>
 * of the static ipv4_devconf (slots are stored at index <attr> - 1).
 * @name is the proc file name, @perm its mode and @handler its
 * proc_handler; .extra1 points at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perm, handler) \
        { \
                .procname       = name, \
                .data           = &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
                .maxlen         = sizeof(int), \
                .mode           = perm, \
                .proc_handler   = handler, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2604
/*
 * Template for the sysctl table of one ipv4_devconf instance.
 *
 * Each entry's .data points into the static ipv4_devconf and .extra1 at
 * ipv4_devconf itself (see DEVINET_SYSCTL_ENTRY).
 * __devinet_sysctl_register() duplicates this whole structure and
 * rebases .data/.extra1 of every entry onto the instance being
 * registered; sysctl_header of the copy holds the registration handle.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* Forwarding has its own handler; mc_forwarding is read-only. */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
                DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

                /* Plain read/write entries handled by devinet_conf_proc. */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
                                        "arp_evict_nocarrier"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
                                        "force_igmp_version"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv2_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv3_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
                                        "ignore_routes_with_linkdown"),
                DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
                                        "drop_gratuitous_arp"),

                /* Entries handled by ipv4_doint_and_flush. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
                                              "drop_unicast_in_l2_multicast"),
        },
};
2658
2659 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2660                                      int ifindex, struct ipv4_devconf *p)
2661 {
2662         int i;
2663         struct devinet_sysctl_table *t;
2664         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2665
2666         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2667         if (!t)
2668                 goto out;
2669
2670         for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2671                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2672                 t->devinet_vars[i].extra1 = p;
2673                 t->devinet_vars[i].extra2 = net;
2674         }
2675
2676         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2677
2678         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2679         if (!t->sysctl_header)
2680                 goto free;
2681
2682         p->sysctl = t;
2683
2684         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2685                                     ifindex, p);
2686         return 0;
2687
2688 free:
2689         kfree(t);
2690 out:
2691         return -ENOMEM;
2692 }
2693
2694 static void __devinet_sysctl_unregister(struct net *net,
2695                                         struct ipv4_devconf *cnf, int ifindex)
2696 {
2697         struct devinet_sysctl_table *t = cnf->sysctl;
2698
2699         if (t) {
2700                 cnf->sysctl = NULL;
2701                 unregister_net_sysctl_table(t->sysctl_header);
2702                 kfree(t);
2703         }
2704
2705         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2706 }
2707
2708 static int devinet_sysctl_register(struct in_device *idev)
2709 {
2710         int err;
2711
2712         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2713                 return -EINVAL;
2714
2715         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2716         if (err)
2717                 return err;
2718         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2719                                         idev->dev->ifindex, &idev->cnf);
2720         if (err)
2721                 neigh_sysctl_unregister(idev->arp_parms);
2722         return err;
2723 }
2724
2725 static void devinet_sysctl_unregister(struct in_device *idev)
2726 {
2727         struct net *net = dev_net(idev->dev);
2728
2729         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2730         neigh_sysctl_unregister(idev->arp_parms);
2731 }
2732
/*
 * Template for the "ip_forward" sysctl.
 *
 * As written it refers to the FORWARDING slot of the static ipv4_devconf
 * and to init_net.  devinet_init_net() duplicates this table for each
 * namespace, repoints .data/.extra1/.extra2 at that namespace's "all"
 * devconf and net, and registers the copy under "net/ipv4".
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
};
2745 #endif
2746
2747 static __net_init int devinet_init_net(struct net *net)
2748 {
2749 #ifdef CONFIG_SYSCTL
2750         struct ctl_table_header *forw_hdr;
2751         struct ctl_table *tbl;
2752 #endif
2753         struct ipv4_devconf *all, *dflt;
2754         int err;
2755         int i;
2756
2757         err = -ENOMEM;
2758         net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
2759                                                 sizeof(struct hlist_head),
2760                                                 GFP_KERNEL);
2761         if (!net->ipv4.inet_addr_lst)
2762                 goto err_alloc_hash;
2763
2764         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2765         if (!all)
2766                 goto err_alloc_all;
2767
2768         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2769         if (!dflt)
2770                 goto err_alloc_dflt;
2771
2772 #ifdef CONFIG_SYSCTL
2773         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2774         if (!tbl)
2775                 goto err_alloc_ctl;
2776
2777         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2778         tbl[0].extra1 = all;
2779         tbl[0].extra2 = net;
2780 #endif
2781
2782         if (!net_eq(net, &init_net)) {
2783                 switch (net_inherit_devconf()) {
2784                 case 3:
2785                         /* copy from the current netns */
2786                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2787                                sizeof(ipv4_devconf));
2788                         memcpy(dflt,
2789                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2790                                sizeof(ipv4_devconf_dflt));
2791                         break;
2792                 case 0:
2793                 case 1:
2794                         /* copy from init_net */
2795                         memcpy(all, init_net.ipv4.devconf_all,
2796                                sizeof(ipv4_devconf));
2797                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2798                                sizeof(ipv4_devconf_dflt));
2799                         break;
2800                 case 2:
2801                         /* use compiled values */
2802                         break;
2803                 }
2804         }
2805
2806 #ifdef CONFIG_SYSCTL
2807         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2808         if (err < 0)
2809                 goto err_reg_all;
2810
2811         err = __devinet_sysctl_register(net, "default",
2812                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2813         if (err < 0)
2814                 goto err_reg_dflt;
2815
2816         err = -ENOMEM;
2817         forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2818                                           ARRAY_SIZE(ctl_forward_entry));
2819         if (!forw_hdr)
2820                 goto err_reg_ctl;
2821         net->ipv4.forw_hdr = forw_hdr;
2822 #endif
2823
2824         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2825                 INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
2826
2827         INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
2828
2829         net->ipv4.devconf_all = all;
2830         net->ipv4.devconf_dflt = dflt;
2831         return 0;
2832
2833 #ifdef CONFIG_SYSCTL
2834 err_reg_ctl:
2835         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2836 err_reg_dflt:
2837         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2838 err_reg_all:
2839         kfree(tbl);
2840 err_alloc_ctl:
2841 #endif
2842         kfree(dflt);
2843 err_alloc_dflt:
2844         kfree(all);
2845 err_alloc_all:
2846         kfree(net->ipv4.inet_addr_lst);
2847 err_alloc_hash:
2848         return err;
2849 }
2850
2851 static __net_exit void devinet_exit_net(struct net *net)
2852 {
2853 #ifdef CONFIG_SYSCTL
2854         const struct ctl_table *tbl;
2855 #endif
2856
2857         cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
2858
2859 #ifdef CONFIG_SYSCTL
2860         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2861         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2862         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2863                                     NETCONFA_IFINDEX_DEFAULT);
2864         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2865                                     NETCONFA_IFINDEX_ALL);
2866         kfree(tbl);
2867 #endif
2868         kfree(net->ipv4.devconf_dflt);
2869         kfree(net->ipv4.devconf_all);
2870         kfree(net->ipv4.inet_addr_lst);
2871 }
2872
/*
 * Per-network-namespace init/exit hooks; devinet_init() passes this to
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2877
/*
 * AF_INET link address-family operations; devinet_init() registers them
 * with rtnl_af_register().  The callbacks are defined outside this
 * section of the file.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2885
2886 static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
2887         {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
2888          .flags = RTNL_FLAG_DOIT_PERNET},
2889         {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
2890          .flags = RTNL_FLAG_DOIT_PERNET},
2891         {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
2892          .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
2893         {.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
2894          .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
2895          .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
2896         {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST,
2897          .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED},
2898 };
2899
2900 void __init devinet_init(void)
2901 {
2902         register_pernet_subsys(&devinet_ops);
2903         register_netdevice_notifier(&ip_netdev_notifier);
2904
2905         if (rtnl_af_register(&inet_af_ops))
2906                 panic("Unable to register inet_af_ops\n");
2907
2908         rtnl_register_many(devinet_rtnl_msg_handlers);
2909 }