9cf64ee47dd2b4fddf7be62c7110a1ad41b53fe8
[linux-2.6-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/*
 * Address flags that this file treats as IPv6-only:
 * __inet_insert_ifa() clears all of them from an address before
 * linking it into the device list.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
/*
 * Statically initialised ipv4_devconf instance.  The .data[] array is
 * indexed by IPV4_DEVCONF_* minus one; entries not listed start at 0.
 * NOTE(review): presumably the "all" configuration of the initial
 * namespace -- its users are outside this section, confirm there.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
81
/*
 * Second statically initialised ipv4_devconf instance.  It lists the
 * same entries as ipv4_devconf plus ACCEPT_SOURCE_ROUTE = 1.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into every new in_device -- the hookup
 * is outside this section, confirm there.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
94
/* Read devconf attribute @attr from the default configuration of @net. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97
/*
 * Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * IFA_LABEL is limited to IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]		= { .type = NLA_U32 },
	[IFA_ADDRESS]		= { .type = NLA_U32 },
	[IFA_BROADCAST]		= { .type = NLA_U32 },
	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};
109
/*
 * Argument bundle; it is not used within this part of the file.
 * NOTE(review): judging by the field names it parameterises building an
 * address netlink message (port id, sequence number, event, flags,
 * netns id, ifindex) -- confirm against the users further down.
 */
struct inet_fill_args {
	u32 portid;
	u32 seq;
	int event;
	unsigned int flags;
	int netnsid;
	int ifindex;
};
118
/*
 * Hash table of configured IPv4 addresses: 1 << 8 = 256 buckets,
 * indexed by inet_addr_hash().  Entries are linked through
 * in_ifaddr::hash.  There is a single table for all namespaces, so
 * inet_lookup_ifaddr_rcu() also compares the owning netns.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141         ASSERT_RTNL();
142         hlist_del_init_rcu(&ifa->hash);
143 }
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
/* Forward declaration; the definition is outside this section. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the
 * affected in_ifaddr when an address is added or removed.
 * inetaddr_validator_chain is called by __inet_insert_ifa() with an
 * in_validator_info before a new address is committed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
/* Forward declaration: inetdev_destroy() uses it before the definition. */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL only prototypes appear
 * here and the definitions are outside this section.  Without it,
 * registration trivially succeeds and unregistration does nothing.
 */
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239         struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241         kfree(rcu_dereference_protected(idev->mc_hash, 1));
242         kfree(idev);
243 }
244
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247         struct net_device *dev = idev->dev;
248
249         WARN_ON(idev->ifa_list);
250         WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254         netdev_put(dev, &idev->dev_tracker);
255         if (!idev->dead)
256                 pr_err("Freeing alive in_device %p\n", idev);
257         else
258                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264         struct in_device *in_dev;
265         int err = -ENOMEM;
266
267         ASSERT_RTNL();
268
269         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270         if (!in_dev)
271                 goto out;
272         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273                         sizeof(in_dev->cnf));
274         in_dev->cnf.sysctl = NULL;
275         in_dev->dev = dev;
276         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277         if (!in_dev->arp_parms)
278                 goto out_kfree;
279         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280                 dev_disable_lro(dev);
281         /* Reference in_dev->dev */
282         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283         /* Account for reference dev->ip_ptr (below) */
284         refcount_set(&in_dev->refcnt, 1);
285
286         err = devinet_sysctl_register(in_dev);
287         if (err) {
288                 in_dev->dead = 1;
289                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290                 in_dev_put(in_dev);
291                 in_dev = NULL;
292                 goto out;
293         }
294         ip_mc_init_dev(in_dev);
295         if (dev->flags & IFF_UP)
296                 ip_mc_up(in_dev);
297
298         /* we can receive as soon as ip_ptr is set -- do this last */
299         rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301         return in_dev ?: ERR_PTR(err);
302 out_kfree:
303         kfree(in_dev);
304         in_dev = NULL;
305         goto out;
306 }
307
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310         struct net_device *dev;
311         struct in_ifaddr *ifa;
312
313         ASSERT_RTNL();
314
315         dev = in_dev->dev;
316
317         in_dev->dead = 1;
318
319         ip_mc_destroy_dev(in_dev);
320
321         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323                 inet_free_ifa(ifa);
324         }
325
326         RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328         devinet_sysctl_unregister(in_dev);
329         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330         arp_ifdown(dev);
331
332         in_dev_put(in_dev);
333 }
334
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337         const struct in_ifaddr *ifa;
338
339         rcu_read_lock();
340         in_dev_for_each_ifa_rcu(ifa, in_dev) {
341                 if (inet_ifa_match(a, ifa)) {
342                         if (!b || inet_ifa_match(b, ifa)) {
343                                 rcu_read_unlock();
344                                 return 1;
345                         }
346                 }
347         }
348         rcu_read_unlock();
349         return 0;
350 }
351
/*
 * __inet_del_ifa - unlink the address at *@ifap from @in_dev and announce it
 * @in_dev:  device whose address list contains the entry
 * @ifap:    list slot currently pointing at the address to remove
 * @destroy: non-zero to also free the removed address via inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa() for the notifications
 * @portid:  passed through to rtmsg_ifa() for the notifications
 *
 * Asserts that RTNL is held.  When a primary address is removed, the
 * secondaries that share its mask and match its address are either
 * deleted with it or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first
 * of them is promoted to primary.  This handling of secondaries is
 * skipped entirely when @in_dev is already marked dead.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr *last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* ifa1 is the address being deleted. */
	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Remember the last primary whose scope is not
			 * below that of ifa1; a promoted address is
			 * re-inserted right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Not a secondary of ifa1's subnet: keep it and
			 * record it as the possible predecessor of the
			 * promotion candidate.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop this secondary now.
				 * ifap1 is not advanced, so the next
				 * iteration looks at its successor.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Other entries sit between ifa1 and promote:
			 * take promote out of its current position and
			 * re-link it directly after last_prim.
			 */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of this
		 * subnet that were removed silently above.
		 */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
464
465 static void inet_del_ifa(struct in_device *in_dev,
466                          struct in_ifaddr __rcu **ifap,
467                          int destroy)
468 {
469         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work item running check_lifetime().  __inet_insert_ifa()
 * queues it with zero delay after adding an address, and
 * check_lifetime() re-queues it at the end of every run.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477                              u32 portid, struct netlink_ext_ack *extack)
478 {
479         struct in_ifaddr __rcu **last_primary, **ifap;
480         struct in_device *in_dev = ifa->ifa_dev;
481         struct in_validator_info ivi;
482         struct in_ifaddr *ifa1;
483         int ret;
484
485         ASSERT_RTNL();
486
487         if (!ifa->ifa_local) {
488                 inet_free_ifa(ifa);
489                 return 0;
490         }
491
492         ifa->ifa_flags &= ~IFA_F_SECONDARY;
493         last_primary = &in_dev->ifa_list;
494
495         /* Don't set IPv6 only flags to IPv4 addresses */
496         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497
498         ifap = &in_dev->ifa_list;
499         ifa1 = rtnl_dereference(*ifap);
500
501         while (ifa1) {
502                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503                     ifa->ifa_scope <= ifa1->ifa_scope)
504                         last_primary = &ifa1->ifa_next;
505                 if (ifa1->ifa_mask == ifa->ifa_mask &&
506                     inet_ifa_match(ifa1->ifa_address, ifa)) {
507                         if (ifa1->ifa_local == ifa->ifa_local) {
508                                 inet_free_ifa(ifa);
509                                 return -EEXIST;
510                         }
511                         if (ifa1->ifa_scope != ifa->ifa_scope) {
512                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513                                 inet_free_ifa(ifa);
514                                 return -EINVAL;
515                         }
516                         ifa->ifa_flags |= IFA_F_SECONDARY;
517                 }
518
519                 ifap = &ifa1->ifa_next;
520                 ifa1 = rtnl_dereference(*ifap);
521         }
522
523         /* Allow any devices that wish to register ifaddr validtors to weigh
524          * in now, before changes are committed.  The rntl lock is serializing
525          * access here, so the state should not change between a validator call
526          * and a final notify on commit.  This isn't invoked on promotion under
527          * the assumption that validators are checking the address itself, and
528          * not the flags.
529          */
530         ivi.ivi_addr = ifa->ifa_address;
531         ivi.ivi_dev = ifa->ifa_dev;
532         ivi.extack = extack;
533         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534                                            NETDEV_UP, &ivi);
535         ret = notifier_to_errno(ret);
536         if (ret) {
537                 inet_free_ifa(ifa);
538                 return ret;
539         }
540
541         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542                 ifap = last_primary;
543
544         rcu_assign_pointer(ifa->ifa_next, *ifap);
545         rcu_assign_pointer(*ifap, ifa);
546
547         inet_hash_insert(dev_net(in_dev->dev), ifa);
548
549         cancel_delayed_work(&check_lifetime_work);
550         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551
552         /* Send message first, then call notifier.
553            Notifier will trigger FIB update, so that
554            listeners of netlink will know about new ifaddr */
555         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557
558         return 0;
559 }
560
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563         return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568         struct in_device *in_dev = __in_dev_get_rtnl(dev);
569
570         ASSERT_RTNL();
571
572         if (!in_dev) {
573                 inet_free_ifa(ifa);
574                 return -ENOBUFS;
575         }
576         ipv4_devconf_setall(in_dev);
577         neigh_parms_data_state_setall(in_dev->arp_parms);
578         if (ifa->ifa_dev != in_dev) {
579                 WARN_ON(ifa->ifa_dev);
580                 in_dev_hold(in_dev);
581                 ifa->ifa_dev = in_dev;
582         }
583         if (ipv4_is_loopback(ifa->ifa_local))
584                 ifa->ifa_scope = RT_SCOPE_HOST;
585         return inet_insert_ifa(ifa);
586 }
587
588 /* Caller must hold RCU or RTNL :
589  * We dont take a reference on found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593         struct net_device *dev;
594         struct in_device *in_dev = NULL;
595
596         rcu_read_lock();
597         dev = dev_get_by_index_rcu(net, ifindex);
598         if (dev)
599                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600         rcu_read_unlock();
601         return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604
605 /* Called only from RTNL semaphored context. No locks. */
606
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608                                     __be32 mask)
609 {
610         struct in_ifaddr *ifa;
611
612         ASSERT_RTNL();
613
614         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616                         return ifa;
617         }
618         return NULL;
619 }
620
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622                                  const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625         struct ip_mreqn mreq = {
626                 .imr_multiaddr.s_addr = ifa->ifa_address,
627                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
628         };
629         struct sock *sk = net->ipv4.mc_autojoin_sk;
630         int ret;
631
632         ASSERT_RTNL();
633
634         lock_sock(sk);
635         if (join)
636                 ret = ip_mc_join_group(sk, &mreq);
637         else
638                 ret = ip_mc_leave_group(sk, &mreq);
639         release_sock(sk);
640
641         return ret;
642 #else
643         return -EOPNOTSUPP;
644 #endif
645 }
646
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648                             struct netlink_ext_ack *extack)
649 {
650         struct net *net = sock_net(skb->sk);
651         struct in_ifaddr __rcu **ifap;
652         struct nlattr *tb[IFA_MAX+1];
653         struct in_device *in_dev;
654         struct ifaddrmsg *ifm;
655         struct in_ifaddr *ifa;
656         int err;
657
658         ASSERT_RTNL();
659
660         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661                                      ifa_ipv4_policy, extack);
662         if (err < 0)
663                 goto errout;
664
665         ifm = nlmsg_data(nlh);
666         in_dev = inetdev_by_index(net, ifm->ifa_index);
667         if (!in_dev) {
668                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669                 err = -ENODEV;
670                 goto errout;
671         }
672
673         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674              ifap = &ifa->ifa_next) {
675                 if (tb[IFA_LOCAL] &&
676                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677                         continue;
678
679                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680                         continue;
681
682                 if (tb[IFA_ADDRESS] &&
683                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685                         continue;
686
687                 if (ipv4_is_multicast(ifa->ifa_address))
688                         ip_mc_autojoin_config(net, false, ifa);
689                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690                 return 0;
691         }
692
693         NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694         err = -EADDRNOTAVAIL;
695 errout:
696         return err;
697 }
698
/*
 * Lifetime value that never expires: check_lifetime() skips the expiry
 * test for a valid or preferred lifetime equal to this.
 */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
700
/*
 * Work handler of check_lifetime_work.
 *
 * Every bucket of inet_addr_lst[] is scanned twice.  A first pass under
 * rcu_read_lock() only decides whether the bucket holds an address that
 * needs a change and tracks the earliest time at which a lifetime runs
 * out.  Only if a change is needed does a second pass, under RTNL,
 * delete addresses whose valid lifetime has expired and flag those
 * whose preferred lifetime has expired as IFA_F_DEPRECATED.
 * Addresses with IFA_F_PERMANENT are skipped.
 *
 * The handler finally re-queues itself for the next expiry.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Latest acceptable time for the next run; lowered below. */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			/* age is in seconds since ifa_tstamp */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				/* valid lifetime over: must be deleted */
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				/* preferred lifetime over: the next event
				 * for this address is its valid expiry
				 */
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				/* still preferred: next event is the
				 * preferred expiry
				 */
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		rtnl_lock();
		/* _safe iteration: inet_del_ifa() unhashes the entry */
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* inet_del_ifa() needs the list slot that
				 * points at ifa, so walk the device list
				 * to find it.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					if (tmp == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
802
803 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
804                              __u32 prefered_lft)
805 {
806         unsigned long timeout;
807
808         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
809
810         timeout = addrconf_timeout_fixup(valid_lft, HZ);
811         if (addrconf_finite_timeout(timeout))
812                 ifa->ifa_valid_lft = timeout;
813         else
814                 ifa->ifa_flags |= IFA_F_PERMANENT;
815
816         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
817         if (addrconf_finite_timeout(timeout)) {
818                 if (timeout == 0)
819                         ifa->ifa_flags |= IFA_F_DEPRECATED;
820                 ifa->ifa_preferred_lft = timeout;
821         }
822         ifa->ifa_tstamp = jiffies;
823         if (!ifa->ifa_cstamp)
824                 ifa->ifa_cstamp = ifa->ifa_tstamp;
825 }
826
827 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
828                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
829                                        struct netlink_ext_ack *extack)
830 {
831         struct nlattr *tb[IFA_MAX+1];
832         struct in_ifaddr *ifa;
833         struct ifaddrmsg *ifm;
834         struct net_device *dev;
835         struct in_device *in_dev;
836         int err;
837
838         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
839                                      ifa_ipv4_policy, extack);
840         if (err < 0)
841                 goto errout;
842
843         ifm = nlmsg_data(nlh);
844         err = -EINVAL;
845
846         if (ifm->ifa_prefixlen > 32) {
847                 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
848                 goto errout;
849         }
850
851         if (!tb[IFA_LOCAL]) {
852                 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
853                 goto errout;
854         }
855
856         dev = __dev_get_by_index(net, ifm->ifa_index);
857         err = -ENODEV;
858         if (!dev) {
859                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
860                 goto errout;
861         }
862
863         in_dev = __in_dev_get_rtnl(dev);
864         err = -ENOBUFS;
865         if (!in_dev)
866                 goto errout;
867
868         ifa = inet_alloc_ifa();
869         if (!ifa)
870                 /*
871                  * A potential indev allocation can be left alive, it stays
872                  * assigned to its device and is destroy with it.
873                  */
874                 goto errout;
875
876         ipv4_devconf_setall(in_dev);
877         neigh_parms_data_state_setall(in_dev->arp_parms);
878         in_dev_hold(in_dev);
879
880         if (!tb[IFA_ADDRESS])
881                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
882
883         INIT_HLIST_NODE(&ifa->hash);
884         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
885         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
886         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
887                                          ifm->ifa_flags;
888         ifa->ifa_scope = ifm->ifa_scope;
889         ifa->ifa_dev = in_dev;
890
891         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
892         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
893
894         if (tb[IFA_BROADCAST])
895                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
896
897         if (tb[IFA_LABEL])
898                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
899         else
900                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
901
902         if (tb[IFA_RT_PRIORITY])
903                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
904
905         if (tb[IFA_PROTO])
906                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
907
908         if (tb[IFA_CACHEINFO]) {
909                 struct ifa_cacheinfo *ci;
910
911                 ci = nla_data(tb[IFA_CACHEINFO]);
912                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
913                         NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
914                         err = -EINVAL;
915                         goto errout_free;
916                 }
917                 *pvalid_lft = ci->ifa_valid;
918                 *pprefered_lft = ci->ifa_prefered;
919         }
920
921         return ifa;
922
923 errout_free:
924         inet_free_ifa(ifa);
925 errout:
926         return ERR_PTR(err);
927 }
928
929 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
930 {
931         struct in_device *in_dev = ifa->ifa_dev;
932         struct in_ifaddr *ifa1;
933
934         if (!ifa->ifa_local)
935                 return NULL;
936
937         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
938                 if (ifa1->ifa_mask == ifa->ifa_mask &&
939                     inet_ifa_match(ifa1->ifa_address, ifa) &&
940                     ifa1->ifa_local == ifa->ifa_local)
941                         return ifa1;
942         }
943         return NULL;
944 }
945
946 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
947                             struct netlink_ext_ack *extack)
948 {
949         struct net *net = sock_net(skb->sk);
950         struct in_ifaddr *ifa;
951         struct in_ifaddr *ifa_existing;
952         __u32 valid_lft = INFINITY_LIFE_TIME;
953         __u32 prefered_lft = INFINITY_LIFE_TIME;
954
955         ASSERT_RTNL();
956
957         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
958         if (IS_ERR(ifa))
959                 return PTR_ERR(ifa);
960
961         ifa_existing = find_matching_ifa(ifa);
962         if (!ifa_existing) {
963                 /* It would be best to check for !NLM_F_CREATE here but
964                  * userspace already relies on not having to provide this.
965                  */
966                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
967                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
968                         int ret = ip_mc_autojoin_config(net, true, ifa);
969
970                         if (ret < 0) {
971                                 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
972                                 inet_free_ifa(ifa);
973                                 return ret;
974                         }
975                 }
976                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
977                                          extack);
978         } else {
979                 u32 new_metric = ifa->ifa_rt_priority;
980                 u8 new_proto = ifa->ifa_proto;
981
982                 inet_free_ifa(ifa);
983
984                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
985                     !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
986                         NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
987                         return -EEXIST;
988                 }
989                 ifa = ifa_existing;
990
991                 if (ifa->ifa_rt_priority != new_metric) {
992                         fib_modify_prefix_metric(ifa, new_metric);
993                         ifa->ifa_rt_priority = new_metric;
994                 }
995
996                 ifa->ifa_proto = new_proto;
997
998                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
999                 cancel_delayed_work(&check_lifetime_work);
1000                 queue_delayed_work(system_power_efficient_wq,
1001                                 &check_lifetime_work, 0);
1002                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1003         }
1004         return 0;
1005 }
1006
1007 /*
1008  *      Determine a default network mask, based on the IP address.
1009  */
1010
1011 static int inet_abc_len(__be32 addr)
1012 {
1013         int rc = -1;    /* Something else, probably a multicast. */
1014
1015         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1016                 rc = 0;
1017         else {
1018                 __u32 haddr = ntohl(addr);
1019                 if (IN_CLASSA(haddr))
1020                         rc = 8;
1021                 else if (IN_CLASSB(haddr))
1022                         rc = 16;
1023                 else if (IN_CLASSC(haddr))
1024                         rc = 24;
1025                 else if (IN_CLASSE(haddr))
1026                         rc = 32;
1027         }
1028
1029         return rc;
1030 }
1031
1032
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * @net: network namespace used for the capability check and device lookup
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; anything else yields -EINVAL
 * @ifr: request; ifr_name selects the device (optionally "name:alias"),
 *       ifr_addr/ifr_flags carry the input, and for the "get" commands
 *       ifr_addr is overwritten with the answer
 *
 * The "set" commands and SIOCSIFFLAGS require CAP_NET_ADMIN in the
 * namespace's user namespace. The function takes and releases the RTNL lock
 * itself.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EINVAL, -ENODEV,
 * -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags()/inet_set_ifa()
 * return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* Force NUL termination of the name before any string function. */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias suffix ("eth0:1" -> "eth0") so that the
	 * device load and lookup below use the bare device name.
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	/* First pass over cmd: permission and argument checks only. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		/* Only try the address match when the caller supplied an
		 * AF_INET address; the reply buffer is cleared either way.
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* Restore the alias suffix: labels are compared with the full name. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			/* ifap trails the walk so that it points at the link
			 * referring to ifa; it is handed to inet_del_ifa()
			 * further down.
			 */
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Every command except SIOCSIFADDR and SIOCSIFFLAGS needs an
	 * existing address to work on.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass over cmd: do the actual work. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias name: clearing IFF_UP removes that alias
			 * address; the device flags are left alone.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Same address already set: nothing to do. */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* NOTE(review): with a last argument of 0 the entry
			 * is presumably unlinked but kept for reuse below -
			 * confirm against inet_del_ifa().
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Derive a classful mask, and a broadcast address where that
		 * applies; point-to-point devices get a /32.
		 */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Take the entry out, modify it, put it back. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK:	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1270
1271 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1272 {
1273         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1274         const struct in_ifaddr *ifa;
1275         struct ifreq ifr;
1276         int done = 0;
1277
1278         if (WARN_ON(size > sizeof(struct ifreq)))
1279                 goto out;
1280
1281         if (!in_dev)
1282                 goto out;
1283
1284         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1285                 if (!buf) {
1286                         done += size;
1287                         continue;
1288                 }
1289                 if (len < size)
1290                         break;
1291                 memset(&ifr, 0, sizeof(struct ifreq));
1292                 strcpy(ifr.ifr_name, ifa->ifa_label);
1293
1294                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1295                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1296                                                                 ifa->ifa_local;
1297
1298                 if (copy_to_user(buf + done, &ifr, size)) {
1299                         done = -EFAULT;
1300                         break;
1301                 }
1302                 len  -= size;
1303                 done += size;
1304         }
1305 out:
1306         return done;
1307 }
1308
1309 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1310                                  int scope)
1311 {
1312         const struct in_ifaddr *ifa;
1313
1314         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1315                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1316                         continue;
1317                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1318                     ifa->ifa_scope <= scope)
1319                         return ifa->ifa_local;
1320         }
1321
1322         return 0;
1323 }
1324
/*
 * Select a local IPv4 address for use with @dev.
 *
 * @dev:   device to look at first
 * @dst:   when non-zero, prefer an address whose subnet matches it
 * @scope: upper bound on the (effective) scope value of the chosen address
 *
 * Search order: primary addresses on @dev, then the L3 master device of
 * @dev (if any), then every device in the namespace that shares the same
 * master index. Returns the chosen address or 0 if nothing qualifies.
 * The whole search runs inside an RCU read-side section taken here.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* With route_localnet enabled, the scope of each address is capped
	 * at link scope instead of host scope in the test below.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		/* A subnet match on dst (or no dst at all) wins outright. */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Otherwise remember the first eligible address as fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	/* Note: dev and in_dev are reassigned by this condition. */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Non-loopback addresses configured on the loopback device should be
	 * preferred in this case. It is important that lo is the first
	 * interface in the dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1394
/*
 * Per-device worker for inet_confirm_addr().
 *
 * @in_dev: device whose address list is scanned
 * @dst:    when non-zero, an address only counts as a subnet match if
 *          inet_ifa_match(dst, ifa) holds
 * @local:  when non-zero, the local address to confirm; when zero any
 *          address of acceptable scope may be picked
 * @scope:  upper bound on the effective scope value of the picked address
 *
 * Two things are tracked while walking the list: "addr", the candidate
 * local address, and "same", whether some entry satisfied the subnet test
 * for @local/@dst. The candidate is only returned when "same" ended up set;
 * otherwise 0 is returned.
 *
 * NOTE(review): the list is walked with the _rcu iterator, so the caller is
 * presumably expected to be in an RCU read-side section - confirm.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick the first entry that equals @local (or any entry when
		 * @local is 0) and is within the allowed scope.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet test already passed on an earlier entry. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1438
1439 /*
1440  * Confirm that local IP address exists using wildcards:
1441  * - net: netns to check, cannot be NULL
1442  * - in_dev: only on this interface, NULL=any interface
1443  * - dst: only in the same subnet as dst, 0=any dst
1444  * - local: address, 0=autoselect the local address
1445  * - scope: maximum allowed scope value for the local address
1446  */
1447 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1448                          __be32 dst, __be32 local, int scope)
1449 {
1450         __be32 addr = 0;
1451         struct net_device *dev;
1452
1453         if (in_dev)
1454                 return confirm_addr_indev(in_dev, dst, local, scope);
1455
1456         rcu_read_lock();
1457         for_each_netdev_rcu(net, dev) {
1458                 in_dev = __in_dev_get_rcu(dev);
1459                 if (in_dev) {
1460                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1461                         if (addr)
1462                                 break;
1463                 }
1464         }
1465         rcu_read_unlock();
1466
1467         return addr;
1468 }
1469 EXPORT_SYMBOL(inet_confirm_addr);
1470
1471 /*
1472  *      Device notifier
1473  */
1474
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1480
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1486
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1492
/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1499
1500 /* Rename ifa_labels for a device name change. Make some effort to preserve
1501  * existing alias numbering and to create unique labels if possible.
1502 */
1503 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1504 {
1505         struct in_ifaddr *ifa;
1506         int named = 0;
1507
1508         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1509                 char old[IFNAMSIZ], *dot;
1510
1511                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1512                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513                 if (named++ == 0)
1514                         goto skip;
1515                 dot = strchr(old, ':');
1516                 if (!dot) {
1517                         sprintf(old, ":%d", named);
1518                         dot = old;
1519                 }
1520                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1521                         strcat(ifa->ifa_label, dot);
1522                 else
1523                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1524 skip:
1525                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1526         }
1527 }
1528
1529 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1530                                         struct in_device *in_dev)
1531
1532 {
1533         const struct in_ifaddr *ifa;
1534
1535         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1536                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1537                          ifa->ifa_local, dev,
1538                          ifa->ifa_local, NULL,
1539                          dev->dev_addr, NULL);
1540         }
1541 }
1542
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback: keeps the IPv4 per-device state (in_device)
 * in step with device life-cycle events.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Returns NOTIFY_DONE, except when creating the in_device on
 * NETDEV_REGISTER fails, in which case the error is passed back through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state on this device yet: only REGISTER and CHANGEMTU may
	 * create it, every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected; log it and clear the pointer.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback gets 127.0.0.1/8 (host scope, infinite lifetime)
		 * configured automatically; allocation failure is tolerated.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce themselves via ARP when
		 * arp_notify is enabled on the device.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() calls
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which looks
		 * at odds with the comment above - confirm which is intended.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries for this in_device. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1638
1639 static struct notifier_block ip_netdev_notifier = {
1640         .notifier_call = inetdev_event,
1641 };
1642
1643 static size_t inet_nlmsg_size(void)
1644 {
1645         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1646                + nla_total_size(4) /* IFA_ADDRESS */
1647                + nla_total_size(4) /* IFA_LOCAL */
1648                + nla_total_size(4) /* IFA_BROADCAST */
1649                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1650                + nla_total_size(4)  /* IFA_FLAGS */
1651                + nla_total_size(1)  /* IFA_PROTO */
1652                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1653                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1654 }
1655
1656 static inline u32 cstamp_delta(unsigned long cstamp)
1657 {
1658         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1659 }
1660
1661 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1662                          unsigned long tstamp, u32 preferred, u32 valid)
1663 {
1664         struct ifa_cacheinfo ci;
1665
1666         ci.cstamp = cstamp_delta(cstamp);
1667         ci.tstamp = cstamp_delta(tstamp);
1668         ci.ifa_prefered = preferred;
1669         ci.ifa_valid = valid;
1670
1671         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1672 }
1673
1674 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1675                             struct inet_fill_args *args)
1676 {
1677         struct ifaddrmsg *ifm;
1678         struct nlmsghdr  *nlh;
1679         u32 preferred, valid;
1680
1681         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1682                         args->flags);
1683         if (!nlh)
1684                 return -EMSGSIZE;
1685
1686         ifm = nlmsg_data(nlh);
1687         ifm->ifa_family = AF_INET;
1688         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1689         ifm->ifa_flags = ifa->ifa_flags;
1690         ifm->ifa_scope = ifa->ifa_scope;
1691         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1692
1693         if (args->netnsid >= 0 &&
1694             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1695                 goto nla_put_failure;
1696
1697         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1698                 preferred = ifa->ifa_preferred_lft;
1699                 valid = ifa->ifa_valid_lft;
1700                 if (preferred != INFINITY_LIFE_TIME) {
1701                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1702
1703                         if (preferred > tval)
1704                                 preferred -= tval;
1705                         else
1706                                 preferred = 0;
1707                         if (valid != INFINITY_LIFE_TIME) {
1708                                 if (valid > tval)
1709                                         valid -= tval;
1710                                 else
1711                                         valid = 0;
1712                         }
1713                 }
1714         } else {
1715                 preferred = INFINITY_LIFE_TIME;
1716                 valid = INFINITY_LIFE_TIME;
1717         }
1718         if ((ifa->ifa_address &&
1719              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1720             (ifa->ifa_local &&
1721              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1722             (ifa->ifa_broadcast &&
1723              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1724             (ifa->ifa_label[0] &&
1725              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1726             (ifa->ifa_proto &&
1727              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1728             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1729             (ifa->ifa_rt_priority &&
1730              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1731             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1732                           preferred, valid))
1733                 goto nla_put_failure;
1734
1735         nlmsg_end(skb, nlh);
1736         return 0;
1737
1738 nla_put_failure:
1739         nlmsg_cancel(skb, nlh);
1740         return -EMSGSIZE;
1741 }
1742
1743 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1744                                       struct inet_fill_args *fillargs,
1745                                       struct net **tgt_net, struct sock *sk,
1746                                       struct netlink_callback *cb)
1747 {
1748         struct netlink_ext_ack *extack = cb->extack;
1749         struct nlattr *tb[IFA_MAX+1];
1750         struct ifaddrmsg *ifm;
1751         int err, i;
1752
1753         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1754                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1755                 return -EINVAL;
1756         }
1757
1758         ifm = nlmsg_data(nlh);
1759         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1760                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1761                 return -EINVAL;
1762         }
1763
1764         fillargs->ifindex = ifm->ifa_index;
1765         if (fillargs->ifindex) {
1766                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1767                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1768         }
1769
1770         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1771                                             ifa_ipv4_policy, extack);
1772         if (err < 0)
1773                 return err;
1774
1775         for (i = 0; i <= IFA_MAX; ++i) {
1776                 if (!tb[i])
1777                         continue;
1778
1779                 if (i == IFA_TARGET_NETNSID) {
1780                         struct net *net;
1781
1782                         fillargs->netnsid = nla_get_s32(tb[i]);
1783
1784                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1785                         if (IS_ERR(net)) {
1786                                 fillargs->netnsid = -1;
1787                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1788                                 return PTR_ERR(net);
1789                         }
1790                         *tgt_net = net;
1791                 } else {
1792                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1793                         return -EINVAL;
1794                 }
1795         }
1796
1797         return 0;
1798 }
1799
1800 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1801                             struct netlink_callback *cb, int s_ip_idx,
1802                             struct inet_fill_args *fillargs)
1803 {
1804         struct in_ifaddr *ifa;
1805         int ip_idx = 0;
1806         int err;
1807
1808         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1809                 if (ip_idx < s_ip_idx) {
1810                         ip_idx++;
1811                         continue;
1812                 }
1813                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1814                 if (err < 0)
1815                         goto done;
1816
1817                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1818                 ip_idx++;
1819         }
1820         err = 0;
1821
1822 done:
1823         cb->args[2] = ip_idx;
1824
1825         return err;
1826 }
1827
1828 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1829 {
1830         const struct nlmsghdr *nlh = cb->nlh;
1831         struct inet_fill_args fillargs = {
1832                 .portid = NETLINK_CB(cb->skb).portid,
1833                 .seq = nlh->nlmsg_seq,
1834                 .event = RTM_NEWADDR,
1835                 .flags = NLM_F_MULTI,
1836                 .netnsid = -1,
1837         };
1838         struct net *net = sock_net(skb->sk);
1839         struct net *tgt_net = net;
1840         int h, s_h;
1841         int idx, s_idx;
1842         int s_ip_idx;
1843         struct net_device *dev;
1844         struct in_device *in_dev;
1845         struct hlist_head *head;
1846         int err = 0;
1847
1848         s_h = cb->args[0];
1849         s_idx = idx = cb->args[1];
1850         s_ip_idx = cb->args[2];
1851
1852         if (cb->strict_check) {
1853                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1854                                                  skb->sk, cb);
1855                 if (err < 0)
1856                         goto put_tgt_net;
1857
1858                 err = 0;
1859                 if (fillargs.ifindex) {
1860                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1861                         if (!dev) {
1862                                 err = -ENODEV;
1863                                 goto put_tgt_net;
1864                         }
1865
1866                         in_dev = __in_dev_get_rtnl(dev);
1867                         if (in_dev) {
1868                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1869                                                        &fillargs);
1870                         }
1871                         goto put_tgt_net;
1872                 }
1873         }
1874
1875         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1876                 idx = 0;
1877                 head = &tgt_net->dev_index_head[h];
1878                 rcu_read_lock();
1879                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1880                           tgt_net->dev_base_seq;
1881                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1882                         if (idx < s_idx)
1883                                 goto cont;
1884                         if (h > s_h || idx > s_idx)
1885                                 s_ip_idx = 0;
1886                         in_dev = __in_dev_get_rcu(dev);
1887                         if (!in_dev)
1888                                 goto cont;
1889
1890                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1891                                                &fillargs);
1892                         if (err < 0) {
1893                                 rcu_read_unlock();
1894                                 goto done;
1895                         }
1896 cont:
1897                         idx++;
1898                 }
1899                 rcu_read_unlock();
1900         }
1901
1902 done:
1903         cb->args[0] = h;
1904         cb->args[1] = idx;
1905 put_tgt_net:
1906         if (fillargs.netnsid >= 0)
1907                 put_net(tgt_net);
1908
1909         return skb->len ? : err;
1910 }
1911
1912 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1913                       u32 portid)
1914 {
1915         struct inet_fill_args fillargs = {
1916                 .portid = portid,
1917                 .seq = nlh ? nlh->nlmsg_seq : 0,
1918                 .event = event,
1919                 .flags = 0,
1920                 .netnsid = -1,
1921         };
1922         struct sk_buff *skb;
1923         int err = -ENOBUFS;
1924         struct net *net;
1925
1926         net = dev_net(ifa->ifa_dev->dev);
1927         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1928         if (!skb)
1929                 goto errout;
1930
1931         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1932         if (err < 0) {
1933                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1934                 WARN_ON(err == -EMSGSIZE);
1935                 kfree_skb(skb);
1936                 goto errout;
1937         }
1938         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1939         return;
1940 errout:
1941         if (err < 0)
1942                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1943 }
1944
1945 static size_t inet_get_link_af_size(const struct net_device *dev,
1946                                     u32 ext_filter_mask)
1947 {
1948         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1949
1950         if (!in_dev)
1951                 return 0;
1952
1953         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1954 }
1955
1956 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1957                              u32 ext_filter_mask)
1958 {
1959         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1960         struct nlattr *nla;
1961         int i;
1962
1963         if (!in_dev)
1964                 return -ENODATA;
1965
1966         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1967         if (!nla)
1968                 return -EMSGSIZE;
1969
1970         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1971                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1972
1973         return 0;
1974 }
1975
1976 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1977         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1978 };
1979
1980 static int inet_validate_link_af(const struct net_device *dev,
1981                                  const struct nlattr *nla,
1982                                  struct netlink_ext_ack *extack)
1983 {
1984         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1985         int err, rem;
1986
1987         if (dev && !__in_dev_get_rtnl(dev))
1988                 return -EAFNOSUPPORT;
1989
1990         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1991                                           inet_af_policy, extack);
1992         if (err < 0)
1993                 return err;
1994
1995         if (tb[IFLA_INET_CONF]) {
1996                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1997                         int cfgid = nla_type(a);
1998
1999                         if (nla_len(a) < 4)
2000                                 return -EINVAL;
2001
2002                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2003                                 return -EINVAL;
2004                 }
2005         }
2006
2007         return 0;
2008 }
2009
2010 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2011                             struct netlink_ext_ack *extack)
2012 {
2013         struct in_device *in_dev = __in_dev_get_rtnl(dev);
2014         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2015         int rem;
2016
2017         if (!in_dev)
2018                 return -EAFNOSUPPORT;
2019
2020         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2021                 return -EINVAL;
2022
2023         if (tb[IFLA_INET_CONF]) {
2024                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2025                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2026         }
2027
2028         return 0;
2029 }
2030
2031 static int inet_netconf_msgsize_devconf(int type)
2032 {
2033         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2034                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2035         bool all = false;
2036
2037         if (type == NETCONFA_ALL)
2038                 all = true;
2039
2040         if (all || type == NETCONFA_FORWARDING)
2041                 size += nla_total_size(4);
2042         if (all || type == NETCONFA_RP_FILTER)
2043                 size += nla_total_size(4);
2044         if (all || type == NETCONFA_MC_FORWARDING)
2045                 size += nla_total_size(4);
2046         if (all || type == NETCONFA_BC_FORWARDING)
2047                 size += nla_total_size(4);
2048         if (all || type == NETCONFA_PROXY_NEIGH)
2049                 size += nla_total_size(4);
2050         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2051                 size += nla_total_size(4);
2052
2053         return size;
2054 }
2055
2056 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2057                                      struct ipv4_devconf *devconf, u32 portid,
2058                                      u32 seq, int event, unsigned int flags,
2059                                      int type)
2060 {
2061         struct nlmsghdr  *nlh;
2062         struct netconfmsg *ncm;
2063         bool all = false;
2064
2065         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2066                         flags);
2067         if (!nlh)
2068                 return -EMSGSIZE;
2069
2070         if (type == NETCONFA_ALL)
2071                 all = true;
2072
2073         ncm = nlmsg_data(nlh);
2074         ncm->ncm_family = AF_INET;
2075
2076         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2077                 goto nla_put_failure;
2078
2079         if (!devconf)
2080                 goto out;
2081
2082         if ((all || type == NETCONFA_FORWARDING) &&
2083             nla_put_s32(skb, NETCONFA_FORWARDING,
2084                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2085                 goto nla_put_failure;
2086         if ((all || type == NETCONFA_RP_FILTER) &&
2087             nla_put_s32(skb, NETCONFA_RP_FILTER,
2088                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2089                 goto nla_put_failure;
2090         if ((all || type == NETCONFA_MC_FORWARDING) &&
2091             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2092                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2093                 goto nla_put_failure;
2094         if ((all || type == NETCONFA_BC_FORWARDING) &&
2095             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2096                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2097                 goto nla_put_failure;
2098         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2099             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2100                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2101                 goto nla_put_failure;
2102         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2103             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2104                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2105                 goto nla_put_failure;
2106
2107 out:
2108         nlmsg_end(skb, nlh);
2109         return 0;
2110
2111 nla_put_failure:
2112         nlmsg_cancel(skb, nlh);
2113         return -EMSGSIZE;
2114 }
2115
2116 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2117                                  int ifindex, struct ipv4_devconf *devconf)
2118 {
2119         struct sk_buff *skb;
2120         int err = -ENOBUFS;
2121
2122         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2123         if (!skb)
2124                 goto errout;
2125
2126         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2127                                         event, 0, type);
2128         if (err < 0) {
2129                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2130                 WARN_ON(err == -EMSGSIZE);
2131                 kfree_skb(skb);
2132                 goto errout;
2133         }
2134         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2135         return;
2136 errout:
2137         if (err < 0)
2138                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2139 }
2140
2141 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2142         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2143         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2144         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2145         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2146         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2147 };
2148
2149 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2150                                       const struct nlmsghdr *nlh,
2151                                       struct nlattr **tb,
2152                                       struct netlink_ext_ack *extack)
2153 {
2154         int i, err;
2155
2156         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2157                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2158                 return -EINVAL;
2159         }
2160
2161         if (!netlink_strict_get_check(skb))
2162                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2163                                               tb, NETCONFA_MAX,
2164                                               devconf_ipv4_policy, extack);
2165
2166         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2167                                             tb, NETCONFA_MAX,
2168                                             devconf_ipv4_policy, extack);
2169         if (err)
2170                 return err;
2171
2172         for (i = 0; i <= NETCONFA_MAX; i++) {
2173                 if (!tb[i])
2174                         continue;
2175
2176                 switch (i) {
2177                 case NETCONFA_IFINDEX:
2178                         break;
2179                 default:
2180                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2181                         return -EINVAL;
2182                 }
2183         }
2184
2185         return 0;
2186 }
2187
2188 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2189                                     struct nlmsghdr *nlh,
2190                                     struct netlink_ext_ack *extack)
2191 {
2192         struct net *net = sock_net(in_skb->sk);
2193         struct nlattr *tb[NETCONFA_MAX+1];
2194         struct sk_buff *skb;
2195         struct ipv4_devconf *devconf;
2196         struct in_device *in_dev;
2197         struct net_device *dev;
2198         int ifindex;
2199         int err;
2200
2201         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2202         if (err)
2203                 goto errout;
2204
2205         err = -EINVAL;
2206         if (!tb[NETCONFA_IFINDEX])
2207                 goto errout;
2208
2209         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2210         switch (ifindex) {
2211         case NETCONFA_IFINDEX_ALL:
2212                 devconf = net->ipv4.devconf_all;
2213                 break;
2214         case NETCONFA_IFINDEX_DEFAULT:
2215                 devconf = net->ipv4.devconf_dflt;
2216                 break;
2217         default:
2218                 dev = __dev_get_by_index(net, ifindex);
2219                 if (!dev)
2220                         goto errout;
2221                 in_dev = __in_dev_get_rtnl(dev);
2222                 if (!in_dev)
2223                         goto errout;
2224                 devconf = &in_dev->cnf;
2225                 break;
2226         }
2227
2228         err = -ENOBUFS;
2229         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2230         if (!skb)
2231                 goto errout;
2232
2233         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2234                                         NETLINK_CB(in_skb).portid,
2235                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2236                                         NETCONFA_ALL);
2237         if (err < 0) {
2238                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2239                 WARN_ON(err == -EMSGSIZE);
2240                 kfree_skb(skb);
2241                 goto errout;
2242         }
2243         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2244 errout:
2245         return err;
2246 }
2247
2248 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249                                      struct netlink_callback *cb)
2250 {
2251         const struct nlmsghdr *nlh = cb->nlh;
2252         struct net *net = sock_net(skb->sk);
2253         int h, s_h;
2254         int idx, s_idx;
2255         struct net_device *dev;
2256         struct in_device *in_dev;
2257         struct hlist_head *head;
2258
2259         if (cb->strict_check) {
2260                 struct netlink_ext_ack *extack = cb->extack;
2261                 struct netconfmsg *ncm;
2262
2263                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2264                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2265                         return -EINVAL;
2266                 }
2267
2268                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2269                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2270                         return -EINVAL;
2271                 }
2272         }
2273
2274         s_h = cb->args[0];
2275         s_idx = idx = cb->args[1];
2276
2277         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2278                 idx = 0;
2279                 head = &net->dev_index_head[h];
2280                 rcu_read_lock();
2281                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2282                           net->dev_base_seq;
2283                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2284                         if (idx < s_idx)
2285                                 goto cont;
2286                         in_dev = __in_dev_get_rcu(dev);
2287                         if (!in_dev)
2288                                 goto cont;
2289
2290                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2291                                                       &in_dev->cnf,
2292                                                       NETLINK_CB(cb->skb).portid,
2293                                                       nlh->nlmsg_seq,
2294                                                       RTM_NEWNETCONF,
2295                                                       NLM_F_MULTI,
2296                                                       NETCONFA_ALL) < 0) {
2297                                 rcu_read_unlock();
2298                                 goto done;
2299                         }
2300                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2301 cont:
2302                         idx++;
2303                 }
2304                 rcu_read_unlock();
2305         }
2306         if (h == NETDEV_HASHENTRIES) {
2307                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2308                                               net->ipv4.devconf_all,
2309                                               NETLINK_CB(cb->skb).portid,
2310                                               nlh->nlmsg_seq,
2311                                               RTM_NEWNETCONF, NLM_F_MULTI,
2312                                               NETCONFA_ALL) < 0)
2313                         goto done;
2314                 else
2315                         h++;
2316         }
2317         if (h == NETDEV_HASHENTRIES + 1) {
2318                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2319                                               net->ipv4.devconf_dflt,
2320                                               NETLINK_CB(cb->skb).portid,
2321                                               nlh->nlmsg_seq,
2322                                               RTM_NEWNETCONF, NLM_F_MULTI,
2323                                               NETCONFA_ALL) < 0)
2324                         goto done;
2325                 else
2326                         h++;
2327         }
2328 done:
2329         cb->args[0] = h;
2330         cb->args[1] = idx;
2331
2332         return skb->len;
2333 }
2334
2335 #ifdef CONFIG_SYSCTL
2336
2337 static void devinet_copy_dflt_conf(struct net *net, int i)
2338 {
2339         struct net_device *dev;
2340
2341         rcu_read_lock();
2342         for_each_netdev_rcu(net, dev) {
2343                 struct in_device *in_dev;
2344
2345                 in_dev = __in_dev_get_rcu(dev);
2346                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2347                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2348         }
2349         rcu_read_unlock();
2350 }
2351
2352 /* called with RTNL locked */
2353 static void inet_forward_change(struct net *net)
2354 {
2355         struct net_device *dev;
2356         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2357
2358         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2359         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2360         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361                                     NETCONFA_FORWARDING,
2362                                     NETCONFA_IFINDEX_ALL,
2363                                     net->ipv4.devconf_all);
2364         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2365                                     NETCONFA_FORWARDING,
2366                                     NETCONFA_IFINDEX_DEFAULT,
2367                                     net->ipv4.devconf_dflt);
2368
2369         for_each_netdev(net, dev) {
2370                 struct in_device *in_dev;
2371
2372                 if (on)
2373                         dev_disable_lro(dev);
2374
2375                 in_dev = __in_dev_get_rtnl(dev);
2376                 if (in_dev) {
2377                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2378                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2379                                                     NETCONFA_FORWARDING,
2380                                                     dev->ifindex, &in_dev->cnf);
2381                 }
2382         }
2383 }
2384
2385 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2386 {
2387         if (cnf == net->ipv4.devconf_dflt)
2388                 return NETCONFA_IFINDEX_DEFAULT;
2389         else if (cnf == net->ipv4.devconf_all)
2390                 return NETCONFA_IFINDEX_ALL;
2391         else {
2392                 struct in_device *idev
2393                         = container_of(cnf, struct in_device, cnf);
2394                 return idev->dev->ifindex;
2395         }
2396 }
2397
2398 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2399                              void *buffer, size_t *lenp, loff_t *ppos)
2400 {
2401         int old_value = *(int *)ctl->data;
2402         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2403         int new_value = *(int *)ctl->data;
2404
2405         if (write) {
2406                 struct ipv4_devconf *cnf = ctl->extra1;
2407                 struct net *net = ctl->extra2;
2408                 int i = (int *)ctl->data - cnf->data;
2409                 int ifindex;
2410
2411                 set_bit(i, cnf->state);
2412
2413                 if (cnf == net->ipv4.devconf_dflt)
2414                         devinet_copy_dflt_conf(net, i);
2415                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2416                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2417                         if ((new_value == 0) && (old_value != 0))
2418                                 rt_cache_flush(net);
2419
2420                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2421                     new_value != old_value)
2422                         rt_cache_flush(net);
2423
2424                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2425                     new_value != old_value) {
2426                         ifindex = devinet_conf_ifindex(net, cnf);
2427                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2428                                                     NETCONFA_RP_FILTER,
2429                                                     ifindex, cnf);
2430                 }
2431                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2432                     new_value != old_value) {
2433                         ifindex = devinet_conf_ifindex(net, cnf);
2434                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2435                                                     NETCONFA_PROXY_NEIGH,
2436                                                     ifindex, cnf);
2437                 }
2438                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2439                     new_value != old_value) {
2440                         ifindex = devinet_conf_ifindex(net, cnf);
2441                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2442                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2443                                                     ifindex, cnf);
2444                 }
2445         }
2446
2447         return ret;
2448 }
2449
2450 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2451                                   void *buffer, size_t *lenp, loff_t *ppos)
2452 {
2453         int *valp = ctl->data;
2454         int val = *valp;
2455         loff_t pos = *ppos;
2456         struct net *net = ctl->extra2;
2457         int ret;
2458
2459         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2460                 return -EPERM;
2461
2462         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2463
2464         if (write && *valp != val) {
2465                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2466                         if (!rtnl_trylock()) {
2467                                 /* Restore the original values before restarting */
2468                                 *valp = val;
2469                                 *ppos = pos;
2470                                 return restart_syscall();
2471                         }
2472                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2473                                 inet_forward_change(net);
2474                         } else {
2475                                 struct ipv4_devconf *cnf = ctl->extra1;
2476                                 struct in_device *idev =
2477                                         container_of(cnf, struct in_device, cnf);
2478                                 if (*valp)
2479                                         dev_disable_lro(idev->dev);
2480                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2481                                                             NETCONFA_FORWARDING,
2482                                                             idev->dev->ifindex,
2483                                                             cnf);
2484                         }
2485                         rtnl_unlock();
2486                         rt_cache_flush(net);
2487                 } else
2488                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2489                                                     NETCONFA_FORWARDING,
2490                                                     NETCONFA_IFINDEX_DEFAULT,
2491                                                     net->ipv4.devconf_dflt);
2492         }
2493
2494         return ret;
2495 }
2496
2497 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2498                                 void *buffer, size_t *lenp, loff_t *ppos)
2499 {
2500         int *valp = ctl->data;
2501         int val = *valp;
2502         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2503         struct net *net = ctl->extra2;
2504
2505         if (write && *valp != val)
2506                 rt_cache_flush(net);
2507
2508         return ret;
2509 }
2510
/*
 * Initializer for one struct ctl_table entry describing the devconf
 * attribute IPV4_DEVCONF_<attr>.
 *
 *   attr    - attribute suffix; the entry's data pointer refers to slot
 *             IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.data[]
 *   knob    - file name of the entry
 *   perm    - file mode
 *   handler - proc handler serving the entry
 *
 * .extra1 refers to the global ipv4_devconf; __devinet_sysctl_register()
 * rebases .data and .extra1 onto the configuration actually being registered.
 */
#define DEVINET_SYSCTL_ENTRY(attr, knob, perm, handler) \
	{ \
		.procname	= knob, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry (mode 0644) served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, knob) \
	DEVINET_SYSCTL_ENTRY(attr, knob, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444) served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, knob) \
	DEVINET_SYSCTL_ENTRY(attr, knob, 0444, devinet_conf_proc)

/* Read-write entry (mode 0644) served by a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, knob, handler) \
	DEVINET_SYSCTL_ENTRY(attr, knob, 0644, handler)

/* Read-write entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, knob) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, knob, ipv4_doint_and_flush)
2533
2534 static struct devinet_sysctl_table {
2535         struct ctl_table_header *sysctl_header;
2536         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2537 } devinet_sysctl = {
2538         .devinet_vars = {
2539                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2540                                              devinet_sysctl_forward),
2541                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2542                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2543
2544                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2545                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2546                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2547                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2548                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2549                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2550                                         "accept_source_route"),
2551                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2552                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2553                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2554                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2555                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2556                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2557                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2558                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2559                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2560                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2561                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2562                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2563                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2564                                         "arp_evict_nocarrier"),
2565                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2566                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2567                                         "force_igmp_version"),
2568                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2569                                         "igmpv2_unsolicited_report_interval"),
2570                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2571                                         "igmpv3_unsolicited_report_interval"),
2572                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2573                                         "ignore_routes_with_linkdown"),
2574                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2575                                         "drop_gratuitous_arp"),
2576
2577                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2578                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2579                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2580                                               "promote_secondaries"),
2581                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2582                                               "route_localnet"),
2583                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2584                                               "drop_unicast_in_l2_multicast"),
2585         },
2586 };
2587
2588 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2589                                      int ifindex, struct ipv4_devconf *p)
2590 {
2591         int i;
2592         struct devinet_sysctl_table *t;
2593         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2594
2595         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2596         if (!t)
2597                 goto out;
2598
2599         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2600                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2601                 t->devinet_vars[i].extra1 = p;
2602                 t->devinet_vars[i].extra2 = net;
2603         }
2604
2605         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2606
2607         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2608         if (!t->sysctl_header)
2609                 goto free;
2610
2611         p->sysctl = t;
2612
2613         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2614                                     ifindex, p);
2615         return 0;
2616
2617 free:
2618         kfree(t);
2619 out:
2620         return -ENOMEM;
2621 }
2622
2623 static void __devinet_sysctl_unregister(struct net *net,
2624                                         struct ipv4_devconf *cnf, int ifindex)
2625 {
2626         struct devinet_sysctl_table *t = cnf->sysctl;
2627
2628         if (t) {
2629                 cnf->sysctl = NULL;
2630                 unregister_net_sysctl_table(t->sysctl_header);
2631                 kfree(t);
2632         }
2633
2634         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2635 }
2636
2637 static int devinet_sysctl_register(struct in_device *idev)
2638 {
2639         int err;
2640
2641         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2642                 return -EINVAL;
2643
2644         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2645         if (err)
2646                 return err;
2647         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2648                                         idev->dev->ifindex, &idev->cnf);
2649         if (err)
2650                 neigh_sysctl_unregister(idev->arp_parms);
2651         return err;
2652 }
2653
2654 static void devinet_sysctl_unregister(struct in_device *idev)
2655 {
2656         struct net *net = dev_net(idev->dev);
2657
2658         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2659         neigh_sysctl_unregister(idev->arp_parms);
2660 }
2661
2662 static struct ctl_table ctl_forward_entry[] = {
2663         {
2664                 .procname       = "ip_forward",
2665                 .data           = &ipv4_devconf.data[
2666                                         IPV4_DEVCONF_FORWARDING - 1],
2667                 .maxlen         = sizeof(int),
2668                 .mode           = 0644,
2669                 .proc_handler   = devinet_sysctl_forward,
2670                 .extra1         = &ipv4_devconf,
2671                 .extra2         = &init_net,
2672         },
2673         { },
2674 };
2675 #endif
2676
2677 static __net_init int devinet_init_net(struct net *net)
2678 {
2679         int err;
2680         struct ipv4_devconf *all, *dflt;
2681 #ifdef CONFIG_SYSCTL
2682         struct ctl_table *tbl;
2683         struct ctl_table_header *forw_hdr;
2684 #endif
2685
2686         err = -ENOMEM;
2687         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2688         if (!all)
2689                 goto err_alloc_all;
2690
2691         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2692         if (!dflt)
2693                 goto err_alloc_dflt;
2694
2695 #ifdef CONFIG_SYSCTL
2696         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2697         if (!tbl)
2698                 goto err_alloc_ctl;
2699
2700         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2701         tbl[0].extra1 = all;
2702         tbl[0].extra2 = net;
2703 #endif
2704
2705         if (!net_eq(net, &init_net)) {
2706                 switch (net_inherit_devconf()) {
2707                 case 3:
2708                         /* copy from the current netns */
2709                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2710                                sizeof(ipv4_devconf));
2711                         memcpy(dflt,
2712                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2713                                sizeof(ipv4_devconf_dflt));
2714                         break;
2715                 case 0:
2716                 case 1:
2717                         /* copy from init_net */
2718                         memcpy(all, init_net.ipv4.devconf_all,
2719                                sizeof(ipv4_devconf));
2720                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2721                                sizeof(ipv4_devconf_dflt));
2722                         break;
2723                 case 2:
2724                         /* use compiled values */
2725                         break;
2726                 }
2727         }
2728
2729 #ifdef CONFIG_SYSCTL
2730         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2731         if (err < 0)
2732                 goto err_reg_all;
2733
2734         err = __devinet_sysctl_register(net, "default",
2735                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2736         if (err < 0)
2737                 goto err_reg_dflt;
2738
2739         err = -ENOMEM;
2740         forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2741                                           ARRAY_SIZE(ctl_forward_entry));
2742         if (!forw_hdr)
2743                 goto err_reg_ctl;
2744         net->ipv4.forw_hdr = forw_hdr;
2745 #endif
2746
2747         net->ipv4.devconf_all = all;
2748         net->ipv4.devconf_dflt = dflt;
2749         return 0;
2750
2751 #ifdef CONFIG_SYSCTL
2752 err_reg_ctl:
2753         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2754 err_reg_dflt:
2755         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2756 err_reg_all:
2757         kfree(tbl);
2758 err_alloc_ctl:
2759 #endif
2760         kfree(dflt);
2761 err_alloc_dflt:
2762         kfree(all);
2763 err_alloc_all:
2764         return err;
2765 }
2766
2767 static __net_exit void devinet_exit_net(struct net *net)
2768 {
2769 #ifdef CONFIG_SYSCTL
2770         struct ctl_table *tbl;
2771
2772         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2773         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2774         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2775                                     NETCONFA_IFINDEX_DEFAULT);
2776         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2777                                     NETCONFA_IFINDEX_ALL);
2778         kfree(tbl);
2779 #endif
2780         kfree(net->ipv4.devconf_dflt);
2781         kfree(net->ipv4.devconf_all);
2782 }
2783
2784 static __net_initdata struct pernet_operations devinet_ops = {
2785         .init = devinet_init_net,
2786         .exit = devinet_exit_net,
2787 };
2788
2789 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2790         .family           = AF_INET,
2791         .fill_link_af     = inet_fill_link_af,
2792         .get_link_af_size = inet_get_link_af_size,
2793         .validate_link_af = inet_validate_link_af,
2794         .set_link_af      = inet_set_link_af,
2795 };
2796
2797 void __init devinet_init(void)
2798 {
2799         int i;
2800
2801         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2802                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2803
2804         register_pernet_subsys(&devinet_ops);
2805         register_netdevice_notifier(&ip_netdev_notifier);
2806
2807         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2808
2809         rtnl_af_register(&inet_af_ops);
2810
2811         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2812         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2813         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2814         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2815                       inet_netconf_dump_devconf, 0);
2816 }