Merge tag 'dma-mapping-6.10-2024-05-31' of git://git.infradead.org/users/hch/dma...
[linux-block.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/* Address flags that are IPv6-only; they are masked off IPv4 addresses. */
#define IPV6ONLY_FLAGS                                          \
        (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |     \
         IFA_F_HOMEADDRESS | IFA_F_TENTATIVE |                  \
         IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92         },
93 };
94
/*
 * Access devconf attribute @attr in the table that net->ipv4.devconf_dflt
 * points at.  @net is parenthesized in the expansion so that any pointer
 * expression (not just a plain identifier) can be passed.  @attr is handed
 * to IPV4_DEVCONF() untouched.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99         [IFA_LOCAL]             = { .type = NLA_U32 },
100         [IFA_ADDRESS]           = { .type = NLA_U32 },
101         [IFA_BROADCAST]         = { .type = NLA_U32 },
102         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
104         [IFA_FLAGS]             = { .type = NLA_U32 },
105         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
106         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
107         [IFA_PROTO]             = { .type = NLA_U8 },
108 };
109
/*
 * Bundle of parameters for building an address netlink message.
 * NOTE(review): the consumer of this struct is outside this view; the
 * fields presumably carry the netlink portid/sequence/event/flags plus the
 * target netns id and interface index - confirm against the fill routine.
 */
struct inet_fill_args {
        u32 portid;
        u32 seq;
        int event;
        unsigned int flags;
        int netnsid;
        int ifindex;
};
118
/* Address hash table geometry: 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries (linked through ifa->hash), indexed by
 * inet_addr_hash().  Written under RTNL, walked under RCU.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
/*
 * Unhash @ifa from the address hash table.  Caller must hold RTNL.
 * Uses the _init variant of the RCU hlist delete, which re-initializes
 * the node after unlinking.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        ASSERT_RTNL();
        hlist_del_init_rcu(&ifa->hash);
}
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202                          struct in_ifaddr __rcu **ifap,
203                          int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210         return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227
228         if (ifa->ifa_dev)
229                 in_dev_put(ifa->ifa_dev);
230         kfree(ifa);
231 }
232
/*
 * Queue @ifa for release through inet_rcu_free_ifa() after an RCU grace
 * period.  The caller must already have unlinked it from any list.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        /* Our reference to ifa->ifa_dev must be freed ASAP
         * to release the reference to the netdev the same way.
         * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
         * Hence the "hurry" flavour of call_rcu.
         */
        call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
}
241
242 static void in_dev_free_rcu(struct rcu_head *head)
243 {
244         struct in_device *idev = container_of(head, struct in_device, rcu_head);
245
246         kfree(rcu_dereference_protected(idev->mc_hash, 1));
247         kfree(idev);
248 }
249
250 void in_dev_finish_destroy(struct in_device *idev)
251 {
252         struct net_device *dev = idev->dev;
253
254         WARN_ON(idev->ifa_list);
255         WARN_ON(idev->mc_list);
256 #ifdef NET_REFCNT_DEBUG
257         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
258 #endif
259         netdev_put(dev, &idev->dev_tracker);
260         if (!idev->dead)
261                 pr_err("Freeing alive in_device %p\n", idev);
262         else
263                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
264 }
265 EXPORT_SYMBOL(in_dev_finish_destroy);
266
267 static struct in_device *inetdev_init(struct net_device *dev)
268 {
269         struct in_device *in_dev;
270         int err = -ENOMEM;
271
272         ASSERT_RTNL();
273
274         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
275         if (!in_dev)
276                 goto out;
277         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
278                         sizeof(in_dev->cnf));
279         in_dev->cnf.sysctl = NULL;
280         in_dev->dev = dev;
281         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
282         if (!in_dev->arp_parms)
283                 goto out_kfree;
284         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
285                 dev_disable_lro(dev);
286         /* Reference in_dev->dev */
287         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
288         /* Account for reference dev->ip_ptr (below) */
289         refcount_set(&in_dev->refcnt, 1);
290
291         err = devinet_sysctl_register(in_dev);
292         if (err) {
293                 in_dev->dead = 1;
294                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295                 in_dev_put(in_dev);
296                 in_dev = NULL;
297                 goto out;
298         }
299         ip_mc_init_dev(in_dev);
300         if (dev->flags & IFF_UP)
301                 ip_mc_up(in_dev);
302
303         /* we can receive as soon as ip_ptr is set -- do this last */
304         rcu_assign_pointer(dev->ip_ptr, in_dev);
305 out:
306         return in_dev ?: ERR_PTR(err);
307 out_kfree:
308         kfree(in_dev);
309         in_dev = NULL;
310         goto out;
311 }
312
313 static void inetdev_destroy(struct in_device *in_dev)
314 {
315         struct net_device *dev;
316         struct in_ifaddr *ifa;
317
318         ASSERT_RTNL();
319
320         dev = in_dev->dev;
321
322         in_dev->dead = 1;
323
324         ip_mc_destroy_dev(in_dev);
325
326         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
327                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
328                 inet_free_ifa(ifa);
329         }
330
331         RCU_INIT_POINTER(dev->ip_ptr, NULL);
332
333         devinet_sysctl_unregister(in_dev);
334         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
335         arp_ifdown(dev);
336
337         in_dev_put(in_dev);
338 }
339
340 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
341 {
342         const struct in_ifaddr *ifa;
343
344         rcu_read_lock();
345         in_dev_for_each_ifa_rcu(ifa, in_dev) {
346                 if (inet_ifa_match(a, ifa)) {
347                         if (!b || inet_ifa_match(b, ifa)) {
348                                 rcu_read_unlock();
349                                 return 1;
350                         }
351                 }
352         }
353         rcu_read_unlock();
354         return 0;
355 }
356
/*
 * Remove the address that *ifap currently points at from @in_dev's list and
 * announce the removal.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or a predecessor's ifa_next) referencing the
 *           address to delete
 * @destroy: when non-zero the deleted in_ifaddr is handed to inet_free_ifa()
 *           at the end; otherwise the caller keeps ownership
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * When the deleted address is a primary, its matching secondaries are either
 * deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first one
 * is promoted to primary.  All of that is skipped when the device is already
 * marked dead.  Caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev,
                           struct in_ifaddr __rcu **ifap,
                           int destroy, struct nlmsghdr *nlh, u32 portid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1;
        struct in_ifaddr __rcu **last_prim;
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* ifa1 is the address being deleted. */
        ifa1 = rtnl_dereference(*ifap);
        last_prim = ifap;
        if (in_dev->dead)
                goto no_promotions;

        /* 1. Deleting primary ifaddr forces deletion all secondaries
         * unless alias promotion is set
         */

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

                while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
                        /* Remember the link after the last primary whose
                         * scope is not below ifa1's; a promoted address is
                         * re-inserted there.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = &ifa->ifa_next;

                        /* Skip entries that are not secondaries of ifa1's
                         * subnet; prev_prom tracks the last skipped entry.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* No promotion: delete this secondary too.
                                 * ifap1 is left in place because the link
                                 * now references the next entry.
                                 */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* First matching secondary gets promoted. */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

no_promotions:
        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                struct in_ifaddr *next_sec;

                next_sec = rtnl_dereference(promote->ifa_next);
                /* prev_prom is NULL when promote directly followed ifa1;
                 * unlinking ifa1 then already left it in the right place.
                 * Otherwise move it to just after the last primary.
                 */
                if (prev_prom) {
                        struct in_ifaddr *last_sec;

                        rcu_assign_pointer(prev_prom->ifa_next, next_sec);

                        last_sec = rtnl_dereference(*last_prim);
                        rcu_assign_pointer(promote->ifa_next, last_sec);
                        rcu_assign_pointer(*last_prim, promote);
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add routes for the remaining secondaries of the subnet
                 * that followed the promoted address.
                 */
                for (ifa = next_sec; ifa;
                     ifa = rtnl_dereference(ifa->ifa_next)) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
469
/*
 * Convenience wrapper around __inet_del_ifa() for deletions that are not
 * driven by a netlink request: no nlmsghdr and portid 0 are passed on.
 */
static void inet_del_ifa(struct in_device *in_dev,
                         struct in_ifaddr __rcu **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
476
/* Forward declaration; the handler is defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
480
481 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
482                              u32 portid, struct netlink_ext_ack *extack)
483 {
484         struct in_ifaddr __rcu **last_primary, **ifap;
485         struct in_device *in_dev = ifa->ifa_dev;
486         struct in_validator_info ivi;
487         struct in_ifaddr *ifa1;
488         int ret;
489
490         ASSERT_RTNL();
491
492         if (!ifa->ifa_local) {
493                 inet_free_ifa(ifa);
494                 return 0;
495         }
496
497         ifa->ifa_flags &= ~IFA_F_SECONDARY;
498         last_primary = &in_dev->ifa_list;
499
500         /* Don't set IPv6 only flags to IPv4 addresses */
501         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
502
503         ifap = &in_dev->ifa_list;
504         ifa1 = rtnl_dereference(*ifap);
505
506         while (ifa1) {
507                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
508                     ifa->ifa_scope <= ifa1->ifa_scope)
509                         last_primary = &ifa1->ifa_next;
510                 if (ifa1->ifa_mask == ifa->ifa_mask &&
511                     inet_ifa_match(ifa1->ifa_address, ifa)) {
512                         if (ifa1->ifa_local == ifa->ifa_local) {
513                                 inet_free_ifa(ifa);
514                                 return -EEXIST;
515                         }
516                         if (ifa1->ifa_scope != ifa->ifa_scope) {
517                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
518                                 inet_free_ifa(ifa);
519                                 return -EINVAL;
520                         }
521                         ifa->ifa_flags |= IFA_F_SECONDARY;
522                 }
523
524                 ifap = &ifa1->ifa_next;
525                 ifa1 = rtnl_dereference(*ifap);
526         }
527
528         /* Allow any devices that wish to register ifaddr validtors to weigh
529          * in now, before changes are committed.  The rntl lock is serializing
530          * access here, so the state should not change between a validator call
531          * and a final notify on commit.  This isn't invoked on promotion under
532          * the assumption that validators are checking the address itself, and
533          * not the flags.
534          */
535         ivi.ivi_addr = ifa->ifa_address;
536         ivi.ivi_dev = ifa->ifa_dev;
537         ivi.extack = extack;
538         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
539                                            NETDEV_UP, &ivi);
540         ret = notifier_to_errno(ret);
541         if (ret) {
542                 inet_free_ifa(ifa);
543                 return ret;
544         }
545
546         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
547                 ifap = last_primary;
548
549         rcu_assign_pointer(ifa->ifa_next, *ifap);
550         rcu_assign_pointer(*ifap, ifa);
551
552         inet_hash_insert(dev_net(in_dev->dev), ifa);
553
554         cancel_delayed_work(&check_lifetime_work);
555         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
556
557         /* Send message first, then call notifier.
558            Notifier will trigger FIB update, so that
559            listeners of netlink will know about new ifaddr */
560         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
561         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
562
563         return 0;
564 }
565
/*
 * Convenience wrapper around __inet_insert_ifa() for insertions that are not
 * driven by a netlink request: no nlmsghdr, portid 0 and no extack.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
570
571 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
572 {
573         struct in_device *in_dev = __in_dev_get_rtnl(dev);
574
575         ASSERT_RTNL();
576
577         if (!in_dev) {
578                 inet_free_ifa(ifa);
579                 return -ENOBUFS;
580         }
581         ipv4_devconf_setall(in_dev);
582         neigh_parms_data_state_setall(in_dev->arp_parms);
583         if (ifa->ifa_dev != in_dev) {
584                 WARN_ON(ifa->ifa_dev);
585                 in_dev_hold(in_dev);
586                 ifa->ifa_dev = in_dev;
587         }
588         if (ipv4_is_loopback(ifa->ifa_local))
589                 ifa->ifa_scope = RT_SCOPE_HOST;
590         return inet_insert_ifa(ifa);
591 }
592
593 /* Caller must hold RCU or RTNL :
594  * We dont take a reference on found in_device
595  */
596 struct in_device *inetdev_by_index(struct net *net, int ifindex)
597 {
598         struct net_device *dev;
599         struct in_device *in_dev = NULL;
600
601         rcu_read_lock();
602         dev = dev_get_by_index_rcu(net, ifindex);
603         if (dev)
604                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
605         rcu_read_unlock();
606         return in_dev;
607 }
608 EXPORT_SYMBOL(inetdev_by_index);
609
610 /* Called only from RTNL semaphored context. No locks. */
611
612 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
613                                     __be32 mask)
614 {
615         struct in_ifaddr *ifa;
616
617         ASSERT_RTNL();
618
619         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
620                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
621                         return ifa;
622         }
623         return NULL;
624 }
625
626 static int ip_mc_autojoin_config(struct net *net, bool join,
627                                  const struct in_ifaddr *ifa)
628 {
629 #if defined(CONFIG_IP_MULTICAST)
630         struct ip_mreqn mreq = {
631                 .imr_multiaddr.s_addr = ifa->ifa_address,
632                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
633         };
634         struct sock *sk = net->ipv4.mc_autojoin_sk;
635         int ret;
636
637         ASSERT_RTNL();
638
639         lock_sock(sk);
640         if (join)
641                 ret = ip_mc_join_group(sk, &mreq);
642         else
643                 ret = ip_mc_leave_group(sk, &mreq);
644         release_sock(sk);
645
646         return ret;
647 #else
648         return -EOPNOTSUPP;
649 #endif
650 }
651
652 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
653                             struct netlink_ext_ack *extack)
654 {
655         struct net *net = sock_net(skb->sk);
656         struct in_ifaddr __rcu **ifap;
657         struct nlattr *tb[IFA_MAX+1];
658         struct in_device *in_dev;
659         struct ifaddrmsg *ifm;
660         struct in_ifaddr *ifa;
661         int err;
662
663         ASSERT_RTNL();
664
665         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
666                                      ifa_ipv4_policy, extack);
667         if (err < 0)
668                 goto errout;
669
670         ifm = nlmsg_data(nlh);
671         in_dev = inetdev_by_index(net, ifm->ifa_index);
672         if (!in_dev) {
673                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
674                 err = -ENODEV;
675                 goto errout;
676         }
677
678         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
679              ifap = &ifa->ifa_next) {
680                 if (tb[IFA_LOCAL] &&
681                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
682                         continue;
683
684                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
685                         continue;
686
687                 if (tb[IFA_ADDRESS] &&
688                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
689                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
690                         continue;
691
692                 if (ipv4_is_multicast(ifa->ifa_address))
693                         ip_mc_autojoin_config(net, false, ifa);
694                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
695                 return 0;
696         }
697
698         NL_SET_ERR_MSG(extack, "ipv4: Address not found");
699         err = -EADDRNOTAVAIL;
700 errout:
701         return err;
702 }
703
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME      0xFFFFFFFF

/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every hash bucket is scanned twice at most: first under RCU only, to find
 * out whether anything in the bucket needs changing and to compute the next
 * wake-up time; then, only if needed, under RTNL to delete addresses whose
 * valid lifetime has run out and to flag addresses past their preferred
 * lifetime as deprecated.  Addresses flagged IFA_F_PERMANENT are ignored.
 * The work item re-queues itself at the end.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        /* Default wake-up when no address expires sooner. */
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: lockless scan, values sampled with READ_ONCE(). */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age, tstamp;
                        u32 preferred_lft;
                        u32 valid_lft;
                        u32 flags;

                        flags = READ_ONCE(ifa->ifa_flags);
                        if (flags & IFA_F_PERMANENT)
                                continue;

                        preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
                        valid_lft = READ_ONCE(ifa->ifa_valid_lft);
                        tstamp = READ_ONCE(ifa->ifa_tstamp);
                        /* We try to batch several events at once. */
                        age = (now - tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (valid_lft != INFINITY_LIFE_TIME &&
                            age >= valid_lft) {
                                /* Expired: must be deleted in pass 2. */
                                change_needed = true;
                        } else if (preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= preferred_lft) {
                                /* Past preferred lifetime: the next event
                                 * for this address is its expiry.
                                 */
                                if (time_before(tstamp + valid_lft * HZ, next))
                                        next = tstamp + valid_lft * HZ;

                                if (!(flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(tstamp + preferred_lft * HZ,
                                               next)) {
                                /* Next event is the deprecation time. */
                                next = tstamp + preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: re-evaluate under RTNL and apply the changes. */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr __rcu **ifap;
                                struct in_ifaddr *tmp;

                                /* Locate the list link that references ifa,
                                 * as inet_del_ifa() needs it.
                                 */
                                ifap = &ifa->ifa_dev->ifa_list;
                                tmp = rtnl_dereference(*ifap);
                                while (tmp) {
                                        if (tmp == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                        ifap = &tmp->ifa_next;
                                        tmp = rtnl_dereference(*ifap);
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
810
811 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
812                              __u32 prefered_lft)
813 {
814         unsigned long timeout;
815         u32 flags;
816
817         flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
818
819         timeout = addrconf_timeout_fixup(valid_lft, HZ);
820         if (addrconf_finite_timeout(timeout))
821                 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
822         else
823                 flags |= IFA_F_PERMANENT;
824
825         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
826         if (addrconf_finite_timeout(timeout)) {
827                 if (timeout == 0)
828                         flags |= IFA_F_DEPRECATED;
829                 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
830         }
831         WRITE_ONCE(ifa->ifa_flags, flags);
832         WRITE_ONCE(ifa->ifa_tstamp, jiffies);
833         if (!ifa->ifa_cstamp)
834                 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
835 }
836
837 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
838                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
839                                        struct netlink_ext_ack *extack)
840 {
841         struct nlattr *tb[IFA_MAX+1];
842         struct in_ifaddr *ifa;
843         struct ifaddrmsg *ifm;
844         struct net_device *dev;
845         struct in_device *in_dev;
846         int err;
847
848         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
849                                      ifa_ipv4_policy, extack);
850         if (err < 0)
851                 goto errout;
852
853         ifm = nlmsg_data(nlh);
854         err = -EINVAL;
855
856         if (ifm->ifa_prefixlen > 32) {
857                 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
858                 goto errout;
859         }
860
861         if (!tb[IFA_LOCAL]) {
862                 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
863                 goto errout;
864         }
865
866         dev = __dev_get_by_index(net, ifm->ifa_index);
867         err = -ENODEV;
868         if (!dev) {
869                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
870                 goto errout;
871         }
872
873         in_dev = __in_dev_get_rtnl(dev);
874         err = -ENOBUFS;
875         if (!in_dev)
876                 goto errout;
877
878         ifa = inet_alloc_ifa();
879         if (!ifa)
880                 /*
881                  * A potential indev allocation can be left alive, it stays
882                  * assigned to its device and is destroy with it.
883                  */
884                 goto errout;
885
886         ipv4_devconf_setall(in_dev);
887         neigh_parms_data_state_setall(in_dev->arp_parms);
888         in_dev_hold(in_dev);
889
890         if (!tb[IFA_ADDRESS])
891                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
892
893         INIT_HLIST_NODE(&ifa->hash);
894         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
895         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
896         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
897                                          ifm->ifa_flags;
898         ifa->ifa_scope = ifm->ifa_scope;
899         ifa->ifa_dev = in_dev;
900
901         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
902         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
903
904         if (tb[IFA_BROADCAST])
905                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
906
907         if (tb[IFA_LABEL])
908                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
909         else
910                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
911
912         if (tb[IFA_RT_PRIORITY])
913                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
914
915         if (tb[IFA_PROTO])
916                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
917
918         if (tb[IFA_CACHEINFO]) {
919                 struct ifa_cacheinfo *ci;
920
921                 ci = nla_data(tb[IFA_CACHEINFO]);
922                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
923                         NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
924                         err = -EINVAL;
925                         goto errout_free;
926                 }
927                 *pvalid_lft = ci->ifa_valid;
928                 *pprefered_lft = ci->ifa_prefered;
929         }
930
931         return ifa;
932
933 errout_free:
934         inet_free_ifa(ifa);
935 errout:
936         return ERR_PTR(err);
937 }
938
939 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
940 {
941         struct in_device *in_dev = ifa->ifa_dev;
942         struct in_ifaddr *ifa1;
943
944         if (!ifa->ifa_local)
945                 return NULL;
946
947         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
948                 if (ifa1->ifa_mask == ifa->ifa_mask &&
949                     inet_ifa_match(ifa1->ifa_address, ifa) &&
950                     ifa1->ifa_local == ifa->ifa_local)
951                         return ifa1;
952         }
953         return NULL;
954 }
955
956 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
957                             struct netlink_ext_ack *extack)
958 {
959         struct net *net = sock_net(skb->sk);
960         struct in_ifaddr *ifa;
961         struct in_ifaddr *ifa_existing;
962         __u32 valid_lft = INFINITY_LIFE_TIME;
963         __u32 prefered_lft = INFINITY_LIFE_TIME;
964
965         ASSERT_RTNL();
966
967         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
968         if (IS_ERR(ifa))
969                 return PTR_ERR(ifa);
970
971         ifa_existing = find_matching_ifa(ifa);
972         if (!ifa_existing) {
973                 /* It would be best to check for !NLM_F_CREATE here but
974                  * userspace already relies on not having to provide this.
975                  */
976                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
977                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
978                         int ret = ip_mc_autojoin_config(net, true, ifa);
979
980                         if (ret < 0) {
981                                 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
982                                 inet_free_ifa(ifa);
983                                 return ret;
984                         }
985                 }
986                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
987                                          extack);
988         } else {
989                 u32 new_metric = ifa->ifa_rt_priority;
990                 u8 new_proto = ifa->ifa_proto;
991
992                 inet_free_ifa(ifa);
993
994                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
995                     !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
996                         NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
997                         return -EEXIST;
998                 }
999                 ifa = ifa_existing;
1000
1001                 if (ifa->ifa_rt_priority != new_metric) {
1002                         fib_modify_prefix_metric(ifa, new_metric);
1003                         ifa->ifa_rt_priority = new_metric;
1004                 }
1005
1006                 ifa->ifa_proto = new_proto;
1007
1008                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1009                 cancel_delayed_work(&check_lifetime_work);
1010                 queue_delayed_work(system_power_efficient_wq,
1011                                 &check_lifetime_work, 0);
1012                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1013         }
1014         return 0;
1015 }
1016
1017 /*
1018  *      Determine a default network mask, based on the IP address.
1019  */
1020
1021 static int inet_abc_len(__be32 addr)
1022 {
1023         int rc = -1;    /* Something else, probably a multicast. */
1024
1025         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1026                 rc = 0;
1027         else {
1028                 __u32 haddr = ntohl(addr);
1029                 if (IN_CLASSA(haddr))
1030                         rc = 8;
1031                 else if (IN_CLASSB(haddr))
1032                         rc = 16;
1033                 else if (IN_CLASSC(haddr))
1034                         rc = 24;
1035                 else if (IN_CLASSE(haddr))
1036                         rc = 32;
1037         }
1038
1039         return rc;
1040 }
1041
1042
/*
 * Handle the IPv4 interface-address ioctls on the request in @ifr.
 *
 * Supported commands: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
 * SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR
 * and SIOCSIFNETMASK; any other command returns -EINVAL.
 *
 * The device is looked up by ifr_name with any ":alias" suffix removed; the
 * full name (suffix restored) is then compared against address labels.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM          set-type command without CAP_NET_ADMIN in net->user_ns
 *   -EINVAL         unknown command, non-AF_INET address on a set, address
 *                   rejected by inet_abc_len(), or bad netmask
 *   -ENODEV         no device with that name
 *   -EADDRNOTAVAIL  no matching address (except for SIOCSIFADDR and
 *                   non-alias SIOCSIFFLAGS, which do not need one)
 *   -ENOBUFS        address allocation failed
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
1043 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1044 {
1045         struct sockaddr_in sin_orig;
1046         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1047         struct in_ifaddr __rcu **ifap = NULL;
1048         struct in_device *in_dev;
1049         struct in_ifaddr *ifa = NULL;
1050         struct net_device *dev;
1051         char *colon;
1052         int ret = -EFAULT;
1053         int tryaddrmatch = 0;
1054
             /* Force termination so the string helpers below stay in bounds. */
1055         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1056
1057         /* save original address for comparison */
1058         memcpy(&sin_orig, sin, sizeof(*sin));
1059
             /* Temporarily cut off an ":alias" suffix for the device lookup. */
1060         colon = strchr(ifr->ifr_name, ':');
1061         if (colon)
1062                 *colon = 0;
1063
1064         dev_load(net, ifr->ifr_name);
1065
             /* First pass: per-command permission and argument checks. */
1066         switch (cmd) {
1067         case SIOCGIFADDR:       /* Get interface address */
1068         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1069         case SIOCGIFDSTADDR:    /* Get the destination address */
1070         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1071                 /* Note that these ioctls will not sleep,
1072                    so that we do not impose a lock.
1073                    One day we will be forced to put shlock here (I mean SMP)
1074                  */
                     /* The reply buffer is cleared here; the caller's address
                      * survives only in sin_orig for the match below.
                      */
1075                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1076                 memset(sin, 0, sizeof(*sin));
1077                 sin->sin_family = AF_INET;
1078                 break;
1079
1080         case SIOCSIFFLAGS:
1081                 ret = -EPERM;
1082                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1083                         goto out;
1084                 break;
1085         case SIOCSIFADDR:       /* Set interface address (and family) */
1086         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1087         case SIOCSIFDSTADDR:    /* Set the destination address */
1088         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1089                 ret = -EPERM;
1090                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1091                         goto out;
1092                 ret = -EINVAL;
1093                 if (sin->sin_family != AF_INET)
1094                         goto out;
1095                 break;
1096         default:
1097                 ret = -EINVAL;
1098                 goto out;
1099         }
1100
             /* Every command, getters included, runs under RTNL from here. */
1101         rtnl_lock();
1102
1103         ret = -ENODEV;
1104         dev = __dev_get_by_name(net, ifr->ifr_name);
1105         if (!dev)
1106                 goto done;
1107
             /* Restore the alias suffix so labels compare against the full name. */
1108         if (colon)
1109                 *colon = ':';
1110
1111         in_dev = __in_dev_get_rtnl(dev);
1112         if (in_dev) {
1113                 if (tryaddrmatch) {
1114                         /* Matthias Andree */
1115                         /* compare label and address (4.4BSD style) */
1116                         /* note: we only do this for a limited set of ioctls
1117                            and only if the original address family was AF_INET.
1118                            This is checked above. */
1119
1120                         for (ifap = &in_dev->ifa_list;
1121                              (ifa = rtnl_dereference(*ifap)) != NULL;
1122                              ifap = &ifa->ifa_next) {
1123                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1124                                     sin_orig.sin_addr.s_addr ==
1125                                                         ifa->ifa_local) {
1126                                         break; /* found */
1127                                 }
1128                         }
1129                 }
1130                 /* we didn't get a match, maybe the application is
1131                    4.3BSD-style and passed in junk so we fall back to
1132                    comparing just the label */
1133                 if (!ifa) {
1134                         for (ifap = &in_dev->ifa_list;
1135                              (ifa = rtnl_dereference(*ifap)) != NULL;
1136                              ifap = &ifa->ifa_next)
1137                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1138                                         break;
1139                 }
1140         }
1141
             /* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without a match. */
1142         ret = -EADDRNOTAVAIL;
1143         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1144                 goto done;
1145
             /* Second pass: perform the command. */
1146         switch (cmd) {
1147         case SIOCGIFADDR:       /* Get interface address */
1148                 ret = 0;
1149                 sin->sin_addr.s_addr = ifa->ifa_local;
1150                 break;
1151
1152         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1153                 ret = 0;
1154                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1155                 break;
1156
1157         case SIOCGIFDSTADDR:    /* Get the destination address */
1158                 ret = 0;
1159                 sin->sin_addr.s_addr = ifa->ifa_address;
1160                 break;
1161
1162         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1163                 ret = 0;
1164                 sin->sin_addr.s_addr = ifa->ifa_mask;
1165                 break;
1166
1167         case SIOCSIFFLAGS:
                     /* On an alias name, clearing IFF_UP deletes that address
                      * instead of touching the device flags.
                      */
1168                 if (colon) {
1169                         ret = -EADDRNOTAVAIL;
1170                         if (!ifa)
1171                                 break;
1172                         ret = 0;
1173                         if (!(ifr->ifr_flags & IFF_UP))
1174                                 inet_del_ifa(in_dev, ifap, 1);
1175                         break;
1176                 }
1177                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1178                 break;
1179
1180         case SIOCSIFADDR:       /* Set interface address (and family) */
1181                 ret = -EINVAL;
1182                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1183                         break;
1184
1185                 if (!ifa) {
                             /* No address under this label yet: allocate one. */
1186                         ret = -ENOBUFS;
1187                         ifa = inet_alloc_ifa();
1188                         if (!ifa)
1189                                 break;
1190                         INIT_HLIST_NODE(&ifa->hash);
1191                         if (colon)
1192                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1193                         else
1194                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1195                 } else {
                             /* Same address again is a no-op; otherwise unlink
                              * the entry (third argument 0) and reuse it below.
                              */
1196                         ret = 0;
1197                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1198                                 break;
1199                         inet_del_ifa(in_dev, ifap, 0);
1200                         ifa->ifa_broadcast = 0;
1201                         ifa->ifa_scope = 0;
1202                 }
1203
1204                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1205
                     /* Classful defaults, except on point-to-point devices
                      * which always get a /32.
                      */
1206                 if (!(dev->flags & IFF_POINTOPOINT)) {
1207                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1208                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1209                         if ((dev->flags & IFF_BROADCAST) &&
1210                             ifa->ifa_prefixlen < 31)
1211                                 ifa->ifa_broadcast = ifa->ifa_address |
1212                                                      ~ifa->ifa_mask;
1213                 } else {
1214                         ifa->ifa_prefixlen = 32;
1215                         ifa->ifa_mask = inet_make_mask(32);
1216                 }
1217                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1218                 ret = inet_set_ifa(dev, ifa);
1219                 break;
1220
1221         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1222                 ret = 0;
1223                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1224                         inet_del_ifa(in_dev, ifap, 0);
1225                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1226                         inet_insert_ifa(ifa);
1227                 }
1228                 break;
1229
1230         case SIOCSIFDSTADDR:    /* Set the destination address */
1231                 ret = 0;
1232                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1233                         break;
1234                 ret = -EINVAL;
1235                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1236                         break;
1237                 ret = 0;
1238                 inet_del_ifa(in_dev, ifap, 0);
1239                 ifa->ifa_address = sin->sin_addr.s_addr;
1240                 inet_insert_ifa(ifa);
1241                 break;
1242
1243         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1244
1245                 /*
1246                  *      The mask we set must be legal.
1247                  */
1248                 ret = -EINVAL;
1249                 if (bad_mask(sin->sin_addr.s_addr, 0))
1250                         break;
1251                 ret = 0;
1252                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1253                         __be32 old_mask = ifa->ifa_mask;
1254                         inet_del_ifa(in_dev, ifap, 0);
1255                         ifa->ifa_mask = sin->sin_addr.s_addr;
1256                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1257
1258                         /* See if current broadcast address matches
1259                          * with current netmask, then recalculate
1260                          * the broadcast address. Otherwise it's a
1261                          * funny address, so don't touch it since
1262                          * the user seems to know what (s)he's doing...
1263                          */
1264                         if ((dev->flags & IFF_BROADCAST) &&
1265                             (ifa->ifa_prefixlen < 31) &&
1266                             (ifa->ifa_broadcast ==
1267                              (ifa->ifa_local|~old_mask))) {
1268                                 ifa->ifa_broadcast = (ifa->ifa_local |
1269                                                       ~sin->sin_addr.s_addr);
1270                         }
1271                         inet_insert_ifa(ifa);
1272                 }
1273                 break;
1274         }
1275 done:
1276         rtnl_unlock();
1277 out:
1278         return ret;
1279 }
1280
1281 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1282 {
1283         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1284         const struct in_ifaddr *ifa;
1285         struct ifreq ifr;
1286         int done = 0;
1287
1288         if (WARN_ON(size > sizeof(struct ifreq)))
1289                 goto out;
1290
1291         if (!in_dev)
1292                 goto out;
1293
1294         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1295                 if (!buf) {
1296                         done += size;
1297                         continue;
1298                 }
1299                 if (len < size)
1300                         break;
1301                 memset(&ifr, 0, sizeof(struct ifreq));
1302                 strcpy(ifr.ifr_name, ifa->ifa_label);
1303
1304                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1305                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1306                                                                 ifa->ifa_local;
1307
1308                 if (copy_to_user(buf + done, &ifr, size)) {
1309                         done = -EFAULT;
1310                         break;
1311                 }
1312                 len  -= size;
1313                 done += size;
1314         }
1315 out:
1316         return done;
1317 }
1318
1319 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1320                                  int scope)
1321 {
1322         const struct in_ifaddr *ifa;
1323
1324         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1325                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1326                         continue;
1327                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1328                     ifa->ifa_scope <= scope)
1329                         return ifa->ifa_local;
1330         }
1331
1332         return 0;
1333 }
1334
1335 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1336 {
1337         const struct in_ifaddr *ifa;
1338         __be32 addr = 0;
1339         unsigned char localnet_scope = RT_SCOPE_HOST;
1340         struct in_device *in_dev;
1341         struct net *net = dev_net(dev);
1342         int master_idx;
1343
1344         rcu_read_lock();
1345         in_dev = __in_dev_get_rcu(dev);
1346         if (!in_dev)
1347                 goto no_in_dev;
1348
1349         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1350                 localnet_scope = RT_SCOPE_LINK;
1351
1352         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1353                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1354                         continue;
1355                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1356                         continue;
1357                 if (!dst || inet_ifa_match(dst, ifa)) {
1358                         addr = ifa->ifa_local;
1359                         break;
1360                 }
1361                 if (!addr)
1362                         addr = ifa->ifa_local;
1363         }
1364
1365         if (addr)
1366                 goto out_unlock;
1367 no_in_dev:
1368         master_idx = l3mdev_master_ifindex_rcu(dev);
1369
1370         /* For VRFs, the VRF device takes the place of the loopback device,
1371          * with addresses on it being preferred.  Note in such cases the
1372          * loopback device will be among the devices that fail the master_idx
1373          * equality check in the loop below.
1374          */
1375         if (master_idx &&
1376             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1377             (in_dev = __in_dev_get_rcu(dev))) {
1378                 addr = in_dev_select_addr(in_dev, scope);
1379                 if (addr)
1380                         goto out_unlock;
1381         }
1382
1383         /* Not loopback addresses on loopback should be preferred
1384            in this case. It is important that lo is the first interface
1385            in dev_base list.
1386          */
1387         for_each_netdev_rcu(net, dev) {
1388                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1389                         continue;
1390
1391                 in_dev = __in_dev_get_rcu(dev);
1392                 if (!in_dev)
1393                         continue;
1394
1395                 addr = in_dev_select_addr(in_dev, scope);
1396                 if (addr)
1397                         goto out_unlock;
1398         }
1399 out_unlock:
1400         rcu_read_unlock();
1401         return addr;
1402 }
1403 EXPORT_SYMBOL(inet_select_addr);
1404
/*
 * Scan the addresses of @in_dev for a usable local address.
 *
 * Two things are tracked while walking the list:
 *   addr - the first address that equals @local (or any address when
 *          @local is 0) and whose effective scope is <= @scope;
 *   same - set once an entry's subnet contains @local (or @local is 0)
 *          and contains @dst (or @dst is 0).
 *
 * The effective scope of an entry is min(ifa_scope, localnet_scope), where
 * localnet_scope is RT_SCOPE_HOST unless IN_DEV_ROUTE_LOCALNET() is set for
 * the device, in which case it is RT_SCOPE_LINK.
 *
 * Returns addr when a subnet match was found, otherwise 0.
 */
1405 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1406                               __be32 local, int scope)
1407 {
1408         unsigned char localnet_scope = RT_SCOPE_HOST;
1409         const struct in_ifaddr *ifa;
1410         __be32 addr = 0;
1411         int same = 0;
1412
1413         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1414                 localnet_scope = RT_SCOPE_LINK;
1415
1416         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1417                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1418
                     /* Pick the first acceptable address; if the subnet was
                      * already confirmed by an earlier entry we are done.
                      */
1419                 if (!addr &&
1420                     (local == ifa->ifa_local || !local) &&
1421                     min_scope <= scope) {
1422                         addr = ifa->ifa_local;
1423                         if (same)
1424                                 break;
1425                 }
1426                 if (!same) {
1427                         same = (!local || inet_ifa_match(local, ifa)) &&
1428                                 (!dst || inet_ifa_match(dst, ifa));
1429                         if (same && addr) {
                                     /* Nothing more to check unless the address
                                      * was auto-selected for a specific dst.
                                      */
1430                                 if (local || !dst)
1431                                         break;
1432                                 /* Is the selected addr into dst subnet? */
1433                                 if (inet_ifa_match(addr, ifa))
1434                                         break;
1435                                 /* No, then can we use new local src? */
1436                                 if (min_scope <= scope) {
1437                                         addr = ifa->ifa_local;
1438                                         break;
1439                                 }
1440                                 /* search for large dst subnet for addr */
1441                                 same = 0;
1442                         }
1443                 }
1444         }
1445
1446         return same ? addr : 0;
1447 }
1448
1449 /*
1450  * Confirm that local IP address exists using wildcards:
1451  * - net: netns to check, cannot be NULL
1452  * - in_dev: only on this interface, NULL=any interface
1453  * - dst: only in the same subnet as dst, 0=any dst
1454  * - local: address, 0=autoselect the local address
1455  * - scope: maximum allowed scope value for the local address
1456  */
1457 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1458                          __be32 dst, __be32 local, int scope)
1459 {
1460         __be32 addr = 0;
1461         struct net_device *dev;
1462
1463         if (in_dev)
1464                 return confirm_addr_indev(in_dev, dst, local, scope);
1465
1466         rcu_read_lock();
1467         for_each_netdev_rcu(net, dev) {
1468                 in_dev = __in_dev_get_rcu(dev);
1469                 if (in_dev) {
1470                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1471                         if (addr)
1472                                 break;
1473                 }
1474         }
1475         rcu_read_unlock();
1476
1477         return addr;
1478 }
1479 EXPORT_SYMBOL(inet_confirm_addr);
1480
1481 /*
1482  *      Device notifier
1483  */
1484
1485 int register_inetaddr_notifier(struct notifier_block *nb)
1486 {
1487         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1488 }
1489 EXPORT_SYMBOL(register_inetaddr_notifier);
1490
1491 int unregister_inetaddr_notifier(struct notifier_block *nb)
1492 {
1493         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1494 }
1495 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1496
1497 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1498 {
1499         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1500 }
1501 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1502
1503 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1504 {
1505         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1506             nb);
1507 }
1508 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1509
1510 /* Rename ifa_labels for a device name change. Make some effort to preserve
1511  * existing alias numbering and to create unique labels if possible.
1512 */
1513 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1514 {
1515         struct in_ifaddr *ifa;
1516         int named = 0;
1517
1518         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1519                 char old[IFNAMSIZ], *dot;
1520
1521                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1522                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1523                 if (named++ == 0)
1524                         goto skip;
1525                 dot = strchr(old, ':');
1526                 if (!dot) {
1527                         sprintf(old, ":%d", named);
1528                         dot = old;
1529                 }
1530                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1531                         strcat(ifa->ifa_label, dot);
1532                 else
1533                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1534 skip:
1535                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1536         }
1537 }
1538
1539 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1540                                         struct in_device *in_dev)
1541
1542 {
1543         const struct in_ifaddr *ifa;
1544
1545         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1546                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1547                          ifa->ifa_local, dev,
1548                          ifa->ifa_local, NULL,
1549                          dev->dev_addr, NULL);
1550         }
1551 }
1552
1553 /* Called only under RTNL semaphore */
1554
/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Without an in_device, only NETDEV_REGISTER (create one) and
 * NETDEV_CHANGEMTU (re-create one if the MTU is valid again) do anything.
 * With an in_device, the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except that a failed inetdev_init() on
 * NETDEV_REGISTER is reported through notifier_from_errno().
 */
1555 static int inetdev_event(struct notifier_block *this, unsigned long event,
1556                          void *ptr)
1557 {
1558         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1559         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1560
1561         ASSERT_RTNL();
1562
1563         if (!in_dev) {
1564                 if (event == NETDEV_REGISTER) {
1565                         in_dev = inetdev_init(dev);
1566                         if (IS_ERR(in_dev))
1567                                 return notifier_from_errno(PTR_ERR(in_dev));
                             /* Loopback devices get NOXFRM and NOPOLICY set. */
1568                         if (dev->flags & IFF_LOOPBACK) {
1569                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1570                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1571                         }
1572                 } else if (event == NETDEV_CHANGEMTU) {
1573                         /* Re-enabling IP */
                             /* The result is not checked: a failure here just
                              * leaves the device without an in_device.
                              */
1574                         if (inetdev_valid_mtu(dev->mtu))
1575                                 in_dev = inetdev_init(dev);
1576                 }
1577                 goto out;
1578         }
1579
1580         switch (event) {
1581         case NETDEV_REGISTER:
                     /* Registering a device that already has an in_device is
                      * treated as a bug: log it and clear the pointer.
                      */
1582                 pr_debug("%s: bug\n", __func__);
1583                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1584                 break;
1585         case NETDEV_UP:
1586                 if (!inetdev_valid_mtu(dev->mtu))
1587                         break;
                     /* A loopback device coming up gets INADDR_LOOPBACK/8 with
                      * host scope and infinite lifetimes; allocation failure is
                      * silently tolerated.
                      */
1588                 if (dev->flags & IFF_LOOPBACK) {
1589                         struct in_ifaddr *ifa = inet_alloc_ifa();
1590
1591                         if (ifa) {
1592                                 INIT_HLIST_NODE(&ifa->hash);
1593                                 ifa->ifa_local =
1594                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1595                                 ifa->ifa_prefixlen = 8;
1596                                 ifa->ifa_mask = inet_make_mask(8);
1597                                 in_dev_hold(in_dev);
1598                                 ifa->ifa_dev = in_dev;
1599                                 ifa->ifa_scope = RT_SCOPE_HOST;
1600                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1601                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1602                                                  INFINITY_LIFE_TIME);
1603                                 ipv4_devconf_setall(in_dev);
1604                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1605                                 inet_insert_ifa(ifa);
1606                         }
1607                 }
1608                 ip_mc_up(in_dev);
1609                 fallthrough;
1610         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce themselves only when
                      * arp_notify is enabled for the device.
                      */
1611                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1612                         break;
1613                 fallthrough;
1614         case NETDEV_NOTIFY_PEERS:
1615                 /* Send gratuitous ARP to notify of link change */
1616                 inetdev_send_gratuitous_arp(dev, in_dev);
1617                 break;
1618         case NETDEV_DOWN:
1619                 ip_mc_down(in_dev);
1620                 break;
1621         case NETDEV_PRE_TYPE_CHANGE:
1622                 ip_mc_unmap(in_dev);
1623                 break;
1624         case NETDEV_POST_TYPE_CHANGE:
1625                 ip_mc_remap(in_dev);
1626                 break;
1627         case NETDEV_CHANGEMTU:
1628                 if (inetdev_valid_mtu(dev->mtu))
1629                         break;
1630                 /* disable IP when MTU is not enough */
1631                 fallthrough;
1632         case NETDEV_UNREGISTER:
1633                 inetdev_destroy(in_dev);
1634                 break;
1635         case NETDEV_CHANGENAME:
1636                 /* Relabel every address to follow the new device name;
1637                  * inetdev_changename() sends RTM_NEWADDR for each one.
1638                  */
1639                 inetdev_changename(dev, in_dev);
1640
                     /* Re-register the sysctl entries for this in_device. */
1641                 devinet_sysctl_unregister(in_dev);
1642                 devinet_sysctl_register(in_dev);
1643                 break;
1644         }
1645 out:
1646         return NOTIFY_DONE;
1647 }
1648
1649 static struct notifier_block ip_netdev_notifier = {
1650         .notifier_call = inetdev_event,
1651 };
1652
1653 static size_t inet_nlmsg_size(void)
1654 {
1655         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1656                + nla_total_size(4) /* IFA_ADDRESS */
1657                + nla_total_size(4) /* IFA_LOCAL */
1658                + nla_total_size(4) /* IFA_BROADCAST */
1659                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1660                + nla_total_size(4)  /* IFA_FLAGS */
1661                + nla_total_size(1)  /* IFA_PROTO */
1662                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1663                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1664 }
1665
1666 static inline u32 cstamp_delta(unsigned long cstamp)
1667 {
1668         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1669 }
1670
1671 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1672                          unsigned long tstamp, u32 preferred, u32 valid)
1673 {
1674         struct ifa_cacheinfo ci;
1675
1676         ci.cstamp = cstamp_delta(cstamp);
1677         ci.tstamp = cstamp_delta(tstamp);
1678         ci.ifa_prefered = preferred;
1679         ci.ifa_valid = valid;
1680
1681         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1682 }
1683
1684 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1685                             struct inet_fill_args *args)
1686 {
1687         struct ifaddrmsg *ifm;
1688         struct nlmsghdr  *nlh;
1689         unsigned long tstamp;
1690         u32 preferred, valid;
1691         u32 flags;
1692
1693         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1694                         args->flags);
1695         if (!nlh)
1696                 return -EMSGSIZE;
1697
1698         ifm = nlmsg_data(nlh);
1699         ifm->ifa_family = AF_INET;
1700         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1701
1702         flags = READ_ONCE(ifa->ifa_flags);
1703         /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1704          * The 32bit value is given in IFA_FLAGS attribute.
1705          */
1706         ifm->ifa_flags = (__u8)flags;
1707
1708         ifm->ifa_scope = ifa->ifa_scope;
1709         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1710
1711         if (args->netnsid >= 0 &&
1712             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1713                 goto nla_put_failure;
1714
1715         tstamp = READ_ONCE(ifa->ifa_tstamp);
1716         if (!(flags & IFA_F_PERMANENT)) {
1717                 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1718                 valid = READ_ONCE(ifa->ifa_valid_lft);
1719                 if (preferred != INFINITY_LIFE_TIME) {
1720                         long tval = (jiffies - tstamp) / HZ;
1721
1722                         if (preferred > tval)
1723                                 preferred -= tval;
1724                         else
1725                                 preferred = 0;
1726                         if (valid != INFINITY_LIFE_TIME) {
1727                                 if (valid > tval)
1728                                         valid -= tval;
1729                                 else
1730                                         valid = 0;
1731                         }
1732                 }
1733         } else {
1734                 preferred = INFINITY_LIFE_TIME;
1735                 valid = INFINITY_LIFE_TIME;
1736         }
1737         if ((ifa->ifa_address &&
1738              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1739             (ifa->ifa_local &&
1740              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1741             (ifa->ifa_broadcast &&
1742              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1743             (ifa->ifa_label[0] &&
1744              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1745             (ifa->ifa_proto &&
1746              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1747             nla_put_u32(skb, IFA_FLAGS, flags) ||
1748             (ifa->ifa_rt_priority &&
1749              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1750             put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1751                           preferred, valid))
1752                 goto nla_put_failure;
1753
1754         nlmsg_end(skb, nlh);
1755         return 0;
1756
1757 nla_put_failure:
1758         nlmsg_cancel(skb, nlh);
1759         return -EMSGSIZE;
1760 }
1761
1762 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1763                                       struct inet_fill_args *fillargs,
1764                                       struct net **tgt_net, struct sock *sk,
1765                                       struct netlink_callback *cb)
1766 {
1767         struct netlink_ext_ack *extack = cb->extack;
1768         struct nlattr *tb[IFA_MAX+1];
1769         struct ifaddrmsg *ifm;
1770         int err, i;
1771
1772         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1773                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1774                 return -EINVAL;
1775         }
1776
1777         ifm = nlmsg_data(nlh);
1778         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1779                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1780                 return -EINVAL;
1781         }
1782
1783         fillargs->ifindex = ifm->ifa_index;
1784         if (fillargs->ifindex) {
1785                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1786                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1787         }
1788
1789         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1790                                             ifa_ipv4_policy, extack);
1791         if (err < 0)
1792                 return err;
1793
1794         for (i = 0; i <= IFA_MAX; ++i) {
1795                 if (!tb[i])
1796                         continue;
1797
1798                 if (i == IFA_TARGET_NETNSID) {
1799                         struct net *net;
1800
1801                         fillargs->netnsid = nla_get_s32(tb[i]);
1802
1803                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1804                         if (IS_ERR(net)) {
1805                                 fillargs->netnsid = -1;
1806                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1807                                 return PTR_ERR(net);
1808                         }
1809                         *tgt_net = net;
1810                 } else {
1811                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1812                         return -EINVAL;
1813                 }
1814         }
1815
1816         return 0;
1817 }
1818
1819 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1820                             struct netlink_callback *cb, int *s_ip_idx,
1821                             struct inet_fill_args *fillargs)
1822 {
1823         struct in_ifaddr *ifa;
1824         int ip_idx = 0;
1825         int err;
1826
1827         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1828                 if (ip_idx < *s_ip_idx) {
1829                         ip_idx++;
1830                         continue;
1831                 }
1832                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1833                 if (err < 0)
1834                         goto done;
1835
1836                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1837                 ip_idx++;
1838         }
1839         err = 0;
1840         ip_idx = 0;
1841 done:
1842         *s_ip_idx = ip_idx;
1843
1844         return err;
1845 }
1846
1847 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1848  */
1849 static u32 inet_base_seq(const struct net *net)
1850 {
1851         u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1852                   READ_ONCE(net->dev_base_seq);
1853
1854         /* Must not return 0 (see nl_dump_check_consistent()).
1855          * Chose a value far away from 0.
1856          */
1857         if (!res)
1858                 res = 0x80000000;
1859         return res;
1860 }
1861
1862 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1863 {
1864         const struct nlmsghdr *nlh = cb->nlh;
1865         struct inet_fill_args fillargs = {
1866                 .portid = NETLINK_CB(cb->skb).portid,
1867                 .seq = nlh->nlmsg_seq,
1868                 .event = RTM_NEWADDR,
1869                 .flags = NLM_F_MULTI,
1870                 .netnsid = -1,
1871         };
1872         struct net *net = sock_net(skb->sk);
1873         struct net *tgt_net = net;
1874         struct {
1875                 unsigned long ifindex;
1876                 int ip_idx;
1877         } *ctx = (void *)cb->ctx;
1878         struct in_device *in_dev;
1879         struct net_device *dev;
1880         int err = 0;
1881
1882         rcu_read_lock();
1883         if (cb->strict_check) {
1884                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1885                                                  skb->sk, cb);
1886                 if (err < 0)
1887                         goto done;
1888
1889                 if (fillargs.ifindex) {
1890                         dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1891                         if (!dev) {
1892                                 err = -ENODEV;
1893                                 goto done;
1894                         }
1895                         in_dev = __in_dev_get_rcu(dev);
1896                         if (!in_dev)
1897                                 goto done;
1898                         err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1899                                                &fillargs);
1900                         goto done;
1901                 }
1902         }
1903
1904         cb->seq = inet_base_seq(tgt_net);
1905
1906         for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1907                 in_dev = __in_dev_get_rcu(dev);
1908                 if (!in_dev)
1909                         continue;
1910                 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1911                                        &fillargs);
1912                 if (err < 0)
1913                         goto done;
1914         }
1915 done:
1916         if (fillargs.netnsid >= 0)
1917                 put_net(tgt_net);
1918         rcu_read_unlock();
1919         return err;
1920 }
1921
1922 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1923                       u32 portid)
1924 {
1925         struct inet_fill_args fillargs = {
1926                 .portid = portid,
1927                 .seq = nlh ? nlh->nlmsg_seq : 0,
1928                 .event = event,
1929                 .flags = 0,
1930                 .netnsid = -1,
1931         };
1932         struct sk_buff *skb;
1933         int err = -ENOBUFS;
1934         struct net *net;
1935
1936         net = dev_net(ifa->ifa_dev->dev);
1937         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1938         if (!skb)
1939                 goto errout;
1940
1941         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1942         if (err < 0) {
1943                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1944                 WARN_ON(err == -EMSGSIZE);
1945                 kfree_skb(skb);
1946                 goto errout;
1947         }
1948         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1949         return;
1950 errout:
1951         if (err < 0)
1952                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1953 }
1954
1955 static size_t inet_get_link_af_size(const struct net_device *dev,
1956                                     u32 ext_filter_mask)
1957 {
1958         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1959
1960         if (!in_dev)
1961                 return 0;
1962
1963         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1964 }
1965
1966 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1967                              u32 ext_filter_mask)
1968 {
1969         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1970         struct nlattr *nla;
1971         int i;
1972
1973         if (!in_dev)
1974                 return -ENODATA;
1975
1976         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1977         if (!nla)
1978                 return -EMSGSIZE;
1979
1980         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1981                 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1982
1983         return 0;
1984 }
1985
1986 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1987         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1988 };
1989
1990 static int inet_validate_link_af(const struct net_device *dev,
1991                                  const struct nlattr *nla,
1992                                  struct netlink_ext_ack *extack)
1993 {
1994         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1995         int err, rem;
1996
1997         if (dev && !__in_dev_get_rtnl(dev))
1998                 return -EAFNOSUPPORT;
1999
2000         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2001                                           inet_af_policy, extack);
2002         if (err < 0)
2003                 return err;
2004
2005         if (tb[IFLA_INET_CONF]) {
2006                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2007                         int cfgid = nla_type(a);
2008
2009                         if (nla_len(a) < 4)
2010                                 return -EINVAL;
2011
2012                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2013                                 return -EINVAL;
2014                 }
2015         }
2016
2017         return 0;
2018 }
2019
2020 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2021                             struct netlink_ext_ack *extack)
2022 {
2023         struct in_device *in_dev = __in_dev_get_rtnl(dev);
2024         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2025         int rem;
2026
2027         if (!in_dev)
2028                 return -EAFNOSUPPORT;
2029
2030         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2031                 return -EINVAL;
2032
2033         if (tb[IFLA_INET_CONF]) {
2034                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2035                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2036         }
2037
2038         return 0;
2039 }
2040
2041 static int inet_netconf_msgsize_devconf(int type)
2042 {
2043         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2044                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2045         bool all = false;
2046
2047         if (type == NETCONFA_ALL)
2048                 all = true;
2049
2050         if (all || type == NETCONFA_FORWARDING)
2051                 size += nla_total_size(4);
2052         if (all || type == NETCONFA_RP_FILTER)
2053                 size += nla_total_size(4);
2054         if (all || type == NETCONFA_MC_FORWARDING)
2055                 size += nla_total_size(4);
2056         if (all || type == NETCONFA_BC_FORWARDING)
2057                 size += nla_total_size(4);
2058         if (all || type == NETCONFA_PROXY_NEIGH)
2059                 size += nla_total_size(4);
2060         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2061                 size += nla_total_size(4);
2062
2063         return size;
2064 }
2065
2066 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2067                                      const struct ipv4_devconf *devconf,
2068                                      u32 portid, u32 seq, int event,
2069                                      unsigned int flags, int type)
2070 {
2071         struct nlmsghdr  *nlh;
2072         struct netconfmsg *ncm;
2073         bool all = false;
2074
2075         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2076                         flags);
2077         if (!nlh)
2078                 return -EMSGSIZE;
2079
2080         if (type == NETCONFA_ALL)
2081                 all = true;
2082
2083         ncm = nlmsg_data(nlh);
2084         ncm->ncm_family = AF_INET;
2085
2086         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2087                 goto nla_put_failure;
2088
2089         if (!devconf)
2090                 goto out;
2091
2092         if ((all || type == NETCONFA_FORWARDING) &&
2093             nla_put_s32(skb, NETCONFA_FORWARDING,
2094                         IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2095                 goto nla_put_failure;
2096         if ((all || type == NETCONFA_RP_FILTER) &&
2097             nla_put_s32(skb, NETCONFA_RP_FILTER,
2098                         IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2099                 goto nla_put_failure;
2100         if ((all || type == NETCONFA_MC_FORWARDING) &&
2101             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2102                         IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2103                 goto nla_put_failure;
2104         if ((all || type == NETCONFA_BC_FORWARDING) &&
2105             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2106                         IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2107                 goto nla_put_failure;
2108         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2109             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2110                         IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2111                 goto nla_put_failure;
2112         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2113             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2114                         IPV4_DEVCONF_RO(*devconf,
2115                                         IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2116                 goto nla_put_failure;
2117
2118 out:
2119         nlmsg_end(skb, nlh);
2120         return 0;
2121
2122 nla_put_failure:
2123         nlmsg_cancel(skb, nlh);
2124         return -EMSGSIZE;
2125 }
2126
2127 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2128                                  int ifindex, struct ipv4_devconf *devconf)
2129 {
2130         struct sk_buff *skb;
2131         int err = -ENOBUFS;
2132
2133         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2134         if (!skb)
2135                 goto errout;
2136
2137         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2138                                         event, 0, type);
2139         if (err < 0) {
2140                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2141                 WARN_ON(err == -EMSGSIZE);
2142                 kfree_skb(skb);
2143                 goto errout;
2144         }
2145         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2146         return;
2147 errout:
2148         if (err < 0)
2149                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2150 }
2151
2152 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2153         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2154         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2155         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2156         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2157         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2158 };
2159
2160 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2161                                       const struct nlmsghdr *nlh,
2162                                       struct nlattr **tb,
2163                                       struct netlink_ext_ack *extack)
2164 {
2165         int i, err;
2166
2167         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2168                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2169                 return -EINVAL;
2170         }
2171
2172         if (!netlink_strict_get_check(skb))
2173                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2174                                               tb, NETCONFA_MAX,
2175                                               devconf_ipv4_policy, extack);
2176
2177         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2178                                             tb, NETCONFA_MAX,
2179                                             devconf_ipv4_policy, extack);
2180         if (err)
2181                 return err;
2182
2183         for (i = 0; i <= NETCONFA_MAX; i++) {
2184                 if (!tb[i])
2185                         continue;
2186
2187                 switch (i) {
2188                 case NETCONFA_IFINDEX:
2189                         break;
2190                 default:
2191                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2192                         return -EINVAL;
2193                 }
2194         }
2195
2196         return 0;
2197 }
2198
2199 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2200                                     struct nlmsghdr *nlh,
2201                                     struct netlink_ext_ack *extack)
2202 {
2203         struct net *net = sock_net(in_skb->sk);
2204         struct nlattr *tb[NETCONFA_MAX + 1];
2205         const struct ipv4_devconf *devconf;
2206         struct in_device *in_dev = NULL;
2207         struct net_device *dev = NULL;
2208         struct sk_buff *skb;
2209         int ifindex;
2210         int err;
2211
2212         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2213         if (err)
2214                 return err;
2215
2216         if (!tb[NETCONFA_IFINDEX])
2217                 return -EINVAL;
2218
2219         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2220         switch (ifindex) {
2221         case NETCONFA_IFINDEX_ALL:
2222                 devconf = net->ipv4.devconf_all;
2223                 break;
2224         case NETCONFA_IFINDEX_DEFAULT:
2225                 devconf = net->ipv4.devconf_dflt;
2226                 break;
2227         default:
2228                 err = -ENODEV;
2229                 dev = dev_get_by_index(net, ifindex);
2230                 if (dev)
2231                         in_dev = in_dev_get(dev);
2232                 if (!in_dev)
2233                         goto errout;
2234                 devconf = &in_dev->cnf;
2235                 break;
2236         }
2237
2238         err = -ENOBUFS;
2239         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2240         if (!skb)
2241                 goto errout;
2242
2243         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2244                                         NETLINK_CB(in_skb).portid,
2245                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2246                                         NETCONFA_ALL);
2247         if (err < 0) {
2248                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2249                 WARN_ON(err == -EMSGSIZE);
2250                 kfree_skb(skb);
2251                 goto errout;
2252         }
2253         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2254 errout:
2255         if (in_dev)
2256                 in_dev_put(in_dev);
2257         dev_put(dev);
2258         return err;
2259 }
2260
2261 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2262                                      struct netlink_callback *cb)
2263 {
2264         const struct nlmsghdr *nlh = cb->nlh;
2265         struct net *net = sock_net(skb->sk);
2266         struct {
2267                 unsigned long ifindex;
2268                 unsigned int all_default;
2269         } *ctx = (void *)cb->ctx;
2270         const struct in_device *in_dev;
2271         struct net_device *dev;
2272         int err = 0;
2273
2274         if (cb->strict_check) {
2275                 struct netlink_ext_ack *extack = cb->extack;
2276                 struct netconfmsg *ncm;
2277
2278                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2279                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2280                         return -EINVAL;
2281                 }
2282
2283                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2284                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2285                         return -EINVAL;
2286                 }
2287         }
2288
2289         rcu_read_lock();
2290         for_each_netdev_dump(net, dev, ctx->ifindex) {
2291                 in_dev = __in_dev_get_rcu(dev);
2292                 if (!in_dev)
2293                         continue;
2294                 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2295                                                 &in_dev->cnf,
2296                                                 NETLINK_CB(cb->skb).portid,
2297                                                 nlh->nlmsg_seq,
2298                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2299                                                 NETCONFA_ALL);
2300                 if (err < 0)
2301                         goto done;
2302         }
2303         if (ctx->all_default == 0) {
2304                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2305                                                 net->ipv4.devconf_all,
2306                                                 NETLINK_CB(cb->skb).portid,
2307                                                 nlh->nlmsg_seq,
2308                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2309                                                 NETCONFA_ALL);
2310                 if (err < 0)
2311                         goto done;
2312                 ctx->all_default++;
2313         }
2314         if (ctx->all_default == 1) {
2315                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2316                                                 net->ipv4.devconf_dflt,
2317                                                 NETLINK_CB(cb->skb).portid,
2318                                                 nlh->nlmsg_seq,
2319                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2320                                                 NETCONFA_ALL);
2321                 if (err < 0)
2322                         goto done;
2323                 ctx->all_default++;
2324         }
2325 done:
2326         rcu_read_unlock();
2327         return err;
2328 }
2329
2330 #ifdef CONFIG_SYSCTL
2331
2332 static void devinet_copy_dflt_conf(struct net *net, int i)
2333 {
2334         struct net_device *dev;
2335
2336         rcu_read_lock();
2337         for_each_netdev_rcu(net, dev) {
2338                 struct in_device *in_dev;
2339
2340                 in_dev = __in_dev_get_rcu(dev);
2341                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2342                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2343         }
2344         rcu_read_unlock();
2345 }
2346
2347 /* called with RTNL locked */
2348 static void inet_forward_change(struct net *net)
2349 {
2350         struct net_device *dev;
2351         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2352
2353         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2354         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2355         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2356                                     NETCONFA_FORWARDING,
2357                                     NETCONFA_IFINDEX_ALL,
2358                                     net->ipv4.devconf_all);
2359         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2360                                     NETCONFA_FORWARDING,
2361                                     NETCONFA_IFINDEX_DEFAULT,
2362                                     net->ipv4.devconf_dflt);
2363
2364         for_each_netdev(net, dev) {
2365                 struct in_device *in_dev;
2366
2367                 if (on)
2368                         dev_disable_lro(dev);
2369
2370                 in_dev = __in_dev_get_rtnl(dev);
2371                 if (in_dev) {
2372                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2373                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2374                                                     NETCONFA_FORWARDING,
2375                                                     dev->ifindex, &in_dev->cnf);
2376                 }
2377         }
2378 }
2379
2380 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2381 {
2382         if (cnf == net->ipv4.devconf_dflt)
2383                 return NETCONFA_IFINDEX_DEFAULT;
2384         else if (cnf == net->ipv4.devconf_all)
2385                 return NETCONFA_IFINDEX_ALL;
2386         else {
2387                 struct in_device *idev
2388                         = container_of(cnf, struct in_device, cnf);
2389                 return idev->dev->ifindex;
2390         }
2391 }
2392
2393 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2394                              void *buffer, size_t *lenp, loff_t *ppos)
2395 {
2396         int old_value = *(int *)ctl->data;
2397         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2398         int new_value = *(int *)ctl->data;
2399
2400         if (write) {
2401                 struct ipv4_devconf *cnf = ctl->extra1;
2402                 struct net *net = ctl->extra2;
2403                 int i = (int *)ctl->data - cnf->data;
2404                 int ifindex;
2405
2406                 set_bit(i, cnf->state);
2407
2408                 if (cnf == net->ipv4.devconf_dflt)
2409                         devinet_copy_dflt_conf(net, i);
2410                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2411                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2412                         if ((new_value == 0) && (old_value != 0))
2413                                 rt_cache_flush(net);
2414
2415                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2416                     new_value != old_value)
2417                         rt_cache_flush(net);
2418
2419                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2420                     new_value != old_value) {
2421                         ifindex = devinet_conf_ifindex(net, cnf);
2422                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2423                                                     NETCONFA_RP_FILTER,
2424                                                     ifindex, cnf);
2425                 }
2426                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2427                     new_value != old_value) {
2428                         ifindex = devinet_conf_ifindex(net, cnf);
2429                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2430                                                     NETCONFA_PROXY_NEIGH,
2431                                                     ifindex, cnf);
2432                 }
2433                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2434                     new_value != old_value) {
2435                         ifindex = devinet_conf_ifindex(net, cnf);
2436                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2437                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2438                                                     ifindex, cnf);
2439                 }
2440         }
2441
2442         return ret;
2443 }
2444
2445 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2446                                   void *buffer, size_t *lenp, loff_t *ppos)
2447 {
2448         int *valp = ctl->data;
2449         int val = *valp;
2450         loff_t pos = *ppos;
2451         struct net *net = ctl->extra2;
2452         int ret;
2453
2454         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2455                 return -EPERM;
2456
2457         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2458
2459         if (write && *valp != val) {
2460                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2461                         if (!rtnl_trylock()) {
2462                                 /* Restore the original values before restarting */
2463                                 *valp = val;
2464                                 *ppos = pos;
2465                                 return restart_syscall();
2466                         }
2467                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2468                                 inet_forward_change(net);
2469                         } else {
2470                                 struct ipv4_devconf *cnf = ctl->extra1;
2471                                 struct in_device *idev =
2472                                         container_of(cnf, struct in_device, cnf);
2473                                 if (*valp)
2474                                         dev_disable_lro(idev->dev);
2475                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2476                                                             NETCONFA_FORWARDING,
2477                                                             idev->dev->ifindex,
2478                                                             cnf);
2479                         }
2480                         rtnl_unlock();
2481                         rt_cache_flush(net);
2482                 } else
2483                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2484                                                     NETCONFA_FORWARDING,
2485                                                     NETCONFA_IFINDEX_DEFAULT,
2486                                                     net->ipv4.devconf_dflt);
2487         }
2488
2489         return ret;
2490 }
2491
2492 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2493                                 void *buffer, size_t *lenp, loff_t *ppos)
2494 {
2495         int *valp = ctl->data;
2496         int val = *valp;
2497         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2498         struct net *net = ctl->extra2;
2499
2500         if (write && *valp != val)
2501                 rt_cache_flush(net);
2502
2503         return ret;
2504 }
2505
/*
 * Build one ctl_table initializer for devconf attribute
 * IPV4_DEVCONF_<attr>.  In this template .data addresses slot
 * (IPV4_DEVCONF_<attr> - 1) of the global ipv4_devconf.data[] and
 * .extra1 points at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= proc, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2528
2529 static struct devinet_sysctl_table {
2530         struct ctl_table_header *sysctl_header;
2531         struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
2532 } devinet_sysctl = {
2533         .devinet_vars = {
2534                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2535                                              devinet_sysctl_forward),
2536                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2537                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2538
2539                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2540                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2541                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2542                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2543                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2544                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2545                                         "accept_source_route"),
2546                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2547                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2548                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2549                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2550                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2551                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2552                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2553                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2554                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2555                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2556                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2557                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2558                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2559                                         "arp_evict_nocarrier"),
2560                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2561                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2562                                         "force_igmp_version"),
2563                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2564                                         "igmpv2_unsolicited_report_interval"),
2565                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2566                                         "igmpv3_unsolicited_report_interval"),
2567                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2568                                         "ignore_routes_with_linkdown"),
2569                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2570                                         "drop_gratuitous_arp"),
2571
2572                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2573                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2574                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2575                                               "promote_secondaries"),
2576                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2577                                               "route_localnet"),
2578                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2579                                               "drop_unicast_in_l2_multicast"),
2580         },
2581 };
2582
2583 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2584                                      int ifindex, struct ipv4_devconf *p)
2585 {
2586         int i;
2587         struct devinet_sysctl_table *t;
2588         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2589
2590         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2591         if (!t)
2592                 goto out;
2593
2594         for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2595                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2596                 t->devinet_vars[i].extra1 = p;
2597                 t->devinet_vars[i].extra2 = net;
2598         }
2599
2600         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2601
2602         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2603         if (!t->sysctl_header)
2604                 goto free;
2605
2606         p->sysctl = t;
2607
2608         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2609                                     ifindex, p);
2610         return 0;
2611
2612 free:
2613         kfree(t);
2614 out:
2615         return -ENOMEM;
2616 }
2617
2618 static void __devinet_sysctl_unregister(struct net *net,
2619                                         struct ipv4_devconf *cnf, int ifindex)
2620 {
2621         struct devinet_sysctl_table *t = cnf->sysctl;
2622
2623         if (t) {
2624                 cnf->sysctl = NULL;
2625                 unregister_net_sysctl_table(t->sysctl_header);
2626                 kfree(t);
2627         }
2628
2629         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2630 }
2631
2632 static int devinet_sysctl_register(struct in_device *idev)
2633 {
2634         int err;
2635
2636         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2637                 return -EINVAL;
2638
2639         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2640         if (err)
2641                 return err;
2642         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2643                                         idev->dev->ifindex, &idev->cnf);
2644         if (err)
2645                 neigh_sysctl_unregister(idev->arp_parms);
2646         return err;
2647 }
2648
2649 static void devinet_sysctl_unregister(struct in_device *idev)
2650 {
2651         struct net *net = dev_net(idev->dev);
2652
2653         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2654         neigh_sysctl_unregister(idev->arp_parms);
2655 }
2656
2657 static struct ctl_table ctl_forward_entry[] = {
2658         {
2659                 .procname       = "ip_forward",
2660                 .data           = &ipv4_devconf.data[
2661                                         IPV4_DEVCONF_FORWARDING - 1],
2662                 .maxlen         = sizeof(int),
2663                 .mode           = 0644,
2664                 .proc_handler   = devinet_sysctl_forward,
2665                 .extra1         = &ipv4_devconf,
2666                 .extra2         = &init_net,
2667         },
2668 };
2669 #endif
2670
2671 static __net_init int devinet_init_net(struct net *net)
2672 {
2673         int err;
2674         struct ipv4_devconf *all, *dflt;
2675 #ifdef CONFIG_SYSCTL
2676         struct ctl_table *tbl;
2677         struct ctl_table_header *forw_hdr;
2678 #endif
2679
2680         err = -ENOMEM;
2681         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2682         if (!all)
2683                 goto err_alloc_all;
2684
2685         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2686         if (!dflt)
2687                 goto err_alloc_dflt;
2688
2689 #ifdef CONFIG_SYSCTL
2690         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2691         if (!tbl)
2692                 goto err_alloc_ctl;
2693
2694         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2695         tbl[0].extra1 = all;
2696         tbl[0].extra2 = net;
2697 #endif
2698
2699         if (!net_eq(net, &init_net)) {
2700                 switch (net_inherit_devconf()) {
2701                 case 3:
2702                         /* copy from the current netns */
2703                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2704                                sizeof(ipv4_devconf));
2705                         memcpy(dflt,
2706                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2707                                sizeof(ipv4_devconf_dflt));
2708                         break;
2709                 case 0:
2710                 case 1:
2711                         /* copy from init_net */
2712                         memcpy(all, init_net.ipv4.devconf_all,
2713                                sizeof(ipv4_devconf));
2714                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2715                                sizeof(ipv4_devconf_dflt));
2716                         break;
2717                 case 2:
2718                         /* use compiled values */
2719                         break;
2720                 }
2721         }
2722
2723 #ifdef CONFIG_SYSCTL
2724         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2725         if (err < 0)
2726                 goto err_reg_all;
2727
2728         err = __devinet_sysctl_register(net, "default",
2729                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2730         if (err < 0)
2731                 goto err_reg_dflt;
2732
2733         err = -ENOMEM;
2734         forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2735                                           ARRAY_SIZE(ctl_forward_entry));
2736         if (!forw_hdr)
2737                 goto err_reg_ctl;
2738         net->ipv4.forw_hdr = forw_hdr;
2739 #endif
2740
2741         net->ipv4.devconf_all = all;
2742         net->ipv4.devconf_dflt = dflt;
2743         return 0;
2744
2745 #ifdef CONFIG_SYSCTL
2746 err_reg_ctl:
2747         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2748 err_reg_dflt:
2749         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2750 err_reg_all:
2751         kfree(tbl);
2752 err_alloc_ctl:
2753 #endif
2754         kfree(dflt);
2755 err_alloc_dflt:
2756         kfree(all);
2757 err_alloc_all:
2758         return err;
2759 }
2760
2761 static __net_exit void devinet_exit_net(struct net *net)
2762 {
2763 #ifdef CONFIG_SYSCTL
2764         const struct ctl_table *tbl;
2765
2766         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2767         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2768         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2769                                     NETCONFA_IFINDEX_DEFAULT);
2770         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2771                                     NETCONFA_IFINDEX_ALL);
2772         kfree(tbl);
2773 #endif
2774         kfree(net->ipv4.devconf_dflt);
2775         kfree(net->ipv4.devconf_all);
2776 }
2777
2778 static __net_initdata struct pernet_operations devinet_ops = {
2779         .init = devinet_init_net,
2780         .exit = devinet_exit_net,
2781 };
2782
2783 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2784         .family           = AF_INET,
2785         .fill_link_af     = inet_fill_link_af,
2786         .get_link_af_size = inet_get_link_af_size,
2787         .validate_link_af = inet_validate_link_af,
2788         .set_link_af      = inet_set_link_af,
2789 };
2790
2791 void __init devinet_init(void)
2792 {
2793         int i;
2794
2795         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2796                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2797
2798         register_pernet_subsys(&devinet_ops);
2799         register_netdevice_notifier(&ip_netdev_notifier);
2800
2801         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2802
2803         rtnl_af_register(&inet_af_ops);
2804
2805         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2806         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2807         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2808                       RTNL_FLAG_DUMP_UNLOCKED);
2809         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2810                       inet_netconf_dump_devconf,
2811                       RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2812 }