fb4372cb1de1f2fdabdb2cfb9a8b2305234c939b
[linux-2.6-block.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
/* Debug knobs: neigh_dbg(level, ...) emits via pr_debug() only when
 * level <= NEIGH_DEBUG (compile-time filter; pr_debug itself is also
 * gated by CONFIG_DYNAMIC_DEBUG / DEBUG).
 */
#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if (level <= NEIGH_DEBUG)               \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)

/* Proxy-neighbour hash has PNEIGH_HASHMASK + 1 (= 16) buckets. */
#define PNEIGH_HASHMASK         0xF
53
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56                            u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59                                     struct net_device *dev);
60
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be made under this lock: no callbacks
70      to protocol backends, no attempts to send something to network.
71      It will result in deadlocks, if backend/driver wants to use neighbour
72      cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release table lock.
75
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79
80    Reference count prevents destruction.
81
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect another entry fields:
84     - timer
85     - resolution queue
86
   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
91  */
92
/* Output handler installed on dead/unusable entries: drop the packet
 * and report -ENETDOWN to the caller.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}
98
/* Run the per-protocol cleanup hook (if any), notify userspace
 * (RTM_DELNEIGH) and netevent listeners, then drop our reference.
 * Called after the entry has been unlinked from its hash bucket.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);

        __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        neigh_release(neigh);
}
108
/*
 * Pseudo-random reachability time, uniformly distributed over
 * [base/2, base/2 + base).  This matches the default IPv6 behaviour
 * and is deliberately not configurable.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        unsigned long half;

        if (!base)
                return 0;

        half = base >> 1;
        return half + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
/* Flag @n as dead and, if it sits on the table's gc list, unlink it
 * and drop the gc_entries accounting.  Call sites hold n->lock (and
 * tbl->lock when unlinking from the hash).
 */
static void neigh_mark_dead(struct neighbour *n)
{
        n->dead = 1;
        if (!list_empty(&n->gc_list)) {
                list_del_init(&n->gc_list);
                atomic_dec(&n->tbl->gc_entries);
        }
}
129
/* Re-evaluate whether @n belongs on the table's gc list after a state
 * or flags change.  Takes tbl->lock then n->lock itself (this ordering
 * is used throughout the file).
 */
static void neigh_update_gc_list(struct neighbour *n)
{
        bool on_gc_list, exempt_from_gc;

        write_lock_bh(&n->tbl->lock);
        write_lock(&n->lock);

        /* remove from the gc list if new state is permanent or if neighbor
         * is externally learned; otherwise entry should be on the gc list
         */
        exempt_from_gc = n->nud_state & NUD_PERMANENT ||
                         n->flags & NTF_EXT_LEARNED;
        on_gc_list = !list_empty(&n->gc_list);

        if (exempt_from_gc && on_gc_list) {
                list_del_init(&n->gc_list);
                atomic_dec(&n->tbl->gc_entries);
        } else if (!exempt_from_gc && !on_gc_list) {
                /* add entries to the tail; cleaning removes from the front */
                list_add_tail(&n->gc_list, &n->tbl->gc_list);
                atomic_inc(&n->tbl->gc_entries);
        }

        write_unlock(&n->lock);
        write_unlock_bh(&n->tbl->lock);
}
156
157 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
158                                      int *notify)
159 {
160         bool rc = false;
161         u8 ndm_flags;
162
163         if (!(flags & NEIGH_UPDATE_F_ADMIN))
164                 return rc;
165
166         ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
167         if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
168                 if (ndm_flags & NTF_EXT_LEARNED)
169                         neigh->flags |= NTF_EXT_LEARNED;
170                 else
171                         neigh->flags &= ~NTF_EXT_LEARNED;
172                 rc = true;
173                 *notify = 1;
174         }
175
176         return rc;
177 }
178
/* Unlink @n from its hash chain (*np currently points at it) if we
 * hold the only reference.  Caller holds tbl->lock; n->lock is taken
 * here.  Returns true when the entry was unlinked and released.
 */
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
                      struct neigh_table *tbl)
{
        bool retval = false;

        write_lock(&n->lock);
        if (refcount_read(&n->refcnt) == 1) {
                struct neighbour *neigh;

                neigh = rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock));
                /* bypass n in the bucket chain; RCU readers still see
                 * a consistent list
                 */
                rcu_assign_pointer(*np, neigh);
                neigh_mark_dead(n);
                retval = true;
        }
        write_unlock(&n->lock);
        if (retval)
                neigh_cleanup_and_release(n);
        return retval;
}
199
/* Locate @ndel in its hash bucket and, if it is unreferenced, unlink
 * and release it via neigh_del().  Caller holds tbl->lock.  Returns
 * true when the entry was removed.
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
        struct neigh_hash_table *nht;
        void *pkey = ndel->primary_key;
        u32 hash_val;
        struct neighbour *n;
        struct neighbour __rcu **np;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
        hash_val = hash_val >> (32 - nht->hash_shift);

        np = &nht->hash_buckets[hash_val];
        while ((n = rcu_dereference_protected(*np,
                                              lockdep_is_held(&tbl->lock)))) {
                if (n == ndel)
                        return neigh_del(n, np, tbl);
                np = &n->next;
        }
        return false;
}
222
/* Synchronous shrink of @tbl, run from the allocation path when the
 * table exceeds its gc thresholds.  Walks the gc list (oldest entries
 * at the front) and removes unreferenced entries that are NUD_FAILED
 * or have not been updated in the last 5 seconds, stopping once
 * gc_entries is back under gc_thresh2.  Returns the number removed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
        unsigned long tref = jiffies - 5 * HZ;
        struct neighbour *n, *tmp;
        int shrunk = 0;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);

        list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
                if (refcount_read(&n->refcnt) == 1) {
                        bool remove = false;

                        write_lock(&n->lock);
                        if ((n->nud_state == NUD_FAILED) ||
                            time_after(tref, n->updated))
                                remove = true;
                        write_unlock(&n->lock);

                        if (remove && neigh_remove_one(n, tbl))
                                shrunk++;
                        if (shrunk >= max_clean)
                                break;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
257
258 static void neigh_add_timer(struct neighbour *n, unsigned long when)
259 {
260         neigh_hold(n);
261         if (unlikely(mod_timer(&n->timer, when))) {
262                 printk("NEIGH: BUG, double timer add, state is %x\n",
263                        n->nud_state);
264                 dump_stack();
265         }
266 }
267
268 static int neigh_del_timer(struct neighbour *n)
269 {
270         if ((n->nud_state & NUD_IN_TIMER) &&
271             del_timer(&n->timer)) {
272                 neigh_release(n);
273                 return 1;
274         }
275         return 0;
276 }
277
278 static void pneigh_queue_purge(struct sk_buff_head *list)
279 {
280         struct sk_buff *skb;
281
282         while ((skb = skb_dequeue(list)) != NULL) {
283                 dev_put(skb->dev);
284                 kfree_skb(skb);
285         }
286 }
287
/* Remove from @tbl every entry attached to @dev (every entry when @dev
 * is NULL), optionally skipping NUD_PERMANENT ones.  Caller holds
 * tbl->lock for writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
                            bool skip_perm)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        if (skip_perm && n->nud_state & NUD_PERMANENT) {
                                np = &n->next;
                                continue;
                        }
                        /* unlink from the bucket chain, then neuter */
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        neigh_mark_dead(n);
                        if (refcount_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                __skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                neigh_dbg(2, "neigh %p is stray\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
341
/* Flush every entry for @dev from @tbl, NUD_PERMANENT ones included
 * (skip_perm = false).
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev, false);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
349
/* Common device-down handling: flush @dev's entries (keeping permanent
 * ones when @skip_perm), drop matching proxy entries, then stop and
 * purge the proxy timer/queue.  Note the lock handoff: tbl->lock is
 * taken here and released inside pneigh_ifdown_and_unlock().
 * Always returns 0.
 */
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
                          bool skip_perm)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev, skip_perm);
        pneigh_ifdown_and_unlock(tbl, dev);

        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
361
/* Carrier loss on @dev: flush its entries but keep NUD_PERMANENT ones. */
int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
        __neigh_ifdown(tbl, dev, true);
        return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);
368
/* @dev is going down: flush all of its entries, permanent included. */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        __neigh_ifdown(tbl, dev, false);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
375
/* Allocate and minimally initialise an entry for @dev.  Unless
 * @exempt_from_gc, the entry is counted against the table's gc
 * thresholds, forced GC is attempted when over gc_thresh2/gc_thresh3,
 * and NULL is returned when the table is full.  The entry comes back
 * dead (n->dead = 1) and not yet hashed; ___neigh_create() finishes it.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
                                     struct net_device *dev,
                                     bool exempt_from_gc)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        if (exempt_from_gc)
                goto do_alloc;

        entries = atomic_inc_return(&tbl->gc_entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3) {
                        net_info_ratelimited("%s: neighbor table overflow!\n",
                                             tbl->id);
                        NEIGH_CACHE_STAT_INC(tbl, table_fulls);
                        goto out_entries;
                }
        }

do_alloc:
        n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
        if (!n)
                goto out_entries;

        __skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        seqlock_init(&n->hh.hh_lock);
        n->parms          = neigh_parms_clone(&tbl->parms);
        timer_setup(&n->timer, neigh_timer_handler, 0);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        refcount_set(&n->refcnt, 1);
        n->dead           = 1; /* not live until hashed by the caller */
        INIT_LIST_HEAD(&n->gc_list);

        atomic_inc(&tbl->entries);
out:
        return n;

out_entries:
        /* undo the gc_entries accounting taken above */
        if (!exempt_from_gc)
                atomic_dec(&tbl->gc_entries);
        goto out;
}
430
431 static void neigh_get_hash_rnd(u32 *x)
432 {
433         *x = get_random_u32() | 1;
434 }
435
/* Allocate a hash table with 2^@shift buckets.  Small bucket arrays
 * (<= PAGE_SIZE) come from kzalloc, larger ones from whole zeroed
 * pages.  Fresh hash seeds are generated.  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
        size_t size = (1 << shift) * sizeof(struct neighbour *);
        struct neigh_hash_table *ret;
        struct neighbour __rcu **buckets;
        int i;

        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (!ret)
                return NULL;
        if (size <= PAGE_SIZE)
                buckets = kzalloc(size, GFP_ATOMIC);
        else
                buckets = (struct neighbour __rcu **)
                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                           get_order(size));
        if (!buckets) {
                kfree(ret);
                return NULL;
        }
        ret->hash_buckets = buckets;
        ret->hash_shift = shift;
        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
                neigh_get_hash_rnd(&ret->hash_rnd[i]);
        return ret;
}
462
/* RCU callback pairing with neigh_hash_alloc(): free the bucket array
 * via the same method it was allocated with (kfree vs free_pages,
 * chosen by size), then the table struct itself.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
        struct neigh_hash_table *nht = container_of(head,
                                                    struct neigh_hash_table,
                                                    rcu);
        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
        struct neighbour __rcu **buckets = nht->hash_buckets;

        if (size <= PAGE_SIZE)
                kfree(buckets);
        else
                free_pages((unsigned long)buckets, get_order(size));
        kfree(nht);
}
477
/* Replace tbl->nht with a table of 2^@new_shift buckets, rehashing
 * every entry with the new table's seeds.  Caller holds tbl->lock.
 * The old table is freed after a grace period via call_rcu.  On
 * allocation failure the old table is kept and returned.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        hash >>= (32 - new_nht->hash_shift);
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        /* push onto the head of the new bucket chain */
                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
518
/* Look up the entry for (@pkey, @dev) in @tbl.  Lockless RCU bucket
 * walk; on a hit the refcount is raised for the caller (NULL comes
 * back if the entry is concurrently being released).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        n = __neigh_lookup_noref(tbl, pkey, dev);
        if (n) {
                if (!refcount_inc_not_zero(&n->refcnt))
                        n = NULL;
                NEIGH_CACHE_STAT_INC(tbl, hits);
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
538
/* Like neigh_lookup() but matches on (@net, @pkey) only, ignoring the
 * device (the hash is computed with dev == NULL).  Returns a
 * referenced entry or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        unsigned int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        if (!refcount_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
569
/* Allocate, construct and hash a new entry for (@pkey, @dev), growing
 * the hash first when the table exceeds one entry per bucket.  If a
 * matching entry was inserted concurrently while we built ours, that
 * entry is returned instead and the new one is released.  Returns an
 * ERR_PTR() on failure; holds a reference for the caller when
 * @want_ref.
 */
static struct neighbour *___neigh_create(struct neigh_table *tbl,
                                         const void *pkey,
                                         struct net_device *dev,
                                         bool exempt_from_gc, bool want_ref)
{
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
        u32 hash_val;
        unsigned int key_len = tbl->key_len;
        int error;
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(dev, n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* backdate 'confirmed' so the fresh entry does not look
         * recently confirmed
         */
        n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* did somebody else insert the same key while we were building? */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        if (!exempt_from_gc)
                list_add_tail(&n->gc_list, &n->tbl->gc_list);

        if (want_ref)
                neigh_hold(n);
        /* publish at the head of the bucket chain */
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        neigh_dbg(2, "neigh %p is created\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
661
/* Public creation entry point: new entries are subject to normal gc
 * accounting (exempt_from_gc = false).
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
        return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
668
669 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
670 {
671         u32 hash_val = *(u32 *)(pkey + key_len - 4);
672         hash_val ^= (hash_val >> 16);
673         hash_val ^= hash_val >> 8;
674         hash_val ^= hash_val >> 4;
675         hash_val &= PNEIGH_HASHMASK;
676         return hash_val;
677 }
678
679 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
680                                               struct net *net,
681                                               const void *pkey,
682                                               unsigned int key_len,
683                                               struct net_device *dev)
684 {
685         while (n) {
686                 if (!memcmp(n->key, pkey, key_len) &&
687                     net_eq(pneigh_net(n), net) &&
688                     (n->dev == dev || !n->dev))
689                         return n;
690                 n = n->next;
691         }
692         return NULL;
693 }
694
/* Hash @pkey and scan the matching proxy bucket.  Takes no locking
 * itself — NOTE(review): callers appear expected to hold tbl->lock
 * (pneigh_lookup wraps the same helper in read_lock_bh); confirm at
 * call sites.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
705
/* Look up the proxy entry for (@net, @pkey, @dev).  When @creat is set
 * and nothing is found, allocate and insert a new entry (requires
 * RTNL).  Returns NULL on a miss without @creat, or on allocation /
 * pconstructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                /* constructor refused the entry: undo and report miss */
                if (dev)
                        dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);
750
751
/* Remove and free the proxy entry exactly matching (@net, @pkey, @dev).
 * Returns 0 on success, -ENOENT when no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        unsigned int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        /* destructor and dev_put run outside the lock */
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}
777
/* Unlink from @tbl every proxy entry bound to @dev (every entry when
 * @dev is NULL).  Entered with tbl->lock write-held by the caller;
 * the lock is released here and the unlinked entries are destroyed
 * outside it.  Always returns -ENOENT.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                                    struct net_device *dev)
{
        struct pneigh_entry *n, **np, *freelist = NULL;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                /* collect on a private free list */
                                *np = n->next;
                                n->next = freelist;
                                freelist = n;
                                continue;
                        }
                        np = &n->next;
                }
        }
        write_unlock_bh(&tbl->lock);
        while ((n = freelist)) {
                freelist = n->next;
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
                if (n->dev)
                        dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
}
808
809 static void neigh_parms_destroy(struct neigh_parms *parms);
810
/* Drop a reference on @parms; destroy it when the count reaches zero. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (refcount_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
816
/*
 *      Final teardown, invoked when the last reference is dropped.
 *      The neighbour must already be out of the table (neigh->dead
 *      set); destroying a live entry is a bug and is only warned
 *      about, not performed.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                pr_warn("Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        /* a live timer would hold its own reference, so this should
         * never fire for a properly dead entry
         */
        if (neigh_del_timer(neigh))
                pr_warn("Impossible event\n");

        write_lock_bh(&neigh->lock);
        __skb_queue_purge(&neigh->arp_queue);
        write_unlock_bh(&neigh->lock);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        neigh_dbg(2, "neigh %p is destroyed\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
853
/* Neighbour state is suspicious;
   disable fast path: route packets through the generic ops->output
   handler.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is suspected\n", neigh);

        neigh->output = neigh->ops->output;
}
865
/* Neighbour state is OK;
   enable fast path: use the direct ops->connected_output handler.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is connected\n", neigh);

        neigh->output = neigh->ops->connected_output;
}
877
/* Periodic garbage collector for a neighbour table.
 *
 * Runs from the power-efficient workqueue. Refreshes the randomized
 * reachable_time of every parms instance roughly every 5 minutes, then
 * walks the hash table reclaiming unreferenced entries that are FAILED
 * or have been idle longer than GC_STALETIME. Re-arms itself at
 * BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	/* Below gc_thresh1 the table is small enough to leave alone. */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			/* Permanent entries, entries with a live timer and
			 * externally learned ones are never GC candidates.
			 */
			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			/* refcnt == 1 means only the table itself holds the
			 * entry, so it can be unlinked safely.
			 */
			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		/* The hash table may have been resized while unlocked;
		 * re-fetch the pointer before touching the next bucket.
		 */
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
959
960 static __inline__ int neigh_max_probes(struct neighbour *n)
961 {
962         struct neigh_parms *p = n->parms;
963         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
964                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
965                 NEIGH_VAR(p, MCAST_PROBES));
966 }
967
/* Fail a neighbour entry: report unreachability for every queued packet
 * and flush the queue.
 *
 * Called with neigh->lock write-held; the lock is dropped around each
 * error_report() call and re-taken afterwards (see sparse annotations).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	/* Re-check nud_state each iteration: error_report() runs unlocked
	 * and may have changed the entry's state underneath us.
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
992
/* Send one solicitation (e.g. ARP request / NS) for @neigh.
 *
 * Called with neigh->lock write-held; DROPS the lock before invoking
 * ->solicit() so the lower layers run without it.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* Free our clone (or NULL, which is a no-op). */
	kfree_skb(skb);
}
1006
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine transitions that are time-based:
 * REACHABLE -> DELAY/STALE, DELAY -> REACHABLE/PROBE, and retransmission
 * in INCOMPLETE/PROBE until neigh_max_probes() is exhausted, at which
 * point the entry moves to FAILED. The timer holds one reference on the
 * entry, released at the bottom.
 */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Timer raced with a state change that disarmed it: nothing to do. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but unconfirmed: give upper layers a
			 * grace period to confirm before actively probing.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted: declare the neighbour unreachable. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp to at least HZ/2 in the future to avoid re-fire storms. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	/* Drop the reference the (now expired) timer held. */
	neigh_release(neigh);
}
1094
/* Kick off (or continue) address resolution for @neigh, optionally
 * queueing @skb until the address resolves.
 *
 * Returns 0 if the caller may transmit immediately (entry usable),
 * 1 if the skb was consumed (queued or dropped because resolution
 * failed or the entry is dead). Takes neigh->lock itself.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or already being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the unicast budget so
			 * only multicast/app probes remain for this round.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry just got used: verify it lazily. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Enforce the per-entry byte quota by evicting the
			 * oldest queued packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops the plain lock; pairing with local_bh_enable()
	 * below reconstitutes the write_unlock_bh() we'd otherwise do.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1172
1173 static void neigh_update_hhs(struct neighbour *neigh)
1174 {
1175         struct hh_cache *hh;
1176         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1177                 = NULL;
1178
1179         if (neigh->dev->header_ops)
1180                 update = neigh->dev->header_ops->cache_update;
1181
1182         if (update) {
1183                 hh = &neigh->hh;
1184                 if (hh->hh_len) {
1185                         write_seqlock_bh(&hh->hh_lock);
1186                         update(hh, neigh->dev, neigh->ha);
1187                         write_sequnlock_bh(&hh->hh_lock);
1188                 }
1189         }
1190 }
1191
1192
1193
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Returns 0 on success, -EPERM when a non-admin update targets a
   NOARP/PERMANENT entry, -EINVAL when no usable lladdr is available.

   Caller MUST hold reference count on the entry.
 */

static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	/* Transition to an invalid state (NONE/FAILED/...): tear down the
	 * timer, fall back to the slow output path, and flush the queue if
	 * a resolution attempt just failed.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but demote the
				 * entry so it gets re-verified.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* ha_lock lets lockless output paths read a consistent copy. */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate confirmation for non-connected states so the next
		 * use triggers re-verification promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	/* Entry just became valid: flush the packets that were queued
	 * waiting for resolution.
	 */
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	/* PERMANENT or ext-learned status changed: re-evaluate whether this
	 * entry belongs on the gc list.
	 */
	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
1392
/* Public wrapper around __neigh_update() for callers that have no
 * netlink extended-ack context to report errors through.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
1399
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 *
 * Only FAILED entries are touched; the probe counter is preset to the
 * maximum so a single timer tick suffices to fail the entry again if no
 * response arrives.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
1416
1417 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1418                                  u8 *lladdr, void *saddr,
1419                                  struct net_device *dev)
1420 {
1421         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1422                                                  lladdr || !dev->addr_len);
1423         if (neigh)
1424                 neigh_update(neigh, lladdr, NUD_STALE,
1425                              NEIGH_UPDATE_F_OVERRIDE, 0);
1426         return neigh;
1427 }
1428 EXPORT_SYMBOL(neigh_event_ns);
1429
1430 /* called with read_lock_bh(&n->lock); */
1431 static void neigh_hh_init(struct neighbour *n)
1432 {
1433         struct net_device *dev = n->dev;
1434         __be16 prot = n->tbl->protocol;
1435         struct hh_cache *hh = &n->hh;
1436
1437         write_lock_bh(&n->lock);
1438
1439         /* Only one thread can come in here and initialize the
1440          * hh_cache entry.
1441          */
1442         if (!hh->hh_len)
1443                 dev->header_ops->cache(n, hh, prot);
1444
1445         write_unlock_bh(&n->lock);
1446 }
1447
/* Slow and careful. */

/* Output path used while the neighbour may still need resolution.
 * neigh_event_send() returns 0 when the entry is usable; the hardware
 * header is then built from a seqlock-consistent snapshot of neigh->ha
 * and the packet is handed to the device queue. Returns the xmit result,
 * 0 if the skb was queued for resolution, or -EINVAL on header failure.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		/* Retry until neigh->ha was not rewritten mid-read. */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1482
1483 /* As fast as possible without hh cache */
1484
1485 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1486 {
1487         struct net_device *dev = neigh->dev;
1488         unsigned int seq;
1489         int err;
1490
1491         do {
1492                 __skb_pull(skb, skb_network_offset(skb));
1493                 seq = read_seqbegin(&neigh->ha_lock);
1494                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1495                                       neigh->ha, NULL, skb->len);
1496         } while (read_seqretry(&neigh->ha_lock, seq));
1497
1498         if (err >= 0)
1499                 err = dev_queue_xmit(skb);
1500         else {
1501                 err = -EINVAL;
1502                 kfree_skb(skb);
1503         }
1504         return err;
1505 }
1506 EXPORT_SYMBOL(neigh_connected_output);
1507
/* Output path for devices that need no link-layer header at all:
 * hand the packet straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1513
/* Timer handler for delayed proxy replies.
 *
 * Walks tbl->proxy_queue, replaying (via tbl->proxy_redo) every packet
 * whose scheduled time has arrived, and re-arms the timer for the
 * earliest packet still pending.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* Signed delta: <= 0 means this packet is due. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1547
/* Queue @skb for a delayed proxy reply, randomly jittered within
 * PROXY_DELAY, and (re)arm the proxy timer for the earliest deadline.
 *
 * NOTE(review): the modulo assumes NEIGH_VAR(p, PROXY_DELAY) != 0;
 * callers appear to check this before enqueueing — confirm before
 * adding new call sites.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	/* Bound the backlog; excess packets are simply dropped. */
	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* Keep the timer aimed at the earliest pending deadline. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	/* Hold the device until neigh_proxy_process() releases it. */
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1576
1577 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1578                                                       struct net *net, int ifindex)
1579 {
1580         struct neigh_parms *p;
1581
1582         list_for_each_entry(p, &tbl->parms_list, list) {
1583                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1584                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1585                         return p;
1586         }
1587
1588         return NULL;
1589 }
1590
/* Allocate a per-device neigh_parms for @dev, cloned from the table
 * defaults, run the driver's ndo_neigh_setup hook, and link it into
 * tbl->parms_list. Holds a reference on @dev for the lifetime of the
 * parms. Returns NULL on allocation or setup failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Start from a byte copy of the table-wide defaults. */
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Give the driver a chance to veto or tune the parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1624
/* RCU callback: drop the list's reference on the parms after the grace
 * period; the actual kfree happens in neigh_parms_destroy() once the
 * refcount hits zero.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1632
/* Unlink @parms from the table and schedule its reference drop after an
 * RCU grace period, so lockless readers still traversing the list are
 * safe. The table's built-in default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1646
/* Final free, invoked from neigh_parms_put() when the last reference
 * is dropped.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1651
1652 static struct lock_class_key neigh_table_proxy_queue_class;
1653
1654 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1655
/* Boot-time initialization of a protocol's neighbour table (ARP, ND, ...)
 * and registration in the global neigh_tables[] array at @index.
 * Allocates the hash tables, per-cpu statistics and /proc entry, and
 * kicks off the periodic GC work; panics on allocation failure since
 * this runs during protocol init.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial main hash: 2^3 = 8 buckets; grows on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	/* Push last_rand into the future so the first GC pass does not
	 * immediately re-randomize reachable_time.
	 */
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1707
/* Tear down a table registered with neigh_table_init(): unpublish it,
 * stop the GC work and proxy timer, flush all entries, then free the
 * hashes, proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);	/* NULL dev: flush entries on all devices */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* RCU readers may still hold the hash; free it after a grace period */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1734
1735 static struct neigh_table *neigh_find_table(int family)
1736 {
1737         struct neigh_table *tbl = NULL;
1738
1739         switch (family) {
1740         case AF_INET:
1741                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1742                 break;
1743         case AF_INET6:
1744                 tbl = neigh_tables[NEIGH_ND_TABLE];
1745                 break;
1746         case AF_DECnet:
1747                 tbl = neigh_tables[NEIGH_DN_TABLE];
1748                 break;
1749         }
1750
1751         return tbl;
1752 }
1753
/* RTM_DELNEIGH handler: delete a neighbour or proxy entry.
 *
 * Requires NDA_DST; NDA_LLADDR is ignored.  With NTF_PROXY set the
 * request is routed to the proxy table (pneigh_delete), otherwise a
 * device is mandatory and the matching cache entry is marked FAILED
 * and unlinked.  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	/* proxy entries live in a separate table; dev may be NULL here */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	/* a regular cache entry is always bound to a device */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* force the entry to FAILED, then unlink it from the hash.
	 * NOTE(review): neigh_release() drops only the reference taken by
	 * neigh_lookup() above; the hash table's own reference keeps the
	 * entry alive until neigh_remove_one() unlinks it — confirm
	 * against neigh_remove_one()'s refcounting.
	 */
	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1818
/* RTM_NEWNEIGH handler: create or update a neighbour or proxy entry.
 *
 * Requires NDA_DST.  With NTF_PROXY set, a proxy entry is created (or
 * refreshed) via pneigh_lookup(..., creat=1); otherwise a device is
 * mandatory and a cache entry is created (NLM_F_CREATE) or updated,
 * honouring NLM_F_EXCL/NLM_F_REPLACE semantics.  Runs under RTNL.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* admin updates may override existing state by default */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL]) {
		if (nla_len(tb[NDA_PROTOCOL]) != sizeof(u8)) {
			NL_SET_ERR_MSG(extack, "Invalid protocol attribute");
			goto out;
		}
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	}

	/* proxy entries are kept in a separate table; dev may be NULL */
	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		/* permanent and externally learned entries are not GC'd */
		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* without REPLACE, don't clobber an existing entry's state */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	/* NTF_USE: just kick resolution for the entry instead of updating it */
	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
1947
/* Emit an NDTA_PARMS nested attribute describing @parms.
 *
 * Returns the value of nla_nest_end() on success, -ENOBUFS if the nest
 * cannot be started, or -EMSGSIZE if any attribute does not fit (the
 * nest is cancelled in that case).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1995
/* Fill one RTM_NEWNEIGHTBL message describing table @tbl: name, GC
 * thresholds/interval, an NDTA_CONFIG snapshot, aggregated per-cpu
 * NDTA_STATS, and the table's default parms.
 *
 * tbl->lock is read-held across the fill so the values are mutually
 * consistent.  Returns 0 on success or -EMSGSIZE if the skb is full.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* snapshot of the table configuration (NDTA_CONFIG) */
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash table itself is RCU-protected, not under tbl->lock */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* sum per-cpu counters into one NDTA_STATS blob */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* the table's default parms are never device-bound */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2084
2085 static int neightbl_fill_param_info(struct sk_buff *skb,
2086                                     struct neigh_table *tbl,
2087                                     struct neigh_parms *parms,
2088                                     u32 pid, u32 seq, int type,
2089                                     unsigned int flags)
2090 {
2091         struct ndtmsg *ndtmsg;
2092         struct nlmsghdr *nlh;
2093
2094         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2095         if (nlh == NULL)
2096                 return -EMSGSIZE;
2097
2098         ndtmsg = nlmsg_data(nlh);
2099
2100         read_lock_bh(&tbl->lock);
2101         ndtmsg->ndtm_family = tbl->family;
2102         ndtmsg->ndtm_pad1   = 0;
2103         ndtmsg->ndtm_pad2   = 0;
2104
2105         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2106             neightbl_fill_parms(skb, parms) < 0)
2107                 goto errout;
2108
2109         read_unlock_bh(&tbl->lock);
2110         nlmsg_end(skb, nlh);
2111         return 0;
2112 errout:
2113         read_unlock_bh(&tbl->lock);
2114         nlmsg_cancel(skb, nlh);
2115         return -EMSGSIZE;
2116 }
2117
/* Netlink policy for the top-level RTM_SETNEIGHTBL attributes */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2126
/* Netlink policy for the attributes nested inside NDTA_PARMS */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
2143
/* RTM_SETNEIGHTBL handler: update a neighbour table's tunables.
 *
 * The table is selected by NDTA_NAME (optionally narrowed by family).
 * NDTA_PARMS updates a per-device (or default) parms set looked up by
 * NDTPA_IFINDEX; the GC thresholds/interval may only be changed from
 * the initial network namespace.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* find the table matching NDTA_NAME (and family, if given) */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every attribute present in the nest */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count; convert to bytes */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* let interested drivers/subsystems know */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* table-global GC knobs may only be set from init_net */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2304
2305 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2306                                     struct netlink_ext_ack *extack)
2307 {
2308         struct ndtmsg *ndtm;
2309
2310         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2311                 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2312                 return -EINVAL;
2313         }
2314
2315         ndtm = nlmsg_data(nlh);
2316         if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2317                 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2318                 return -EINVAL;
2319         }
2320
2321         if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2322                 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2323                 return -EINVAL;
2324         }
2325
2326         return 0;
2327 }
2328
/* RTM_GETNEIGHTBL dump: for each table (optionally filtered by family)
 * emit the table info followed by its non-default per-device parms.
 *
 * Resume state lives in cb->args[0] (table index) and cb->args[1]
 * (parms index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		/* walk parms after the default one (already in fill_info) */
		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2389
/* Fill one RTM_NEWNEIGH message describing cache entry @neigh:
 * header, NDA_DST, a consistent state/lladdr/cacheinfo snapshot taken
 * under neigh->lock, probe count and optional protocol.
 *
 * Returns 0 on success or -EMSGSIZE if the skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* hold neigh->lock so state, lladdr and cacheinfo are coherent */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2445
2446 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2447                             u32 pid, u32 seq, int type, unsigned int flags,
2448                             struct neigh_table *tbl)
2449 {
2450         struct nlmsghdr *nlh;
2451         struct ndmsg *ndm;
2452
2453         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2454         if (nlh == NULL)
2455                 return -EMSGSIZE;
2456
2457         ndm = nlmsg_data(nlh);
2458         ndm->ndm_family  = tbl->family;
2459         ndm->ndm_pad1    = 0;
2460         ndm->ndm_pad2    = 0;
2461         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2462         ndm->ndm_type    = RTN_UNICAST;
2463         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2464         ndm->ndm_state   = NUD_NONE;
2465
2466         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2467                 goto nla_put_failure;
2468
2469         if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2470                 goto nla_put_failure;
2471
2472         nlmsg_end(skb, nlh);
2473         return 0;
2474
2475 nla_put_failure:
2476         nlmsg_cancel(skb, nlh);
2477         return -EMSGSIZE;
2478 }
2479
/* Notify in-kernel listeners (netevent chain) and userspace
 * (rtnetlink RTM_NEWNEIGH multicast) that @neigh has changed.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2485
2486 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2487 {
2488         struct net_device *master;
2489
2490         if (!master_idx)
2491                 return false;
2492
2493         master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2494         if (!master || master->ifindex != master_idx)
2495                 return true;
2496
2497         return false;
2498 }
2499
2500 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2501 {
2502         if (filter_idx && (!dev || dev->ifindex != filter_idx))
2503                 return true;
2504
2505         return false;
2506 }
2507
/* Filters parsed from an RTM_GETNEIGH dump request;
 * zero in either field means "do not filter on it".
 */
struct neigh_dump_filter {
	int master_idx;		/* restrict to slaves of this master */
	int dev_idx;		/* restrict to this device */
};
2512
/* Dump all neighbours of @tbl that belong to the requester's netns and
 * pass the dev/master filters.  Resume state is kept in cb->args[1]
 * (hash bucket) and cb->args[2] (index within bucket).  Returns
 * skb->len after a complete pass, or -1 when the skb filled up and the
 * dump must be continued later.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;	/* entered a new bucket: restart index */
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;	/* skb full, resume here next time */
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2559
/* Proxy-table counterpart of neigh_dump_table(): walk
 * tbl->phash_buckets under tbl->lock, applying the same netns and
 * dev/master filters.  Resume state lives in cb->args[3] (bucket)
 * and cb->args[4] (index).  Returns skb->len, or -1 if the skb
 * filled up mid-dump.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;	/* new bucket: restart index */
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH, flags, tbl) < 0) {
				/* drop the lock before bailing out */
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2604
2605 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2606                                 bool strict_check,
2607                                 struct neigh_dump_filter *filter,
2608                                 struct netlink_ext_ack *extack)
2609 {
2610         struct nlattr *tb[NDA_MAX + 1];
2611         int err, i;
2612
2613         if (strict_check) {
2614                 struct ndmsg *ndm;
2615
2616                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2617                         NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2618                         return -EINVAL;
2619                 }
2620
2621                 ndm = nlmsg_data(nlh);
2622                 if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2623                     ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) {
2624                         NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2625                         return -EINVAL;
2626                 }
2627
2628                 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2629                                          NULL, extack);
2630         } else {
2631                 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2632                                   NULL, extack);
2633         }
2634         if (err < 0)
2635                 return err;
2636
2637         for (i = 0; i <= NDA_MAX; ++i) {
2638                 if (!tb[i])
2639                         continue;
2640
2641                 /* all new attributes should require strict_check */
2642                 switch (i) {
2643                 case NDA_IFINDEX:
2644                         if (nla_len(tb[i]) != sizeof(u32)) {
2645                                 NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
2646                                 return -EINVAL;
2647                         }
2648                         filter->dev_idx = nla_get_u32(tb[i]);
2649                         break;
2650                 case NDA_MASTER:
2651                         if (nla_len(tb[i]) != sizeof(u32)) {
2652                                 NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
2653                                 return -EINVAL;
2654                         }
2655                         filter->master_idx = nla_get_u32(tb[i]);
2656                         break;
2657                 default:
2658                         if (strict_check) {
2659                                 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2660                                 return -EINVAL;
2661                         }
2662                 }
2663         }
2664
2665         return 0;
2666 }
2667
/* RTM_GETNEIGH dump handler.  Walks every registered neighbour table
 * matching the requested address family; a request header whose
 * ndm_flags equals NTF_PROXY selects the proxy (pneigh) tables instead.
 * Filter-parse errors abort the dump only under strict checking, for
 * backward compatibility with older requests.  Resume state:
 * cb->args[0] = table index, args[1..4] = per-table cursors.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* moving to a later table: reset the per-table cursors */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2713
/* Invoke @cb(@n, @cookie) on every neighbour in @tbl.  Runs under
 * rcu_read_lock_bh() plus tbl->lock held for read, so the hash table
 * cannot be resized while it is being walked.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2735
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain of @tbl and unlink + release each entry for
 * which @cb returns non-zero.  The unlink and neigh_mark_dead() happen
 * under the per-entry write lock; the final cleanup_and_release runs
 * after that lock has been dropped.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* unlink n: point *np past it */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2770
2771 int neigh_xmit(int index, struct net_device *dev,
2772                const void *addr, struct sk_buff *skb)
2773 {
2774         int err = -EAFNOSUPPORT;
2775         if (likely(index < NEIGH_NR_TABLES)) {
2776                 struct neigh_table *tbl;
2777                 struct neighbour *neigh;
2778
2779                 tbl = neigh_tables[index];
2780                 if (!tbl)
2781                         goto out;
2782                 rcu_read_lock_bh();
2783                 neigh = __neigh_lookup_noref(tbl, addr, dev);
2784                 if (!neigh)
2785                         neigh = __neigh_create(tbl, addr, dev, false);
2786                 err = PTR_ERR(neigh);
2787                 if (IS_ERR(neigh)) {
2788                         rcu_read_unlock_bh();
2789                         goto out_kfree_skb;
2790                 }
2791                 err = neigh->output(neigh, skb);
2792                 rcu_read_unlock_bh();
2793         }
2794         else if (index == NEIGH_LINK_TABLE) {
2795                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2796                                       addr, NULL, skb->len);
2797                 if (err < 0)
2798                         goto out_kfree_skb;
2799                 err = dev_queue_xmit(skb);
2800         }
2801 out:
2802         return err;
2803 out_kfree_skb:
2804         kfree_skb(skb);
2805         goto out;
2806 }
2807 EXPORT_SYMBOL(neigh_xmit);
2808
2809 #ifdef CONFIG_PROC_FS
2810
/* Return the first neighbour (in hash order) visible to this seq_file:
 * same netns, accepted by the optional neigh_sub_iter hook, and - when
 * NEIGH_SEQ_SKIP_NOARP is set - in some state other than NUD_NOARP.
 * Records the bucket reached in state->bucket.  Caller holds the
 * rcu_read_lock_bh() taken in neigh_seq_start().
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* we are (re)starting in the main neighbour table */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2849
/* Advance to the next visible neighbour after @n, moving into later
 * hash buckets as needed (same visibility rules as neigh_get_first()).
 * When @pos is non-NULL it is decremented once per entry produced, so
 * callers can seek to an absolute position.  Returns NULL at the end
 * of the table.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		/* the sub-iterator may keep us on the same entry */
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2897
2898 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2899 {
2900         struct neighbour *n = neigh_get_first(seq);
2901
2902         if (n) {
2903                 --(*pos);
2904                 while (*pos) {
2905                         n = neigh_get_next(seq, n, pos);
2906                         if (!n)
2907                                 break;
2908                 }
2909         }
2910         return *pos ? NULL : n;
2911 }
2912
2913 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2914 {
2915         struct neigh_seq_state *state = seq->private;
2916         struct net *net = seq_file_net(seq);
2917         struct neigh_table *tbl = state->tbl;
2918         struct pneigh_entry *pn = NULL;
2919         int bucket = state->bucket;
2920
2921         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2922         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2923                 pn = tbl->phash_buckets[bucket];
2924                 while (pn && !net_eq(pneigh_net(pn), net))
2925                         pn = pn->next;
2926                 if (pn)
2927                         break;
2928         }
2929         state->bucket = bucket;
2930
2931         return pn;
2932 }
2933
/* Advance to the next proxy entry in this netns after @pn, walking
 * into later hash buckets as needed.  When @pos is non-NULL it is
 * decremented once if an entry is produced.  Returns NULL past the
 * last bucket.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* skip entries from foreign namespaces */
	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
2961
2962 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2963 {
2964         struct pneigh_entry *pn = pneigh_get_first(seq);
2965
2966         if (pn) {
2967                 --(*pos);
2968                 while (*pos) {
2969                         pn = pneigh_get_next(seq, pn, pos);
2970                         if (!pn)
2971                                 break;
2972                 }
2973         }
2974         return *pos ? NULL : pn;
2975 }
2976
2977 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2978 {
2979         struct neigh_seq_state *state = seq->private;
2980         void *rc;
2981         loff_t idxpos = *pos;
2982
2983         rc = neigh_get_idx(seq, &idxpos);
2984         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2985                 rc = pneigh_get_idx(seq, &idxpos);
2986
2987         return rc;
2988 }
2989
/* seq_file ->start for the neighbour proc files.  Takes
 * rcu_read_lock_bh() - released in neigh_seq_stop() - caches the hash
 * table, and positions the iterator: SEQ_START_TOKEN at *pos == 0,
 * otherwise the *pos'th entry (neighbours first, then proxy entries
 * unless NEIGH_SEQ_NEIGH_ONLY is set).
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
3005
/* seq_file ->next: step within the neighbour table, falling through to
 * the proxy table once the neighbours are exhausted.  Which table we
 * are currently in is tracked by NEIGH_SEQ_IS_PNEIGH in state->flags.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* neighbours done: switch to the proxy table if allowed */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
3032
/* Release the RCU read-side section taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
3039
3040 /* statistics via seq_file */
3041
3042 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3043 {
3044         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3045         int cpu;
3046
3047         if (*pos == 0)
3048                 return SEQ_START_TOKEN;
3049
3050         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3051                 if (!cpu_possible(cpu))
3052                         continue;
3053                 *pos = cpu+1;
3054                 return per_cpu_ptr(tbl->stats, cpu);
3055         }
3056         return NULL;
3057 }
3058
3059 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3060 {
3061         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3062         int cpu;
3063
3064         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3065                 if (!cpu_possible(cpu))
3066                         continue;
3067                 *pos = cpu+1;
3068                 return per_cpu_ptr(tbl->stats, cpu);
3069         }
3070         return NULL;
3071 }
3072
/* Nothing to release: ->start/->next take no locks here. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
3077
/* Print one row of /proc/net/stat/<table>: the header for
 * SEQ_START_TOKEN, otherwise the per-CPU counters of the entry @v.
 * Note the first column (tbl->entries) is table-wide and therefore
 * repeats identically on every CPU row.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
3112
/* /proc/net/stat/<table> iterator: a header line followed by one row
 * per possible CPU.
 */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
3119 #endif /* CONFIG_PROC_FS */
3120
/* Worst-case payload size of a neighbour notification; used to size
 * the skb allocated in __neigh_notify().
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}
3130
/* Build a netlink message of @type for @n and multicast it to the
 * RTNLGRP_NEIGH group of n's netns.  On allocation or fill failure the
 * error is recorded on the group socket so listeners observe it.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
3155
/* Ask userspace resolvers listening on RTNLGRP_NEIGH to resolve @n by
 * broadcasting an RTM_GETNEIGH request for it.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
3161
#ifdef CONFIG_SYSCTL
static int zero;		/* shared lower bound for clamped sysctls */
static int int_max = INT_MAX;	/* shared upper bound for clamped sysctls */
/* unres_qlen is stored in bytes; cap the packet-count view so the
 * bytes conversion in proc_unres_qlen() cannot overflow an int.
 */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3166
3167 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3168                            void __user *buffer, size_t *lenp, loff_t *ppos)
3169 {
3170         int size, ret;
3171         struct ctl_table tmp = *ctl;
3172
3173         tmp.extra1 = &zero;
3174         tmp.extra2 = &unres_qlen_max;
3175         tmp.data = &size;
3176
3177         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3178         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3179
3180         if (write && !ret)
3181                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3182         return ret;
3183 }
3184
3185 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3186                                                    int family)
3187 {
3188         switch (family) {
3189         case AF_INET:
3190                 return __in_dev_arp_parms_get_rcu(dev);
3191         case AF_INET6:
3192                 return __in6_dev_nd_parms_get_rcu(dev);
3193         }
3194         return NULL;
3195 }
3196
3197 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3198                                   int index)
3199 {
3200         struct net_device *dev;
3201         int family = neigh_parms_family(p);
3202
3203         rcu_read_lock();
3204         for_each_netdev_rcu(net, dev) {
3205                 struct neigh_parms *dst_p =
3206                                 neigh_get_dev_parms_rcu(dev, family);
3207
3208                 if (dst_p && !test_bit(index, dst_p->data_state))
3209                         dst_p->data[index] = p->data[index];
3210         }
3211         rcu_read_unlock();
3212 }
3213
/* Common post-write bookkeeping for all neighbour sysctl handlers:
 * mark the slot as explicitly set, fire the netevent for
 * delay_probe_time changes, and - when this is the default parms
 * (dev == NULL) - propagate the new value to all devices that have
 * not overridden it.  Reads are a no-op.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* which NEIGH_VAR slot this ctl_table entry points into */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
3230
3231 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3232                                            void __user *buffer,
3233                                            size_t *lenp, loff_t *ppos)
3234 {
3235         struct ctl_table tmp = *ctl;
3236         int ret;
3237
3238         tmp.extra1 = &zero;
3239         tmp.extra2 = &int_max;
3240
3241         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3242         neigh_proc_update(ctl, write);
3243         return ret;
3244 }
3245
/* sysctl handler: plain integer plus neigh parms bookkeeping. */
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);
3255
/* sysctl handler: value in seconds, stored in jiffies, plus neigh
 * parms bookkeeping.
 */
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3266
/* sysctl handler: value in USER_HZ ticks, stored in jiffies, plus
 * neigh parms bookkeeping.
 */
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
3276
/* sysctl handler: value in milliseconds, stored in jiffies, plus
 * neigh parms bookkeeping.
 */
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3287
/* sysctl handler: unres_qlen (packet-count view of a bytes value, see
 * proc_unres_qlen()) plus neigh parms bookkeeping.
 */
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
3297
/* Handler shared by the "base_reachable_time" (seconds) and
 * "base_reachable_time_ms" (milliseconds) sysctls, dispatching on the
 * procname.  On a successful write the randomized reachable_time is
 * recomputed immediately rather than waiting for the next
 * neigh_periodic_work pass.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
3322
/* Offset of slot @index within neigh_parms::data, encoded as a fake
 * pointer off address 0 (the classic offsetof() trick);
 * neigh_sysctl_register() later adds the real neigh_parms address to
 * turn it into a live pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table entry for template slot NEIGH_VAR_<attr>,
 * backed by data slot NEIGH_VAR_<data_attr> (usually the same; the
 * *_REUSED variants below alias a different slot).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Plain integer clamped to [0, INT_MAX] */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

/* Value stored and shown in jiffies */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

/* Value shown to userspace in USER_HZ ticks */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

/* Value shown to userspace in milliseconds */
#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Millisecond view that aliases another attribute's storage
 * (e.g. retrans_time_ms shares data with retrans_time)
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Legacy packet-count view aliasing the byte-based queue limit */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3352
/* Template for each "net/<family>/neigh/<dev|default>" directory.
 * neigh_sysctl_register() kmemdup()s it, then relocates the per-parms
 * .data offsets and patches .extra1/.extra2 and the proc handlers for
 * the concrete neigh_parms instance.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* The GC knobs below are only exposed in the per-family
		 * "default" directory; for a real device,
		 * neigh_sysctl_register() zeroes the GC_INTERVAL entry to
		 * terminate the table before them.
		 */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},	/* sentinel */
	},
};
3407
/* Create the sysctl directory "net/<ipv4|ipv6>/neigh/<dev|default>"
 * backing @p.
 *
 * @dev:     the device these parms belong to, or NULL for the
 *           per-family "default" parms
 * @p:       neigh_parms instance providing the values
 * @handler: optional proc handler override for the retrans/reachable
 *           time entries (a protocol can mirror writes elsewhere);
 *           when NULL, neigh_proc_base_reachable_time() is installed
 *           so writes take effect immediately
 *
 * Returns 0 on success, -ENOBUFS on allocation or registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Relocate the template's NULL-based .data offsets onto this
	 * neigh_parms instance; stash dev/p for neigh_proc_update().
	 */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		/* GC knobs live in the neigh_table, not in neigh_parms */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3496
3497 void neigh_sysctl_unregister(struct neigh_parms *p)
3498 {
3499         if (p->sysctl_table) {
3500                 struct neigh_sysctl_table *t = p->sysctl_table;
3501                 p->sysctl_table = NULL;
3502                 unregister_net_sysctl_table(t->sysctl_header);
3503                 kfree(t);
3504         }
3505 }
3506 EXPORT_SYMBOL(neigh_sysctl_unregister);
3507
3508 #endif  /* CONFIG_SYSCTL */
3509
/* Wire the generic neighbour layer into rtnetlink: add/delete/dump
 * handlers for neighbour entries plus get/set for the neighbour
 * tables themselves.  Runs once at boot via subsys_initcall().
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);