2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
28 #include <linux/sysctl.h>
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
46 #define neigh_dbg(level, fmt, ...) \
48 if (level <= NEIGH_DEBUG) \
49 pr_debug(fmt, ##__VA_ARGS__); \
52 #define PNEIGH_HASHMASK 0xF
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 struct net_device *dev);
62 static const struct seq_operations neigh_stat_seq_ops;
66 Neighbour hash table buckets are protected with rwlock tbl->lock.
68 - All the scans/updates to hash buckets MUST be made under this lock.
69 - NOTHING clever should be made under this lock: no callbacks
70 to protocol backends, no attempts to send something to network.
71 It will result in deadlocks, if backend/driver wants to use neighbour
73 - If the entry requires some non-trivial actions, increase
74 its reference count and release table lock.
76 Neighbour entries are protected:
77 - with reference count.
78 - with rwlock neigh->lock
80 Reference count prevents destruction.
82 neigh->lock mainly serializes ll address data and its validity state.
83 However, the same lock is used to protect another entry fields:
87 Again, nothing clever shall be made under neigh->lock,
88 the most complicated procedure, which we allow is dev->hard_header.
89 It is supposed, that dev->hard_header is simplistic and does
90 not make callbacks to neighbour tables.
/* Packet sink installed as n->output for dead/unusable entries: the skb
 * is discarded.  NOTE(review): function body is missing from this excerpt.
 */
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Final teardown path for an entry already unlinked from the hash table:
 * run the protocol's optional cleanup hook, notify userspace (rtnetlink)
 * and in-kernel netevent listeners, then drop the table's reference.
 * NOTE(review): interior lines are missing from this excerpt.
 */
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
101 if (neigh->parms->neigh_cleanup)
102 neigh->parms->neigh_cleanup(neigh);
104 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
105 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
/* Last put frees the entry via neigh_destroy() once all users are gone. */
106 neigh_release(neigh);
110 * It is random distribution in the interval (1/2)*base...(3/2)*base.
111 * It corresponds to default IPv6 settings and is not overridable,
112 * because it is really reasonable choice.
115 unsigned long neigh_rand_reach_time(unsigned long base)
/* base/2 + uniform[0, base) == uniform in [base/2, 3*base/2); 0 maps to 0. */
117 return base ? (prandom_u32() % base) + (base >> 1) : 0;
119 EXPORT_SYMBOL(neigh_rand_reach_time);
/* Remove the entry from the table's GC candidate list so forced GC no
 * longer considers it.  Caller must hold tbl->lock, which protects both
 * gc_list and the gc_entries counter.  NOTE(review): the line setting
 * n->dead (if present upstream) is missing from this excerpt.
 */
121 static void neigh_mark_dead(struct neighbour *n)
124 if (!list_empty(&n->gc_list)) {
125 list_del_init(&n->gc_list);
126 atomic_dec(&n->tbl->gc_entries);
/* Re-evaluate whether this entry is subject to garbage collection after a
 * state/flags change, and move it on/off tbl->gc_list accordingly.
 * PERMANENT and externally-learned (NTF_EXT_LEARNED) entries are exempt.
 */
130 static void neigh_update_gc_list(struct neighbour *n)
132 bool on_gc_list, exempt_from_gc;
/* Lock order: table lock first, then entry lock - matches the rest of
 * the file.  _bh because the table is touched from softirq context.
 */
134 write_lock_bh(&n->tbl->lock);
135 write_lock(&n->lock);
137 /* remove from the gc list if new state is permanent or if neighbor
138 * is externally learned; otherwise entry should be on the gc list
140 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
141 n->flags & NTF_EXT_LEARNED;
142 on_gc_list = !list_empty(&n->gc_list);
144 if (exempt_from_gc && on_gc_list) {
145 list_del_init(&n->gc_list);
146 atomic_dec(&n->tbl->gc_entries);
147 } else if (!exempt_from_gc && !on_gc_list) {
148 /* add entries to the tail; cleaning removes from the front */
149 list_add_tail(&n->gc_list, &n->tbl->gc_list);
150 atomic_inc(&n->tbl->gc_entries);
153 write_unlock(&n->lock);
154 write_unlock_bh(&n->tbl->lock);
/* Sync the NTF_EXT_LEARNED flag on the entry with the flag requested by an
 * administrative (netlink) update.  Only NEIGH_UPDATE_F_ADMIN callers may
 * change it.  NOTE(review): the early return and the code reporting the
 * change back to the caller (presumably via a *notify out-param, per the
 * signature's missing second line) are not visible in this excerpt.
 */
157 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
163 if (!(flags & NEIGH_UPDATE_F_ADMIN))
/* Translate the update-flag bit into the ndm flag bit, then flip the
 * entry's flag only if it actually differs.
 */
166 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
167 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
168 if (ndm_flags & NTF_EXT_LEARNED)
169 neigh->flags |= NTF_EXT_LEARNED;
171 neigh->flags &= ~NTF_EXT_LEARNED;
/* Unlink *np (== n) from its hash chain and release it, but only when no
 * one else holds a reference (refcnt == 1, i.e. only the table's own ref).
 * Caller holds tbl->lock for writing; returns whether the entry was
 * removed.  NOTE(review): interior lines missing from this excerpt.
 */
179 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
180 struct neigh_table *tbl)
184 write_lock(&n->lock);
185 if (refcount_read(&n->refcnt) == 1) {
186 struct neighbour *neigh;
/* Splice n out of the RCU-protected chain; readers may still see it
 * until a grace period elapses.
 */
188 neigh = rcu_dereference_protected(n->next,
189 lockdep_is_held(&tbl->lock));
190 rcu_assign_pointer(*np, neigh);
194 write_unlock(&n->lock);
196 neigh_cleanup_and_release(n);
/* Locate ndel in its hash bucket and try to delete it via neigh_del().
 * Caller must hold tbl->lock for writing.  NOTE(review): the loop's
 * key/device comparison and advance-to-next lines are missing from this
 * excerpt.
 */
200 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
202 struct neigh_hash_table *nht;
203 void *pkey = ndel->primary_key;
206 struct neighbour __rcu **np;
208 nht = rcu_dereference_protected(tbl->nht,
209 lockdep_is_held(&tbl->lock));
/* Bucket index = top hash_shift bits of the keyed hash. */
210 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
211 hash_val = hash_val >> (32 - nht->hash_shift);
213 np = &nht->hash_buckets[hash_val];
214 while ((n = rcu_dereference_protected(*np,
215 lockdep_is_held(&tbl->lock)))) {
217 return neigh_del(n, np, tbl);
/* Synchronous garbage collection, run when the table is over gc_thresh2/3
 * at allocation time.  Walks tbl->gc_list (oldest entries at the front)
 * and removes unreferenced entries that are FAILED or have not been
 * updated within the last 5 seconds, up to max_clean entries.
 * NOTE(review): the 'remove'/'shrunk' bookkeeping lines are partially
 * missing from this excerpt; return value (count shrunk, presumably) is
 * not visible.
 */
223 static int neigh_forced_gc(struct neigh_table *tbl)
225 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
226 unsigned long tref = jiffies - 5 * HZ;
227 struct neighbour *n, *tmp;
230 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
232 write_lock_bh(&tbl->lock);
234 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
/* refcnt == 1 means only the table holds the entry - safe to reap. */
235 if (refcount_read(&n->refcnt) == 1) {
238 write_lock(&n->lock);
239 if ((n->nud_state == NUD_FAILED) ||
240 time_after(tref, n->updated))
242 write_unlock(&n->lock);
244 if (remove && neigh_remove_one(n, tbl))
246 if (shrunk >= max_clean)
251 tbl->last_flush = jiffies;
253 write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer for 'when'.  mod_timer() returning
 * nonzero means the timer was already pending - that is a caller bug
 * (timers are supposed to be armed only from states without one running),
 * hence the loud diagnostic.  NOTE(review): the neigh_hold() presumably
 * paired with this arm is not visible in this excerpt.
 */
258 static void neigh_add_timer(struct neighbour *n, unsigned long when)
261 if (unlikely(mod_timer(&n->timer, when))) {
262 printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel a pending state timer, if the entry is in a timed state and the
 * timer had not yet fired.  NOTE(review): the return value and the
 * matching neigh_release() are missing from this excerpt.
 */
268 static int neigh_del_timer(struct neighbour *n)
270 if ((n->nud_state & NUD_IN_TIMER) &&
271 del_timer(&n->timer)) {
/* Drain and free every skb queued for delayed proxy processing.
 * NOTE(review): the per-skb release call inside the loop is missing from
 * this excerpt.
 */
278 static void pneigh_queue_purge(struct sk_buff_head *list)
282 while ((skb = skb_dequeue(list)) != NULL) {
/* Remove from the table every entry belonging to 'dev' (or all devices if
 * dev == NULL), optionally sparing NUD_PERMANENT entries (skip_perm).
 * Caller holds tbl->lock for writing.  Entries still referenced elsewhere
 * are neutered (queue purged, output blackholed, state downgraded) and
 * left to die when the last reference drops.
 */
288 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
292 struct neigh_hash_table *nht;
294 nht = rcu_dereference_protected(tbl->nht,
295 lockdep_is_held(&tbl->lock));
297 for (i = 0; i < (1 << nht->hash_shift); i++) {
299 struct neighbour __rcu **np = &nht->hash_buckets[i];
301 while ((n = rcu_dereference_protected(*np,
302 lockdep_is_held(&tbl->lock))) != NULL) {
/* Skip entries for other devices / permanent entries; the
 * advance of np for the skip cases is not visible in this
 * excerpt.
 */
303 if (dev && n->dev != dev) {
307 if (skip_perm && n->nud_state & NUD_PERMANENT) {
/* Unlink from the RCU chain, then tear the entry down. */
311 rcu_assign_pointer(*np,
312 rcu_dereference_protected(n->next,
313 lockdep_is_held(&tbl->lock)));
314 write_lock(&n->lock);
317 if (refcount_read(&n->refcnt) != 1) {
318 /* The most unpleasant situation.
319 We must destroy neighbour entry,
320 but someone still uses it.
322 The destroy will be delayed until
323 the last user releases us, but
324 we must kill timers etc. and move
327 __skb_queue_purge(&n->arp_queue);
328 n->arp_queue_len_bytes = 0;
329 n->output = neigh_blackhole;
330 if (n->nud_state & NUD_VALID)
331 n->nud_state = NUD_NOARP;
333 n->nud_state = NUD_NONE;
334 neigh_dbg(2, "neigh %p is stray\n", n);
336 write_unlock(&n->lock);
337 neigh_cleanup_and_release(n);
/* Flush every entry (including permanent ones) for 'dev' - called when a
 * device's hardware address changes, making cached lladdrs stale.
 */
342 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
344 write_lock_bh(&tbl->lock);
345 neigh_flush_dev(tbl, dev, false);
346 write_unlock_bh(&tbl->lock);
348 EXPORT_SYMBOL(neigh_changeaddr);
/* Common device-down path: flush the device's entries (sparing permanent
 * ones when skip_perm), drop its proxy entries (pneigh_ifdown_and_unlock
 * releases tbl->lock), then kill the proxy timer and its queued skbs.
 */
350 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
353 write_lock_bh(&tbl->lock);
354 neigh_flush_dev(tbl, dev, skip_perm);
355 pneigh_ifdown_and_unlock(tbl, dev);
357 del_timer_sync(&tbl->proxy_timer);
358 pneigh_queue_purge(&tbl->proxy_queue);
/* Carrier loss: flush learned entries but keep NUD_PERMANENT ones. */
362 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
364 __neigh_ifdown(tbl, dev, true);
367 EXPORT_SYMBOL(neigh_carrier_down);
/* Device going down: flush everything for it, permanent entries included. */
369 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
371 __neigh_ifdown(tbl, dev, false);
374 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and minimally initialize a neighbour entry for 'dev'.  When the
 * table is over gc_thresh2 (and the last flush is older than 5s) or over
 * gc_thresh3, forced GC is attempted first; allocation fails if GC cannot
 * make room below thresh3.  NOTE(review): the exempt_from_gc parameter
 * handling and the error/exit labels are missing from this excerpt.
 */
376 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
377 struct net_device *dev,
380 struct neighbour *n = NULL;
381 unsigned long now = jiffies;
/* Optimistically count this entry; decremented on the failure path. */
387 entries = atomic_inc_return(&tbl->gc_entries) - 1;
388 if (entries >= tbl->gc_thresh3 ||
389 (entries >= tbl->gc_thresh2 &&
390 time_after(now, tbl->last_flush + 5 * HZ))) {
391 if (!neigh_forced_gc(tbl) &&
392 entries >= tbl->gc_thresh3) {
393 net_info_ratelimited("%s: neighbor table overflow!\n",
395 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
/* entry_size covers the protocol-specific tail; neigh_priv_len is extra
 * per-device private room.
 */
401 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
405 __skb_queue_head_init(&n->arp_queue);
406 rwlock_init(&n->lock);
407 seqlock_init(&n->ha_lock);
408 n->updated = n->used = now;
409 n->nud_state = NUD_NONE;
410 n->output = neigh_blackhole;
411 seqlock_init(&n->hh.hh_lock);
412 n->parms = neigh_parms_clone(&tbl->parms);
413 timer_setup(&n->timer, neigh_timer_handler, 0);
415 NEIGH_CACHE_STAT_INC(tbl, allocs);
417 refcount_set(&n->refcnt, 1);
419 INIT_LIST_HEAD(&n->gc_list);
421 atomic_inc(&tbl->entries);
/* Failure path: undo the optimistic gc_entries bump. */
427 atomic_dec(&tbl->gc_entries);
/* Random per-table hash seed; forced odd (| 1), presumably so it is
 * usable as a multiplicative constant - TODO confirm against tbl->hash
 * implementations.
 */
431 static void neigh_get_hash_rnd(u32 *x)
433 *x = get_random_u32() | 1;
/* Allocate a hash table of 2^shift RCU bucket heads plus fresh hash
 * seeds.  Small tables come from the slab, larger ones straight from the
 * page allocator.  NOTE(review): the error-unwind paths are missing from
 * this excerpt.
 */
436 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
438 size_t size = (1 << shift) * sizeof(struct neighbour *);
439 struct neigh_hash_table *ret;
440 struct neighbour __rcu **buckets;
443 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
446 if (size <= PAGE_SIZE)
447 buckets = kzalloc(size, GFP_ATOMIC);
449 buckets = (struct neighbour __rcu **)
450 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
456 ret->hash_buckets = buckets;
457 ret->hash_shift = shift;
458 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
459 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback freeing an old hash table after a grace period - must
 * mirror the slab-vs-pages split made in neigh_hash_alloc().
 * NOTE(review): the kfree() branch for the slab case is missing from this
 * excerpt.
 */
463 static void neigh_hash_free_rcu(struct rcu_head *head)
465 struct neigh_hash_table *nht = container_of(head,
466 struct neigh_hash_table,
468 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
469 struct neighbour __rcu **buckets = nht->hash_buckets;
471 if (size <= PAGE_SIZE)
474 free_pages((unsigned long)buckets, get_order(size));
/* Replace the table's hash with one of 2^new_shift buckets, rehashing
 * every entry.  Caller holds tbl->lock for writing; readers keep using the
 * old table under RCU until the switch-over, and the old table is freed
 * after a grace period.
 */
478 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
479 unsigned long new_shift)
481 unsigned int i, hash;
482 struct neigh_hash_table *new_nht, *old_nht;
484 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
486 old_nht = rcu_dereference_protected(tbl->nht,
487 lockdep_is_held(&tbl->lock));
488 new_nht = neigh_hash_alloc(new_shift);
492 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
493 struct neighbour *n, *next;
495 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
496 lockdep_is_held(&tbl->lock));
/* Recompute with the NEW table's seeds: bucket index differs. */
499 hash = tbl->hash(n->primary_key, n->dev,
502 hash >>= (32 - new_nht->hash_shift);
503 next = rcu_dereference_protected(n->next,
504 lockdep_is_held(&tbl->lock));
/* Push onto the head of the new bucket's chain. */
506 rcu_assign_pointer(n->next,
507 rcu_dereference_protected(
508 new_nht->hash_buckets[hash],
509 lockdep_is_held(&tbl->lock)));
510 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
514 rcu_assign_pointer(tbl->nht, new_nht);
515 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* Look up the entry for (pkey, dev), taking a reference for the caller.
 * Lockless: runs under rcu_read_lock_bh(); refcount_inc_not_zero guards
 * against racing with the entry's final release.  NOTE(review): the
 * rcu_read_lock_bh() and the NULL-ing of n when the refcount bump fails
 * are missing from this excerpt.
 */
519 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
520 struct net_device *dev)
524 NEIGH_CACHE_STAT_INC(tbl, lookups);
527 n = __neigh_lookup_noref(tbl, pkey, dev);
529 if (!refcount_inc_not_zero(&n->refcnt))
531 NEIGH_CACHE_STAT_INC(tbl, hits);
534 rcu_read_unlock_bh();
537 EXPORT_SYMBOL(neigh_lookup);
/* Like neigh_lookup() but ignores the device: matches on key and network
 * namespace only.  Hashes with dev == NULL, so only entries inserted the
 * same way can be found here.  NOTE(review): rcu_read_lock_bh() and loop
 * boundary lines are missing from this excerpt.
 */
539 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
543 unsigned int key_len = tbl->key_len;
545 struct neigh_hash_table *nht;
547 NEIGH_CACHE_STAT_INC(tbl, lookups);
550 nht = rcu_dereference_bh(tbl->nht);
551 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
553 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
555 n = rcu_dereference_bh(n->next)) {
556 if (!memcmp(n->primary_key, pkey, key_len) &&
557 net_eq(dev_net(n->dev), net)) {
558 if (!refcount_inc_not_zero(&n->refcnt))
560 NEIGH_CACHE_STAT_INC(tbl, hits);
565 rcu_read_unlock_bh();
568 EXPORT_SYMBOL(neigh_lookup_nodev);
/* Core create path: allocate, run protocol/device/parms constructors, then
 * insert into the hash under tbl->lock.  If an entry for (pkey, dev)
 * appears concurrently, the duplicate-detection loop returns the existing
 * one instead (that branch's body is not visible here).  Returns the entry
 * (with a reference when want_ref) or an ERR_PTR.  NOTE(review): many
 * interior lines - error labels, the duplicate-found branch, want_ref
 * handling - are missing from this excerpt.
 */
570 static struct neighbour *___neigh_create(struct neigh_table *tbl,
572 struct net_device *dev,
573 bool exempt_from_gc, bool want_ref)
575 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
577 unsigned int key_len = tbl->key_len;
579 struct neigh_hash_table *nht;
582 rc = ERR_PTR(-ENOBUFS);
586 memcpy(n->primary_key, pkey, key_len);
590 /* Protocol specific setup. */
591 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
593 goto out_neigh_release;
596 if (dev->netdev_ops->ndo_neigh_construct) {
597 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
600 goto out_neigh_release;
604 /* Device specific setup. */
605 if (n->parms->neigh_setup &&
606 (error = n->parms->neigh_setup(n)) < 0) {
608 goto out_neigh_release;
/* Backdate 'confirmed' so the entry is immediately suspect-able. */
611 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
613 write_lock_bh(&tbl->lock);
614 nht = rcu_dereference_protected(tbl->nht,
615 lockdep_is_held(&tbl->lock));
/* Grow the hash when load factor exceeds 1. */
617 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
618 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
620 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
/* parms being torn down concurrently - refuse the insert. */
622 if (n->parms->dead) {
623 rc = ERR_PTR(-EINVAL);
627 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
628 lockdep_is_held(&tbl->lock));
630 n1 = rcu_dereference_protected(n1->next,
631 lockdep_is_held(&tbl->lock))) {
632 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
/* GC-eligible entries join the candidate list on insert. */
642 list_add_tail(&n->gc_list, &n->tbl->gc_list);
646 rcu_assign_pointer(n->next,
647 rcu_dereference_protected(nht->hash_buckets[hash_val],
648 lockdep_is_held(&tbl->lock)));
649 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
650 write_unlock_bh(&tbl->lock);
651 neigh_dbg(2, "neigh %p is created\n", n);
656 write_unlock_bh(&tbl->lock);
/* Public create wrapper: entries made here are GC-eligible. */
662 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
663 struct net_device *dev, bool want_ref)
665 return ___neigh_create(tbl, pkey, dev, false, want_ref);
667 EXPORT_SYMBOL(__neigh_create);
/* Cheap hash for proxy entries: fold the last 4 key bytes down to the
 * 4-bit bucket index (PNEIGH_HASHMASK == 0xF).  Assumes key_len >= 4 -
 * TODO confirm all neigh_table users guarantee that.
 */
669 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
671 u32 hash_val = *(u32 *)(pkey + key_len - 4);
672 hash_val ^= (hash_val >> 16);
673 hash_val ^= hash_val >> 8;
674 hash_val ^= hash_val >> 4;
675 hash_val &= PNEIGH_HASHMASK;
/* Scan one proxy-hash chain for a (net, key) match; a NULL n->dev acts as
 * a wildcard matching any device.  NOTE(review): the loop header and
 * return lines are missing from this excerpt.
 */
679 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
682 unsigned int key_len,
683 struct net_device *dev)
686 if (!memcmp(n->key, pkey, key_len) &&
687 net_eq(pneigh_net(n), net) &&
688 (n->dev == dev || !n->dev))
/* Lockless proxy lookup - caller is responsible for holding tbl->lock
 * (read) or otherwise serializing against pneigh mutation; presumably
 * used from contexts that already do - TODO confirm callers.
 */
695 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
696 struct net *net, const void *pkey, struct net_device *dev)
698 unsigned int key_len = tbl->key_len;
699 u32 hash_val = pneigh_hash(pkey, key_len);
701 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
702 net, pkey, key_len, dev);
704 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Find a proxy entry, optionally (creat != 0) creating it: GFP_KERNEL
 * alloc, pconstructor hook, then insert at the bucket head under
 * tbl->lock.  NOTE(review): the creat check, dev reference handling and
 * the pconstructor failure cleanup are missing from this excerpt; there
 * is a window between the unlocked lookup and the insert in which a
 * duplicate could race in - presumably callers serialize (e.g. RTNL) -
 * TODO confirm.
 */
706 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
707 struct net *net, const void *pkey,
708 struct net_device *dev, int creat)
710 struct pneigh_entry *n;
711 unsigned int key_len = tbl->key_len;
712 u32 hash_val = pneigh_hash(pkey, key_len);
714 read_lock_bh(&tbl->lock);
715 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
716 net, pkey, key_len, dev);
717 read_unlock_bh(&tbl->lock);
/* Key storage is a flexible tail after the struct. */
724 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
728 write_pnet(&n->net, net);
729 memcpy(n->key, pkey, key_len);
734 if (tbl->pconstructor && tbl->pconstructor(n)) {
742 write_lock_bh(&tbl->lock);
743 n->next = tbl->phash_buckets[hash_val];
744 tbl->phash_buckets[hash_val] = n;
745 write_unlock_bh(&tbl->lock);
749 EXPORT_SYMBOL(pneigh_lookup);
/* Remove and free the proxy entry exactly matching (net, pkey, dev).
 * Unlike lookup, dev must match exactly here (no NULL wildcard).
 * NOTE(review): the unlink assignment, pdestructor call arguments, dev
 * put and kfree are missing from this excerpt.
 */
752 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
753 struct net_device *dev)
755 struct pneigh_entry *n, **np;
756 unsigned int key_len = tbl->key_len;
757 u32 hash_val = pneigh_hash(pkey, key_len);
759 write_lock_bh(&tbl->lock);
760 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
762 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
763 net_eq(pneigh_net(n), net)) {
765 write_unlock_bh(&tbl->lock);
766 if (tbl->pdestructor)
774 write_unlock_bh(&tbl->lock);
/* Collect all proxy entries for 'dev' (or every device when dev == NULL)
 * onto a private freelist while tbl->lock is held, then drop the lock
 * (caller acquired it - note the _and_unlock contract) and destroy them
 * outside the lock, since pdestructor may sleep or take other locks.
 * NOTE(review): the freelist-chaining lines inside the loop are missing
 * from this excerpt.
 */
778 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
779 struct net_device *dev)
781 struct pneigh_entry *n, **np, *freelist = NULL;
784 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
785 np = &tbl->phash_buckets[h];
786 while ((n = *np) != NULL) {
787 if (!dev || n->dev == dev) {
796 write_unlock_bh(&tbl->lock);
797 while ((n = freelist)) {
800 if (tbl->pdestructor)
/* Forward declaration: destructor lives later in the file. */
809 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop a reference to a parms block; destroy on last put. */
811 static inline void neigh_parms_put(struct neigh_parms *parms)
813 if (refcount_dec_and_test(&parms->refcnt))
814 neigh_parms_destroy(parms);
818 * neighbour must already be out of the table;
821 void neigh_destroy(struct neighbour *neigh)
823 struct net_device *dev = neigh->dev;
825 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
/* Destroying an entry still marked alive or with a pending timer is a
 * caller bug - warn loudly rather than crash.
 */
828 pr_warn("Destroying alive neighbour %p\n", neigh);
833 if (neigh_del_timer(neigh))
834 pr_warn("Impossible event\n");
836 write_lock_bh(&neigh->lock);
837 __skb_queue_purge(&neigh->arp_queue);
838 write_unlock_bh(&neigh->lock);
839 neigh->arp_queue_len_bytes = 0;
/* Give the driver a chance to tear down its per-neighbour state. */
841 if (dev->netdev_ops->ndo_neigh_destroy)
842 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
845 neigh_parms_put(neigh->parms);
847 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
849 atomic_dec(&neigh->tbl->entries);
/* Memory freed after an RCU grace period - lockless readers may still
 * hold a pointer.
 */
850 kfree_rcu(neigh, rcu);
852 EXPORT_SYMBOL(neigh_destroy);
854 /* Neighbour state is suspicious;
857 Called with write_locked neigh.
/* Route output through the slow, resolving path until reconfirmed. */
859 static void neigh_suspect(struct neighbour *neigh)
861 neigh_dbg(2, "neigh %p is suspected\n", neigh);
863 neigh->output = neigh->ops->output;
866 /* Neighbour state is OK;
869 Called with write_locked neigh.
/* Switch to the fast path that trusts the cached lladdr. */
871 static void neigh_connect(struct neighbour *neigh)
873 neigh_dbg(2, "neigh %p is connected\n", neigh);
875 neigh->output = neigh->ops->connected_output;
/* Periodic (delayed-work) garbage collector: refreshes each parms'
 * randomized reachable_time every 300s, then sweeps the hash reaping
 * unreferenced entries that are FAILED or idle past GC_STALETIME.
 * PERMANENT, in-timer and externally-learned entries are skipped.
 * Reschedules itself at BASE_REACHABLE_TIME/2.  NOTE(review): goto
 * labels, the np-advance lines and the per-bucket cond_resched-style
 * lock drop target are missing from this excerpt.
 */
878 static void neigh_periodic_work(struct work_struct *work)
880 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
882 struct neighbour __rcu **np;
884 struct neigh_hash_table *nht;
886 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
888 write_lock_bh(&tbl->lock);
889 nht = rcu_dereference_protected(tbl->nht,
890 lockdep_is_held(&tbl->lock));
893 * periodically recompute ReachableTime from random function
896 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
897 struct neigh_parms *p;
898 tbl->last_rand = jiffies;
899 list_for_each_entry(p, &tbl->parms_list, list)
901 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Below the low-water mark: nothing to reap this round. */
904 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
907 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
908 np = &nht->hash_buckets[i];
910 while ((n = rcu_dereference_protected(*np,
911 lockdep_is_held(&tbl->lock))) != NULL) {
914 write_lock(&n->lock);
916 state = n->nud_state;
917 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
918 (n->flags & NTF_EXT_LEARNED)) {
919 write_unlock(&n->lock);
/* 'used' must never lag behind 'confirmed'. */
923 if (time_before(n->used, n->confirmed))
924 n->used = n->confirmed;
926 if (refcount_read(&n->refcnt) == 1 &&
927 (state == NUD_FAILED ||
928 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
931 write_unlock(&n->lock);
932 neigh_cleanup_and_release(n);
935 write_unlock(&n->lock);
941 * It's fine to release lock here, even if hash table
942 * grows while we are preempted.
944 write_unlock_bh(&tbl->lock);
946 write_lock_bh(&tbl->lock);
947 nht = rcu_dereference_protected(tbl->nht,
948 lockdep_is_held(&tbl->lock));
951 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
952 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
953 * BASE_REACHABLE_TIME.
955 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
956 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
957 write_unlock_bh(&tbl->lock);
/* Total probes allowed before declaring failure: unicast + app probes,
 * plus multicast re-probes when already in NUD_PROBE, otherwise the
 * initial multicast probes.
 */
960 static __inline__ int neigh_max_probes(struct neighbour *n)
962 struct neigh_parms *p = n->parms;
963 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
964 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
965 NEIGH_VAR(p, MCAST_PROBES));
/* Resolution failed: report unreachability for each queued skb, then purge
 * the queue.  Temporarily drops neigh->lock around error_report (it is a
 * heavyweight callback that may re-enter the neighbour code) - hence the
 * sparse __releases/__acquires annotations and the NUD_FAILED re-check on
 * every iteration.
 */
968 static void neigh_invalidate(struct neighbour *neigh)
969 __releases(neigh->lock)
970 __acquires(neigh->lock)
974 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
975 neigh_dbg(2, "neigh %p is failed\n", neigh);
976 neigh->updated = jiffies;
978 /* It is very thin place. report_unreachable is very complicated
979 routine. Particularly, it can hit the same neighbour entry!
981 So that, we try to be accurate and avoid dead loop. --ANK
983 while (neigh->nud_state == NUD_FAILED &&
984 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
985 write_unlock(&neigh->lock);
986 neigh->ops->error_report(neigh, skb);
987 write_lock(&neigh->lock);
989 __skb_queue_purge(&neigh->arp_queue);
990 neigh->arp_queue_len_bytes = 0;
/* Send one solicitation for the entry, using a clone of the most recently
 * queued skb as context (so the queue can shrink meanwhile).  Drops
 * neigh->lock before calling into the protocol's solicit routine
 * (annotated __releases).  NOTE(review): the kfree_skb of the clone is
 * missing from this excerpt.
 */
993 static void neigh_probe(struct neighbour *neigh)
994 __releases(neigh->lock)
996 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
997 /* keep skb alive even if arp_queue overflows */
999 skb = skb_clone(skb, GFP_ATOMIC);
1000 write_unlock(&neigh->lock);
1001 if (neigh->ops->solicit)
1002 neigh->ops->solicit(neigh, skb);
1003 atomic_inc(&neigh->probes);
1007 /* Called when a timer expires for a neighbour entry. */
/* NUD state machine tick.  Transitions:
 *   REACHABLE -> (confirmed recently? stay) -> DELAY -> STALE
 *   DELAY     -> (confirmed? REACHABLE) else PROBE
 *   PROBE/INCOMPLETE -> retransmit until neigh_max_probes(), then FAILED.
 * Re-arms itself while the state remains timed, clamped to >= HZ/2 ahead.
 * NOTE(review): goto targets, the 'notify' flag plumbing and the probe
 * call at the bottom are missing from this excerpt.
 */
1009 static void neigh_timer_handler(struct timer_list *t)
1011 unsigned long now, next;
1012 struct neighbour *neigh = from_timer(neigh, t, timer);
1016 write_lock(&neigh->lock);
1018 state = neigh->nud_state;
/* Timer raced with a state change that left a non-timed state: bail. */
1022 if (!(state & NUD_IN_TIMER))
1025 if (state & NUD_REACHABLE) {
1026 if (time_before_eq(now,
1027 neigh->confirmed + neigh->parms->reachable_time)) {
1028 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1029 next = neigh->confirmed + neigh->parms->reachable_time;
1030 } else if (time_before_eq(now,
1032 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1033 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1034 neigh->nud_state = NUD_DELAY;
1035 neigh->updated = jiffies;
1036 neigh_suspect(neigh);
1037 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1039 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1040 neigh->nud_state = NUD_STALE;
1041 neigh->updated = jiffies;
1042 neigh_suspect(neigh);
1045 } else if (state & NUD_DELAY) {
/* Confirmation (e.g. from upper layer) arrived during DELAY. */
1046 if (time_before_eq(now,
1048 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1049 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1050 neigh->nud_state = NUD_REACHABLE;
1051 neigh->updated = jiffies;
1052 neigh_connect(neigh);
1054 next = neigh->confirmed + neigh->parms->reachable_time;
1056 neigh_dbg(2, "neigh %p is probed\n", neigh);
1057 neigh->nud_state = NUD_PROBE;
1058 neigh->updated = jiffies;
1059 atomic_set(&neigh->probes, 0);
1061 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1064 /* NUD_PROBE|NUD_INCOMPLETE */
1065 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
/* Probe budget exhausted: give up and flush the pending queue. */
1068 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1069 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1070 neigh->nud_state = NUD_FAILED;
1072 neigh_invalidate(neigh);
1076 if (neigh->nud_state & NUD_IN_TIMER) {
1077 if (time_before(next, jiffies + HZ/2))
1078 next = jiffies + HZ/2;
1079 if (!mod_timer(&neigh->timer, next))
1082 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1086 write_unlock(&neigh->lock);
1090 neigh_update_notify(neigh, 0);
/* Drop the reference the timer held on the entry. */
1092 neigh_release(neigh);
/* Called on transmit when the entry is not connected.  Kicks off
 * resolution (NONE/FAILED -> INCOMPLETE with probes armed, or immediate
 * failure if no probe types are configured), promotes STALE -> DELAY, and
 * queues the skb on arp_queue (evicting from the head on overflow,
 * counted as unres_discards).  Returns nonzero when the caller must drop
 * the skb - the visible code implies this; exact return lines are missing
 * from this excerpt, as are several labels and the immediate-probe call.
 */
1095 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1098 bool immediate_probe = false;
1100 write_lock_bh(&neigh->lock);
/* Already resolving or usable: nothing to do here. */
1103 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1108 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1109 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1110 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1111 unsigned long next, now = jiffies;
1113 atomic_set(&neigh->probes,
1114 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1115 neigh->nud_state = NUD_INCOMPLETE;
1116 neigh->updated = now;
1117 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1119 neigh_add_timer(neigh, next);
1120 immediate_probe = true;
/* No probe mechanism configured: fail immediately. */
1122 neigh->nud_state = NUD_FAILED;
1123 neigh->updated = jiffies;
1124 write_unlock_bh(&neigh->lock);
1129 } else if (neigh->nud_state & NUD_STALE) {
1130 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1131 neigh->nud_state = NUD_DELAY;
1132 neigh->updated = jiffies;
1133 neigh_add_timer(neigh, jiffies +
1134 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1137 if (neigh->nud_state == NUD_INCOMPLETE) {
/* Enforce the per-entry byte budget by evicting oldest first. */
1139 while (neigh->arp_queue_len_bytes + skb->truesize >
1140 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1141 struct sk_buff *buff;
1143 buff = __skb_dequeue(&neigh->arp_queue);
1146 neigh->arp_queue_len_bytes -= buff->truesize;
1148 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1151 __skb_queue_tail(&neigh->arp_queue, skb);
1152 neigh->arp_queue_len_bytes += skb->truesize;
1157 if (immediate_probe)
1160 write_unlock(&neigh->lock);
1165 if (neigh->nud_state & NUD_STALE)
1167 write_unlock_bh(&neigh->lock);
1171 EXPORT_SYMBOL(__neigh_event_send);
/* Propagate a changed hardware address into the cached hardware header
 * via the device's header_ops->cache_update hook, under hh_lock so
 * lockless fast-path readers see a consistent header.  NOTE(review): the
 * NULL check on 'update' and the hh selection lines are missing from this
 * excerpt.
 */
1173 static void neigh_update_hhs(struct neighbour *neigh)
1175 struct hh_cache *hh;
1176 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1179 if (neigh->dev->header_ops)
1180 update = neigh->dev->header_ops->cache_update;
1185 write_seqlock_bh(&hh->hh_lock);
1186 update(hh, neigh->dev, neigh->ha);
1187 write_sequnlock_bh(&hh->hh_lock);
1194 /* Generic update routine.
1195 -- lladdr is new lladdr or NULL, if it is not supplied.
1196 -- new is new state.
1198 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1200 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1201 lladdr instead of overriding it
1203 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1205 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1207 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1210 Caller MUST hold reference count on the entry.
/* NOTE(review): many interior lines of this function - goto labels,
 * several 'else' arms, the lladdr canonicalization and the final return -
 * are missing from this excerpt; comments below describe only what is
 * visible.
 */
1213 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1214 u8 new, u32 flags, u32 nlmsg_pid,
1215 struct netlink_ext_ack *extack)
1217 bool ext_learn_change = false;
1221 struct net_device *dev;
1222 int update_isrouter = 0;
1224 write_lock_bh(&neigh->lock);
1227 old = neigh->nud_state;
/* Non-admin updates may not touch NOARP/PERMANENT entries. */
1230 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1231 (old & (NUD_NOARP | NUD_PERMANENT)))
1234 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1238 ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
/* Transition to a non-VALID state: stop timers, suspect, maybe fail. */
1240 if (!(new & NUD_VALID)) {
1241 neigh_del_timer(neigh);
1242 if (old & NUD_CONNECTED)
1243 neigh_suspect(neigh);
1244 neigh->nud_state = new;
1246 notify = old & NUD_VALID;
1247 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1248 (new & NUD_FAILED)) {
1249 neigh_invalidate(neigh);
1255 /* Compare new lladdr with cached one */
1256 if (!dev->addr_len) {
1257 /* First case: device needs no address. */
1259 } else if (lladdr) {
1260 /* The second case: if something is already cached
1261 and a new address is proposed:
1263 - if they are different, check override flag
1265 if ((old & NUD_VALID) &&
1266 !memcmp(lladdr, neigh->ha, dev->addr_len))
1269 /* No address is supplied; if we know something,
1270 use it, otherwise discard the request.
1273 if (!(old & NUD_VALID)) {
1274 NL_SET_ERR_MSG(extack, "No link layer address given")_;
1280 /* Update confirmed timestamp for neighbour entry after we
1281 * received ARP packet even if it doesn't change IP to MAC binding.
1283 if (new & NUD_CONNECTED)
1284 neigh->confirmed = jiffies;
1286 /* If entry was valid and address is not changed,
1287 do not change entry state, if new one is STALE.
1290 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1291 if (old & NUD_VALID) {
1292 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1293 update_isrouter = 0;
1294 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1295 (old & NUD_CONNECTED)) {
1301 if (lladdr == neigh->ha && new == NUD_STALE &&
1302 !(flags & NEIGH_UPDATE_F_ADMIN))
1307 /* Update timestamp only once we know we will make a change to the
1308 * neighbour entry. Otherwise we risk to move the locktime window with
1309 * noop updates and ignore relevant ARP updates.
1311 if (new != old || lladdr != neigh->ha)
1312 neigh->updated = jiffies;
1315 neigh_del_timer(neigh);
1316 if (new & NUD_PROBE)
1317 atomic_set(&neigh->probes, 0);
1318 if (new & NUD_IN_TIMER)
1319 neigh_add_timer(neigh, (jiffies +
1320 ((new & NUD_REACHABLE) ?
1321 neigh->parms->reachable_time :
1323 neigh->nud_state = new;
/* lladdr changed: update under ha_lock and refresh cached hw headers. */
1327 if (lladdr != neigh->ha) {
1328 write_seqlock(&neigh->ha_lock);
1329 memcpy(&neigh->ha, lladdr, dev->addr_len);
1330 write_sequnlock(&neigh->ha_lock);
1331 neigh_update_hhs(neigh);
1332 if (!(new & NUD_CONNECTED))
1333 neigh->confirmed = jiffies -
1334 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1339 if (new & NUD_CONNECTED)
1340 neigh_connect(neigh);
1342 neigh_suspect(neigh);
/* Entry just became valid: flush the pending queue through the
 * (possibly re-looked-up) top-level neighbour for each skb.
 */
1343 if (!(old & NUD_VALID)) {
1344 struct sk_buff *skb;
1346 /* Again: avoid dead loop if something went wrong */
1348 while (neigh->nud_state & NUD_VALID &&
1349 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1350 struct dst_entry *dst = skb_dst(skb);
1351 struct neighbour *n2, *n1 = neigh;
1352 write_unlock_bh(&neigh->lock);
1356 /* Why not just use 'neigh' as-is? The problem is that
1357 * things such as shaper, eql, and sch_teql can end up
1358 * using alternative, different, neigh objects to output
1359 * the packet in the output path. So what we need to do
1360 * here is re-lookup the top-level neigh in the path so
1361 * we can reinject the packet there.
1365 n2 = dst_neigh_lookup_skb(dst, skb);
1369 n1->output(n1, skb);
1374 write_lock_bh(&neigh->lock);
1376 __skb_queue_purge(&neigh->arp_queue);
1377 neigh->arp_queue_len_bytes = 0;
1380 if (update_isrouter)
1381 neigh_update_is_router(neigh, flags, &notify);
1382 write_unlock_bh(&neigh->lock);
/* GC-list membership depends on PERMANENT/ext-learned - resync it. */
1384 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1385 neigh_update_gc_list(neigh);
1388 neigh_update_notify(neigh, nlmsg_pid);
/* Public wrapper around __neigh_update() with no extack. */
1393 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1394 u32 flags, u32 nlmsg_pid)
1396 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1398 EXPORT_SYMBOL(neigh_update);
1400 /* Update the neigh to listen temporarily for probe responses, even if it is
1401 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1403 void __neigh_set_probe_once(struct neighbour *neigh)
1407 neigh->updated = jiffies;
1408 if (!(neigh->nud_state & NUD_FAILED))
/* Max out the probe counter so no further solicitations are sent;
 * the timer only keeps the entry receptive for RETRANS_TIME.
 */
1410 neigh->nud_state = NUD_INCOMPLETE;
1411 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1412 neigh_add_timer(neigh,
1413 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1415 EXPORT_SYMBOL(__neigh_set_probe_once);
/* Handle a received neighbour solicitation: look up (or create, when a
 * lladdr is supplied or the device is addressless) the entry for the
 * sender and mark it STALE with the new lladdr.  Returns the entry (the
 * NULL check before neigh_update is not visible in this excerpt).
 */
1417 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1418 u8 *lladdr, void *saddr,
1419 struct net_device *dev)
1421 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1422 lladdr || !dev->addr_len);
1424 neigh_update(neigh, lladdr, NUD_STALE,
1425 NEIGH_UPDATE_F_OVERRIDE, 0);
1428 EXPORT_SYMBOL(neigh_event_ns);
1430 /* called with read_lock_bh(&n->lock); */
/* One-time fill of the cached hardware header via header_ops->cache,
 * serialized by upgrading to the write lock.  NOTE(review): the hh_len
 * re-check that makes the "only one thread initializes" comment true is
 * missing from this excerpt.
 */
1431 static void neigh_hh_init(struct neighbour *n)
1433 struct net_device *dev = n->dev;
1434 __be16 prot = n->tbl->protocol;
1435 struct hh_cache *hh = &n->hh;
1437 write_lock_bh(&n->lock);
1439 /* Only one thread can come in here and initialize the
1443 dev->header_ops->cache(n, hh, prot);
1445 write_unlock_bh(&n->lock);
1448 /* Slow and careful. */
/* Output path used while the entry is not connected: trigger/continue
 * resolution via neigh_event_send(); when it succeeds, build the hardware
 * header under the ha_lock seqlock (retrying on concurrent lladdr change)
 * and transmit.  NOTE(review): the error paths and final return are
 * missing from this excerpt.
 */
1450 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1454 if (!neigh_event_send(neigh, skb)) {
1456 struct net_device *dev = neigh->dev;
1459 if (dev->header_ops->cache && !neigh->hh.hh_len)
1460 neigh_hh_init(neigh);
1463 __skb_pull(skb, skb_network_offset(skb));
1464 seq = read_seqbegin(&neigh->ha_lock);
1465 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1466 neigh->ha, NULL, skb->len);
1467 } while (read_seqretry(&neigh->ha_lock, seq));
1470 rc = dev_queue_xmit(skb);
1481 EXPORT_SYMBOL(neigh_resolve_output);
/* neigh_connected_output - fast transmit path for an already-resolved
 * entry (no hh cache): build the hard header under the ha_lock seqlock
 * retry loop and queue the skb.
 * NOTE(review): fragmented extract -- local declarations, the do{ } line,
 * the error branch and the return are elided.
 */
1483 /* As fast as possible without hh cache */
1485 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1487 struct net_device *dev = neigh->dev;
1492 __skb_pull(skb, skb_network_offset(skb));
1493 seq = read_seqbegin(&neigh->ha_lock);
1494 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1495 neigh->ha, NULL, skb->len);
1496 } while (read_seqretry(&neigh->ha_lock, seq));
1499 err = dev_queue_xmit(skb);
/* neigh_direct_output - output helper for devices that need no
 * neighbour resolution: the entry is unused and the skb goes straight
 * to the device transmit queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
/* neigh_proxy_process - proxy-delay timer callback.
 *
 * Walks tbl->proxy_queue under its lock; skbs whose scheduled time has
 * arrived are unlinked and re-processed via tbl->proxy_redo() (if the
 * device is still up), while the earliest remaining deadline is tracked
 * in @sched_next to re-arm the timer.
 * NOTE(review): fragmented extract -- the tdif<=0 test, kfree_skb for
 * down devices, dev_put, and the sched_next assignment body are elided.
 */
1514 static void neigh_proxy_process(struct timer_list *t)
1516 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1517 long sched_next = 0;
1518 unsigned long now = jiffies;
1519 struct sk_buff *skb, *n;
1521 spin_lock(&tbl->proxy_queue.lock);
1523 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1524 long tdif = NEIGH_CB(skb)->sched_next - now;
1527 struct net_device *dev = skb->dev;
1529 __skb_unlink(skb, &tbl->proxy_queue);
1530 if (tbl->proxy_redo && netif_running(dev)) {
1532 tbl->proxy_redo(skb);
1539 } else if (!sched_next || tdif < sched_next)
1542 del_timer(&tbl->proxy_timer);
1544 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1545 spin_unlock(&tbl->proxy_queue.lock);
/* pneigh_enqueue - queue an skb for delayed proxy processing.
 *
 * Picks a random deadline within PROXY_DELAY, drops the packet when the
 * proxy queue is over PROXY_QLEN, then enqueues under the queue lock,
 * clamping the deadline to an already-armed earlier timer expiry.
 * NOTE(review): prandom_u32() % NEIGH_VAR(p, PROXY_DELAY) divides by the
 * sysctl-controlled PROXY_DELAY -- if that can be configured to 0 this
 * is a divide-by-zero; verify against upstream handling.
 * NOTE(review): fragmented extract -- the kfree_skb/return drop path and
 * skb ownership lines before the enqueue are elided.
 */
1548 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1549 struct sk_buff *skb)
1551 unsigned long now = jiffies;
1553 unsigned long sched_next = now + (prandom_u32() %
1554 NEIGH_VAR(p, PROXY_DELAY));
1556 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1561 NEIGH_CB(skb)->sched_next = sched_next;
1562 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1564 spin_lock(&tbl->proxy_queue.lock);
1565 if (del_timer(&tbl->proxy_timer)) {
1566 if (time_before(tbl->proxy_timer.expires, sched_next))
1567 sched_next = tbl->proxy_timer.expires;
1571 __skb_queue_tail(&tbl->proxy_queue, skb);
1572 mod_timer(&tbl->proxy_timer, sched_next);
1573 spin_unlock(&tbl->proxy_queue.lock);
1577 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1578 struct net *net, int ifindex)
1580 struct neigh_parms *p;
1582 list_for_each_entry(p, &tbl->parms_list, list) {
1583 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1584 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* neigh_parms_alloc - clone the table's default parameters for @dev.
 *
 * Duplicates tbl->parms, resets refcount, randomizes reachable_time,
 * lets the driver veto/adjust via ndo_neigh_setup(), then links the new
 * block onto tbl->parms.list under tbl->lock and clears its sysctl
 * data-state.  Returns the new parms (elided in this extract) or NULL.
 * NOTE(review): fragmented extract -- the kmemdup NULL check, dev_hold,
 * the ndo_neigh_setup failure cleanup and the return lines are elided.
 */
1591 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1592 struct neigh_table *tbl)
1594 struct neigh_parms *p;
1595 struct net *net = dev_net(dev);
1596 const struct net_device_ops *ops = dev->netdev_ops;
1598 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1601 refcount_set(&p->refcnt, 1);
1603 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1606 write_pnet(&p->net, net);
1607 p->sysctl_table = NULL;
1609 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1615 write_lock_bh(&tbl->lock);
1616 list_add(&p->list, &tbl->parms.list);
1617 write_unlock_bh(&tbl->lock);
1619 neigh_parms_data_state_cleanall(p);
1623 EXPORT_SYMBOL(neigh_parms_alloc);
1625 static void neigh_rcu_free_parms(struct rcu_head *head)
1627 struct neigh_parms *parms =
1628 container_of(head, struct neigh_parms, rcu_head);
1630 neigh_parms_put(parms);
/* neigh_parms_release - unlink @parms from @tbl and schedule its free.
 *
 * The table default (&tbl->parms) is never released.  Unlinks under
 * tbl->lock, drops the device reference, and defers the final put to
 * an RCU callback so lockless readers can finish.
 * NOTE(review): fragmented extract -- the early return and the
 * parms->dead marking between list_del and unlock are elided.
 */
1633 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1635 if (!parms || parms == &tbl->parms)
1637 write_lock_bh(&tbl->lock);
1638 list_del(&parms->list);
1640 write_unlock_bh(&tbl->lock);
1642 dev_put(parms->dev);
1643 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1645 EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor for a neigh_parms block (reached via neigh_parms_put
 * when the refcount drops to zero).  Body elided in this extract.
 */
1647 static void neigh_parms_destroy(struct neigh_parms *parms)
/* Lockdep class for per-table proxy queues, so their locks are not
 * conflated with other skb queue heads. */
1652 static struct lock_class_key neigh_table_proxy_queue_class;
/* Registry of neighbour tables, indexed by NEIGH_*_TABLE constants;
 * written by neigh_table_init()/neigh_table_clear(). */
1654 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* neigh_table_init - bring a neighbour table online and register it in
 * neigh_tables[@index].
 *
 * Sets up the default parms (init_net, refcount 1, randomized
 * reachable_time), percpu stats, the /proc/net/stat seq file, an
 * initial hash of 2^3 buckets, the proxy (pneigh) hash, entry_size,
 * the table lock, the periodic GC work on the power-efficient
 * workqueue, and the proxy timer/queue.  Failures to allocate core
 * state panic, as at boot there is no recovery.
 * NOTE(review): fragmented extract -- NULL checks preceding the two
 * panic() calls and the #endif for CONFIG_PROC_FS are elided.
 */
1656 void neigh_table_init(int index, struct neigh_table *tbl)
1658 unsigned long now = jiffies;
1659 unsigned long phsize;
1661 INIT_LIST_HEAD(&tbl->parms_list);
1662 INIT_LIST_HEAD(&tbl->gc_list);
1663 list_add(&tbl->parms.list, &tbl->parms_list);
1664 write_pnet(&tbl->parms.net, &init_net);
1665 refcount_set(&tbl->parms.refcnt, 1);
1666 tbl->parms.reachable_time =
1667 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1669 tbl->stats = alloc_percpu(struct neigh_statistics);
1671 panic("cannot create neighbour cache statistics");
1673 #ifdef CONFIG_PROC_FS
1674 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1675 &neigh_stat_seq_ops, tbl))
1676 panic("cannot create neighbour proc dir entry");
1679 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1681 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1682 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1684 if (!tbl->nht || !tbl->phash_buckets)
1685 panic("cannot allocate neighbour cache hashes");
1687 if (!tbl->entry_size)
1688 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1689 tbl->key_len, NEIGH_PRIV_ALIGN);
1691 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1693 rwlock_init(&tbl->lock);
1694 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1695 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1696 tbl->parms.reachable_time);
1697 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1698 skb_queue_head_init_class(&tbl->proxy_queue,
1699 &neigh_table_proxy_queue_class);
1701 tbl->last_flush = now;
1702 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1704 neigh_tables[index] = tbl;
/* neigh_table_clear - tear down a neighbour table at module unload.
 *
 * Unregisters it from neigh_tables[], stops the GC work and proxy
 * timer, purges the proxy queue, evicts all entries (neigh_ifdown with
 * dev == NULL), warns loudly if any entry leaked, then frees the hash
 * (via RCU), the pneigh buckets, the /proc stats entry and the percpu
 * stats.  The retained comment flags that IPv6 unload is still racy.
 * NOTE(review): fragmented extract -- the return statement is elided.
 */
1708 int neigh_table_clear(int index, struct neigh_table *tbl)
1710 neigh_tables[index] = NULL;
1711 /* It is not clean... Fix it to unload IPv6 module safely */
1712 cancel_delayed_work_sync(&tbl->gc_work);
1713 del_timer_sync(&tbl->proxy_timer);
1714 pneigh_queue_purge(&tbl->proxy_queue);
1715 neigh_ifdown(tbl, NULL);
1716 if (atomic_read(&tbl->entries))
1717 pr_crit("neighbour leakage\n");
1719 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1720 neigh_hash_free_rcu);
1723 kfree(tbl->phash_buckets);
1724 tbl->phash_buckets = NULL;
1726 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1728 free_percpu(tbl->stats);
1733 EXPORT_SYMBOL(neigh_table_clear);
/* Map an address family to its neighbour table (ARP / ND / DECnet),
 * returning NULL for unsupported families.
 * NOTE(review): fragmented extract -- the switch statement and its case
 * labels (AF_INET/AF_INET6/AF_DECnet, presumably) are elided; only the
 * assignment arms remain visible.
 */
1735 static struct neigh_table *neigh_find_table(int family)
1737 struct neigh_table *tbl = NULL;
1741 tbl = neigh_tables[NEIGH_ARP_TABLE];
1744 tbl = neigh_tables[NEIGH_ND_TABLE];
1747 tbl = neigh_tables[NEIGH_DN_TABLE];
/* neigh_delete - RTM_DELNEIGH handler.
 *
 * Validates the message, resolves the optional device and the table by
 * family, checks the NDA_DST length against the table key, then either
 * deletes a proxy entry (NTF_PROXY) or looks up the real entry and
 * forces it to NUD_FAILED with ADMIN|OVERRIDE before unlinking it from
 * the table under tbl->lock.  The neigh_release() before
 * neigh_remove_one() drops only the lookup reference; the table still
 * holds its own.
 * NOTE(review): fragmented extract -- error returns/labels and several
 * NULL checks between the retained lines are elided.
 */
1754 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1755 struct netlink_ext_ack *extack)
1757 struct net *net = sock_net(skb->sk);
1759 struct nlattr *dst_attr;
1760 struct neigh_table *tbl;
1761 struct neighbour *neigh;
1762 struct net_device *dev = NULL;
1766 if (nlmsg_len(nlh) < sizeof(*ndm))
1769 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1771 NL_SET_ERR_MSG(extack, "Network address not specified");
1775 ndm = nlmsg_data(nlh);
1776 if (ndm->ndm_ifindex) {
1777 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1784 tbl = neigh_find_table(ndm->ndm_family);
1786 return -EAFNOSUPPORT;
1788 if (nla_len(dst_attr) < (int)tbl->key_len) {
1789 NL_SET_ERR_MSG(extack, "Invalid network address");
1793 if (ndm->ndm_flags & NTF_PROXY) {
1794 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1801 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1802 if (neigh == NULL) {
1807 err = __neigh_update(neigh, NULL, NUD_FAILED,
1808 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1809 NETLINK_CB(skb).portid, extack);
1810 write_lock_bh(&tbl->lock);
1811 neigh_release(neigh);
1812 neigh_remove_one(neigh, tbl);
1813 write_unlock_bh(&tbl->lock);
/* neigh_add - RTM_NEWNEIGH handler.
 *
 * Parses attributes, resolves device and table, validates NDA_DST /
 * NDA_LLADDR lengths and the optional NDA_PROTOCOL byte.  NTF_PROXY
 * entries go through pneigh_lookup(create=1).  Otherwise the entry is
 * looked up or created (NLM_F_CREATE), honouring NLM_F_EXCL /
 * NLM_F_REPLACE semantics: without REPLACE the OVERRIDE flags are
 * cleared for an existing entry.  PERMANENT or NTF_EXT_LEARNED entries
 * are created exempt from GC.  NTF_USE triggers resolution via
 * neigh_event_send() instead of a state update; everything else goes
 * through __neigh_update(), and a parsed protocol is stored afterwards.
 * NOTE(review): fragmented extract -- declarations of err/dst/lladdr/
 * protocol, error labels/returns and several closing braces are elided.
 */
1819 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1820 struct netlink_ext_ack *extack)
1822 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1823 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1824 struct net *net = sock_net(skb->sk);
1826 struct nlattr *tb[NDA_MAX+1];
1827 struct neigh_table *tbl;
1828 struct net_device *dev = NULL;
1829 struct neighbour *neigh;
1835 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1841 NL_SET_ERR_MSG(extack, "Network address not specified");
1845 ndm = nlmsg_data(nlh);
1846 if (ndm->ndm_ifindex) {
1847 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1853 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1854 NL_SET_ERR_MSG(extack, "Invalid link address");
1859 tbl = neigh_find_table(ndm->ndm_family);
1861 return -EAFNOSUPPORT;
1863 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1864 NL_SET_ERR_MSG(extack, "Invalid network address");
1868 dst = nla_data(tb[NDA_DST]);
1869 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1871 if (tb[NDA_PROTOCOL]) {
1872 if (nla_len(tb[NDA_PROTOCOL]) != sizeof(u8)) {
1873 NL_SET_ERR_MSG(extack, "Invalid protocol attribute");
1876 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
1879 if (ndm->ndm_flags & NTF_PROXY) {
1880 struct pneigh_entry *pn;
1883 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1885 pn->flags = ndm->ndm_flags;
1887 pn->protocol = protocol;
1894 NL_SET_ERR_MSG(extack, "Device not specified");
1898 neigh = neigh_lookup(tbl, dst, dev);
1899 if (neigh == NULL) {
1900 bool exempt_from_gc;
1902 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1907 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1908 ndm->ndm_flags & NTF_EXT_LEARNED;
1909 neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
1910 if (IS_ERR(neigh)) {
1911 err = PTR_ERR(neigh);
1915 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1917 neigh_release(neigh);
1921 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1922 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1923 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1926 if (ndm->ndm_flags & NTF_EXT_LEARNED)
1927 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1929 if (ndm->ndm_flags & NTF_ROUTER)
1930 flags |= NEIGH_UPDATE_F_ISROUTER;
1932 if (ndm->ndm_flags & NTF_USE) {
1933 neigh_event_send(neigh, NULL);
1936 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1937 NETLINK_CB(skb).portid, extack);
1940 neigh->protocol = protocol;
1942 neigh_release(neigh);
/* neightbl_fill_parms - emit one NDTA_PARMS nest describing @parms.
 *
 * Puts every NDTPA_* attribute (ifindex only for per-device parms,
 * refcount, queue lengths, probe counts, and the msec-converted timer
 * values with NDTPA_PAD alignment).  The deprecated NDTPA_QUEUE_LEN is
 * derived from QUEUE_LEN_BYTES via SKB_TRUESIZE(ETH_FRAME_LEN).
 * Returns the nest end offset on success; on any nla_put failure the
 * nest is cancelled (elided error-return follows the cancel).
 * NOTE(review): fragmented extract -- the nest NULL check, the opening
 * of the big if-condition and the nla_put_failure label line are elided.
 */
1948 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1950 struct nlattr *nest;
1952 nest = nla_nest_start(skb, NDTA_PARMS);
1957 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1958 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1959 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1960 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1961 /* approximative value for deprecated QUEUE_LEN (in packets) */
1962 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1963 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1964 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1965 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1966 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1967 NEIGH_VAR(parms, UCAST_PROBES)) ||
1968 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1969 NEIGH_VAR(parms, MCAST_PROBES)) ||
1970 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1971 NEIGH_VAR(parms, MCAST_REPROBES)) ||
1972 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1974 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1975 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1976 nla_put_msecs(skb, NDTPA_GC_STALETIME,
1977 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1978 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1979 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1980 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1981 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1982 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1983 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1984 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1985 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1986 nla_put_msecs(skb, NDTPA_LOCKTIME,
1987 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1988 goto nla_put_failure;
1989 return nla_nest_end(skb, nest);
1992 nla_nest_cancel(skb, nest);
/* neightbl_fill_info - build a full RTM_NEWNEIGHTBL message for @tbl.
 *
 * Under tbl->lock (read, BH off): fills the ndtmsg header, the name,
 * GC thresholds/interval, an NDTA_CONFIG snapshot (hash rnd/mask read
 * under rcu_read_lock_bh), aggregated per-cpu NDTA_STATS, and the
 * default parms nest (which must have dev == NULL, hence the BUG_ON).
 * Ends the nlmsg on success; on overflow cancels it and (elided)
 * returns -EMSGSIZE.
 * NOTE(review): fragmented extract -- the nlh NULL check, the
 * rcu_read_lock_bh() preceding the nht dereference, loop/brace lines
 * and the return statements are elided.
 */
1996 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1997 u32 pid, u32 seq, int type, int flags)
1999 struct nlmsghdr *nlh;
2000 struct ndtmsg *ndtmsg;
2002 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2006 ndtmsg = nlmsg_data(nlh);
2008 read_lock_bh(&tbl->lock);
2009 ndtmsg->ndtm_family = tbl->family;
2010 ndtmsg->ndtm_pad1 = 0;
2011 ndtmsg->ndtm_pad2 = 0;
2013 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2014 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2015 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2016 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2017 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2018 goto nla_put_failure;
2020 unsigned long now = jiffies;
2021 unsigned int flush_delta = now - tbl->last_flush;
2022 unsigned int rand_delta = now - tbl->last_rand;
2023 struct neigh_hash_table *nht;
2024 struct ndt_config ndc = {
2025 .ndtc_key_len = tbl->key_len,
2026 .ndtc_entry_size = tbl->entry_size,
2027 .ndtc_entries = atomic_read(&tbl->entries),
2028 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2029 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2030 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
2034 nht = rcu_dereference_bh(tbl->nht);
2035 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2036 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2037 rcu_read_unlock_bh();
2039 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2040 goto nla_put_failure;
2045 struct ndt_stats ndst;
2047 memset(&ndst, 0, sizeof(ndst));
2049 for_each_possible_cpu(cpu) {
2050 struct neigh_statistics *st;
2052 st = per_cpu_ptr(tbl->stats, cpu);
2053 ndst.ndts_allocs += st->allocs;
2054 ndst.ndts_destroys += st->destroys;
2055 ndst.ndts_hash_grows += st->hash_grows;
2056 ndst.ndts_res_failed += st->res_failed;
2057 ndst.ndts_lookups += st->lookups;
2058 ndst.ndts_hits += st->hits;
2059 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2060 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2061 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2062 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2063 ndst.ndts_table_fulls += st->table_fulls;
2066 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2068 goto nla_put_failure;
2071 BUG_ON(tbl->parms.dev);
2072 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2073 goto nla_put_failure;
2075 read_unlock_bh(&tbl->lock);
2076 nlmsg_end(skb, nlh);
2080 read_unlock_bh(&tbl->lock);
2081 nlmsg_cancel(skb, nlh);
/* neightbl_fill_param_info - small RTM_NEWNEIGHTBL message carrying one
 * parms nest (used when dumping per-device parameter blocks).
 * Fills the ndtmsg header and NDTA_NAME plus the parms nest under
 * tbl->lock; ends or cancels the nlmsg accordingly.
 * NOTE(review): fragmented extract -- the nlh NULL check, the failure
 * label and the return statements are elided.
 */
2085 static int neightbl_fill_param_info(struct sk_buff *skb,
2086 struct neigh_table *tbl,
2087 struct neigh_parms *parms,
2088 u32 pid, u32 seq, int type,
2091 struct ndtmsg *ndtmsg;
2092 struct nlmsghdr *nlh;
2094 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2098 ndtmsg = nlmsg_data(nlh);
2100 read_lock_bh(&tbl->lock);
2101 ndtmsg->ndtm_family = tbl->family;
2102 ndtmsg->ndtm_pad1 = 0;
2103 ndtmsg->ndtm_pad2 = 0;
2105 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2106 neightbl_fill_parms(skb, parms) < 0)
2109 read_unlock_bh(&tbl->lock);
2110 nlmsg_end(skb, nlh);
2113 read_unlock_bh(&tbl->lock);
2114 nlmsg_cancel(skb, nlh);
/* Netlink attribute policy for RTM_*NEIGHTBL table-level attributes. */
2118 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2119 [NDTA_NAME] = { .type = NLA_STRING },
2120 [NDTA_THRESH1] = { .type = NLA_U32 },
2121 [NDTA_THRESH2] = { .type = NLA_U32 },
2122 [NDTA_THRESH3] = { .type = NLA_U32 },
2123 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2124 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Netlink attribute policy for the nested NDTA_PARMS (NDTPA_*)
 * attributes; times are u64 milliseconds, counters are u32. */
2127 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2128 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2129 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2130 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2131 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2132 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2133 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2134 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2135 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2136 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2137 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2138 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2139 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2140 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2141 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* neightbl_set - RTM_SETNEIGHTBL handler: modify table and/or parms.
 *
 * Finds the table by NDTA_NAME (optionally constrained by family),
 * takes tbl->lock for writing (see retained comment) and then:
 *  - for NDTA_PARMS, parses the NDTPA_* nest, finds the right parms
 *    block via lookup_neigh_parms(net, ifindex), and applies each
 *    attribute via NEIGH_VAR_SET.  BASE_REACHABLE_TIME also refreshes
 *    reachable_time immediately, and DELAY_PROBE_TIME fires the
 *    NETEVENT_DELAY_PROBE_TIME_UPDATE notifier;
 *  - GC thresholds/interval may only be set from init_net.
 * NOTE(review): fragmented extract -- loop/switch scaffolding (for,
 * switch(i), break lines, closing braces), error paths and the final
 * return are elided; only the retained case bodies are visible.
 */
2144 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2145 struct netlink_ext_ack *extack)
2147 struct net *net = sock_net(skb->sk);
2148 struct neigh_table *tbl;
2149 struct ndtmsg *ndtmsg;
2150 struct nlattr *tb[NDTA_MAX+1];
2154 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2155 nl_neightbl_policy, extack);
2159 if (tb[NDTA_NAME] == NULL) {
2164 ndtmsg = nlmsg_data(nlh);
2166 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2167 tbl = neigh_tables[tidx];
2170 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2172 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2182 * We acquire tbl->lock to be nice to the periodic timers and
2183 * make sure they always see a consistent set of values.
2185 write_lock_bh(&tbl->lock);
2187 if (tb[NDTA_PARMS]) {
2188 struct nlattr *tbp[NDTPA_MAX+1];
2189 struct neigh_parms *p;
2192 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2193 nl_ntbl_parm_policy, extack);
2195 goto errout_tbl_lock;
2197 if (tbp[NDTPA_IFINDEX])
2198 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2200 p = lookup_neigh_parms(tbl, net, ifindex);
2203 goto errout_tbl_lock;
2206 for (i = 1; i <= NDTPA_MAX; i++) {
2211 case NDTPA_QUEUE_LEN:
2212 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2213 nla_get_u32(tbp[i]) *
2214 SKB_TRUESIZE(ETH_FRAME_LEN));
2216 case NDTPA_QUEUE_LENBYTES:
2217 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2218 nla_get_u32(tbp[i]));
2220 case NDTPA_PROXY_QLEN:
2221 NEIGH_VAR_SET(p, PROXY_QLEN,
2222 nla_get_u32(tbp[i]));
2224 case NDTPA_APP_PROBES:
2225 NEIGH_VAR_SET(p, APP_PROBES,
2226 nla_get_u32(tbp[i]));
2228 case NDTPA_UCAST_PROBES:
2229 NEIGH_VAR_SET(p, UCAST_PROBES,
2230 nla_get_u32(tbp[i]));
2232 case NDTPA_MCAST_PROBES:
2233 NEIGH_VAR_SET(p, MCAST_PROBES,
2234 nla_get_u32(tbp[i]));
2236 case NDTPA_MCAST_REPROBES:
2237 NEIGH_VAR_SET(p, MCAST_REPROBES,
2238 nla_get_u32(tbp[i]));
2240 case NDTPA_BASE_REACHABLE_TIME:
2241 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2242 nla_get_msecs(tbp[i]));
2243 /* update reachable_time as well, otherwise, the change will
2244 * only be effective after the next time neigh_periodic_work
2245 * decides to recompute it (can be multiple minutes)
2248 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2250 case NDTPA_GC_STALETIME:
2251 NEIGH_VAR_SET(p, GC_STALETIME,
2252 nla_get_msecs(tbp[i]));
2254 case NDTPA_DELAY_PROBE_TIME:
2255 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2256 nla_get_msecs(tbp[i]));
2257 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2259 case NDTPA_RETRANS_TIME:
2260 NEIGH_VAR_SET(p, RETRANS_TIME,
2261 nla_get_msecs(tbp[i]));
2263 case NDTPA_ANYCAST_DELAY:
2264 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2265 nla_get_msecs(tbp[i]));
2267 case NDTPA_PROXY_DELAY:
2268 NEIGH_VAR_SET(p, PROXY_DELAY,
2269 nla_get_msecs(tbp[i]));
2271 case NDTPA_LOCKTIME:
2272 NEIGH_VAR_SET(p, LOCKTIME,
2273 nla_get_msecs(tbp[i]));
2280 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2281 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2282 !net_eq(net, &init_net))
2283 goto errout_tbl_lock;
2285 if (tb[NDTA_THRESH1])
2286 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2288 if (tb[NDTA_THRESH2])
2289 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2291 if (tb[NDTA_THRESH3])
2292 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2294 if (tb[NDTA_GC_INTERVAL])
2295 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2300 write_unlock_bh(&tbl->lock);
/* Strict-mode validation for an RTM_GETNEIGHTBL dump request: header
 * must be full-sized, pad fields zero, and no trailing attributes.
 * NOTE(review): fragmented extract -- the error returns after each
 * NL_SET_ERR_MSG and the final return 0 are elided.
 */
2305 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2306 struct netlink_ext_ack *extack)
2308 struct ndtmsg *ndtm;
2310 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2311 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2315 ndtm = nlmsg_data(nlh);
2316 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2317 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2321 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2322 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* neightbl_dump_info - dump all neighbour tables and their per-device
 * parms over netlink.
 *
 * cb->args[0] is the table index to resume from, cb->args[1] the parms
 * index within that table.  For each table matching the requested
 * family, emits one table message and then one message per namespace-
 * matching non-default parms block.
 * NOTE(review): fragmented extract -- continue/break lines, the trailing
 * args bookkeeping and the return are elided.
 */
2329 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2331 const struct nlmsghdr *nlh = cb->nlh;
2332 struct net *net = sock_net(skb->sk);
2333 int family, tidx, nidx = 0;
2334 int tbl_skip = cb->args[0];
2335 int neigh_skip = cb->args[1];
2336 struct neigh_table *tbl;
2338 if (cb->strict_check) {
2339 int err = neightbl_valid_dump_info(nlh, cb->extack);
2345 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2347 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2348 struct neigh_parms *p;
2350 tbl = neigh_tables[tidx];
2354 if (tidx < tbl_skip || (family && tbl->family != family))
2357 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2358 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2363 p = list_next_entry(&tbl->parms, list);
2364 list_for_each_entry_from(p, &tbl->parms_list, list) {
2365 if (!net_eq(neigh_parms_net(p), net))
2368 if (nidx < neigh_skip)
2371 if (neightbl_fill_param_info(skb, tbl, p,
2372 NETLINK_CB(cb->skb).portid,
/* neigh_fill_info - serialize one neighbour entry into an RTM message.
 *
 * Fills the ndmsg header from the entry, puts NDA_DST from the primary
 * key, then under neigh->lock snapshots nud_state, the (NUD_VALID only)
 * hardware address via neigh_ha_snapshot(), and the cache-info
 * timestamps/refcount.  NDA_PROBES, NDA_CACHEINFO and the optional
 * NDA_PROTOCOL follow outside the lock.
 * NOTE(review): fragmented extract -- the nlh NULL check, ndm_pad
 * zeroing, and the return statements around nlmsg_end/cancel are elided.
 */
2390 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2391 u32 pid, u32 seq, int type, unsigned int flags)
2393 unsigned long now = jiffies;
2394 struct nda_cacheinfo ci;
2395 struct nlmsghdr *nlh;
2398 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2402 ndm = nlmsg_data(nlh);
2403 ndm->ndm_family = neigh->ops->family;
2406 ndm->ndm_flags = neigh->flags;
2407 ndm->ndm_type = neigh->type;
2408 ndm->ndm_ifindex = neigh->dev->ifindex;
2410 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2411 goto nla_put_failure;
2413 read_lock_bh(&neigh->lock);
2414 ndm->ndm_state = neigh->nud_state;
2415 if (neigh->nud_state & NUD_VALID) {
2416 char haddr[MAX_ADDR_LEN];
2418 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2419 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2420 read_unlock_bh(&neigh->lock);
2421 goto nla_put_failure;
2425 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2426 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2427 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2428 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2429 read_unlock_bh(&neigh->lock);
2431 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2432 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2433 goto nla_put_failure;
2435 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2436 goto nla_put_failure;
2438 nlmsg_end(skb, nlh);
2442 nlmsg_cancel(skb, nlh);
/* pneigh_fill_info - serialize one proxy (pneigh) entry.
 *
 * Proxy entries carry no NUD state or lladdr: flags get NTF_PROXY
 * or'ed in, state is NUD_NONE, type RTN_UNICAST, and only NDA_DST plus
 * the optional NDA_PROTOCOL attribute are emitted.
 * NOTE(review): fragmented extract -- the nlh NULL check, ndm_pad
 * zeroing and the return statements are elided.
 */
2446 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2447 u32 pid, u32 seq, int type, unsigned int flags,
2448 struct neigh_table *tbl)
2450 struct nlmsghdr *nlh;
2453 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2457 ndm = nlmsg_data(nlh);
2458 ndm->ndm_family = tbl->family;
2461 ndm->ndm_flags = pn->flags | NTF_PROXY;
2462 ndm->ndm_type = RTN_UNICAST;
2463 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2464 ndm->ndm_state = NUD_NONE;
2466 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2467 goto nla_put_failure;
2469 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2470 goto nla_put_failure;
2472 nlmsg_end(skb, nlh);
2476 nlmsg_cancel(skb, nlh);
2480 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2482 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2483 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
/* Return true when @dev should be skipped by a dump filtered on master
 * device index @master_idx (i.e. its upper master does not match).
 * NOTE(review): fragmented extract -- the early "no filter" return and
 * the true/false return lines are elided.
 */
2486 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2488 struct net_device *master;
2493 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2494 if (!master || master->ifindex != master_idx)
/* Return true when @dev should be skipped by a dump filtered on device
 * index @filter_idx.  NOTE(review): return lines elided in this extract.
 */
2500 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2502 if (filter_idx && (!dev || dev->ifindex != filter_idx))
/* Per-dump filter state (device/master ifindex); member declarations
 * are elided in this extract. */
2508 struct neigh_dump_filter {
/* neigh_dump_table - dump the real (non-proxy) entries of one table.
 *
 * Walks the RCU-protected hash (rcu_read_lock_bh held around the
 * traversal), resuming from cb->args[1] (bucket) / cb->args[2] (index),
 * skipping entries from other namespaces or rejected by the
 * device/master filter, and emitting RTM_NEWNEIGH via neigh_fill_info.
 * NLM_F_DUMP_FILTERED is set when any filter is active.
 * NOTE(review): fragmented extract -- rcu_read_lock_bh(), loop-exit
 * bookkeeping (rc/args updates), continue lines and braces are elided.
 */
2513 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2514 struct netlink_callback *cb,
2515 struct neigh_dump_filter *filter)
2517 struct net *net = sock_net(skb->sk);
2518 struct neighbour *n;
2519 int rc, h, s_h = cb->args[1];
2520 int idx, s_idx = idx = cb->args[2];
2521 struct neigh_hash_table *nht;
2522 unsigned int flags = NLM_F_MULTI;
2524 if (filter->dev_idx || filter->master_idx)
2525 flags |= NLM_F_DUMP_FILTERED;
2528 nht = rcu_dereference_bh(tbl->nht);
2530 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2533 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2535 n = rcu_dereference_bh(n->next)) {
2536 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2538 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2539 neigh_master_filtered(n->dev, filter->master_idx))
2541 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2554 rcu_read_unlock_bh();
/* pneigh_dump_table - dump the proxy entries of one table.
 *
 * Same resume/filter scheme as neigh_dump_table but over the plain
 * phash_buckets[] chains under read_lock_bh(&tbl->lock), resuming from
 * cb->args[3]/[4], and emitting via pneigh_fill_info.
 * NOTE(review): upstream compares pneigh_net(n) != net by pointer here
 * rather than net_eq(); kept as-is.  Fragmented extract -- loop-exit
 * bookkeeping, continue lines and braces are elided.
 */
2560 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2561 struct netlink_callback *cb,
2562 struct neigh_dump_filter *filter)
2564 struct pneigh_entry *n;
2565 struct net *net = sock_net(skb->sk);
2566 int rc, h, s_h = cb->args[3];
2567 int idx, s_idx = idx = cb->args[4];
2568 unsigned int flags = NLM_F_MULTI;
2570 if (filter->dev_idx || filter->master_idx)
2571 flags |= NLM_F_DUMP_FILTERED;
2573 read_lock_bh(&tbl->lock);
2575 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2578 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2579 if (idx < s_idx || pneigh_net(n) != net)
2581 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2582 neigh_master_filtered(n->dev, filter->master_idx))
2584 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2586 RTM_NEWNEIGH, flags, tbl) < 0) {
2587 read_unlock_bh(&tbl->lock);
2596 read_unlock_bh(&tbl->lock);
/* neigh_valid_dump_req - validate an RTM_GETNEIGH dump request and
 * extract the NDA_IFINDEX / NDA_MASTER filter values.
 *
 * In strict mode the full ndmsg header must be present with all
 * non-family fields zero, attributes are parsed strictly, and only the
 * two u32 filter attributes are accepted; legacy mode parses leniently.
 * NOTE(review): fragmented extract -- the for/switch scaffolding
 * (case labels, strict_check branch plumbing) and returns are elided.
 */
2605 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2607 struct neigh_dump_filter *filter,
2608 struct netlink_ext_ack *extack)
2610 struct nlattr *tb[NDA_MAX + 1];
2616 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2617 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2621 ndm = nlmsg_data(nlh);
2622 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2623 ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) {
2624 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2628 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2631 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2637 for (i = 0; i <= NDA_MAX; ++i) {
2641 /* all new attributes should require strict_check */
2644 if (nla_len(tb[i]) != sizeof(u32)) {
2645 NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
2648 filter->dev_idx = nla_get_u32(tb[i]);
2651 if (nla_len(tb[i]) != sizeof(u32)) {
2652 NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
2655 filter->master_idx = nla_get_u32(tb[i]);
2659 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/* neigh_dump_info - top-level RTM_GETNEIGH dump handler.
 *
 * Decides between proxy and normal dumps by peeking at ndm_flags
 * (NTF_PROXY) when a full ndmsg is present, validates/extracts the
 * filter, then iterates all tables matching the requested family,
 * resetting the per-table resume args for each new table.  Validation
 * errors are fatal only under strict_check.
 * NOTE(review): fragmented extract -- the proxy flag variable, s_t
 * resume handling, break on error and the return are elided.
 */
2668 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2670 const struct nlmsghdr *nlh = cb->nlh;
2671 struct neigh_dump_filter filter = {};
2672 struct neigh_table *tbl;
2677 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2679 /* check for full ndmsg structure presence, family member is
2680 * the same for both structures
2682 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2683 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2686 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2687 if (err < 0 && cb->strict_check)
2692 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2693 tbl = neigh_tables[t];
2697 if (t < s_t || (family && tbl->family != family))
2700 memset(&cb->args[1], 0, sizeof(cb->args) -
2701 sizeof(cb->args[0]));
2703 err = pneigh_dump_table(tbl, skb, cb, &filter);
2705 err = neigh_dump_table(tbl, skb, cb, &filter);
/* neigh_for_each - invoke @cb(entry, @cookie) on every entry of @tbl.
 *
 * Traversal is under rcu_read_lock_bh plus read_lock(&tbl->lock); the
 * table lock only prevents hash resizes during the walk (see retained
 * comment).  Callbacks must not sleep or take tbl->lock.
 * NOTE(review): fragmented extract -- rcu_read_lock_bh() and the
 * cb(n, cookie) invocation line are elided.
 */
2714 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2717 struct neigh_hash_table *nht;
2720 nht = rcu_dereference_bh(tbl->nht);
2722 read_lock(&tbl->lock); /* avoid resizes */
2723 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2724 struct neighbour *n;
2726 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2728 n = rcu_dereference_bh(n->next))
2731 read_unlock(&tbl->lock);
2732 rcu_read_unlock_bh();
/* __neigh_for_each_release - conditionally unlink-and-release entries.
 *
 * With tbl->lock write-held and BH off (retained comment), walks every
 * bucket; for each entry @cb decides (elided branch) whether to unlink
 * it from the chain under n->lock and then drop it via
 * neigh_cleanup_and_release().  Entries kept advance np instead.
 * NOTE(review): fragmented extract -- the release = cb(n) call, the
 * dead marking, the else/np-advance branch and a closing brace are
 * elided between the retained lines.
 */
2736 /* The tbl->lock must be held as a writer and BH disabled. */
2737 void __neigh_for_each_release(struct neigh_table *tbl,
2738 int (*cb)(struct neighbour *))
2741 struct neigh_hash_table *nht;
2743 nht = rcu_dereference_protected(tbl->nht,
2744 lockdep_is_held(&tbl->lock));
2745 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2746 struct neighbour *n;
2747 struct neighbour __rcu **np;
2749 np = &nht->hash_buckets[chain];
2750 while ((n = rcu_dereference_protected(*np,
2751 lockdep_is_held(&tbl->lock))) != NULL) {
2754 write_lock(&n->lock);
2757 rcu_assign_pointer(*np,
2758 rcu_dereference_protected(n->next,
2759 lockdep_is_held(&tbl->lock)));
2763 write_unlock(&n->lock);
2765 neigh_cleanup_and_release(n);
2769 EXPORT_SYMBOL(__neigh_for_each_release);
/* neigh_xmit - transmit @skb to @addr on @dev via table @index.
 *
 * For a real table index: lookup (or create) the entry under
 * rcu_read_lock_bh and hand the skb to neigh->output().  For
 * NEIGH_LINK_TABLE (index past the real tables): build the hard header
 * directly and queue the skb.  Returns -EAFNOSUPPORT for an unknown
 * index; creation failures free the skb (elided) and return the error.
 * NOTE(review): fragmented extract -- rcu_read_lock_bh(), the !tbl
 * bail-out, goto labels and kfree_skb error path are elided.
 */
2771 int neigh_xmit(int index, struct net_device *dev,
2772 const void *addr, struct sk_buff *skb)
2774 int err = -EAFNOSUPPORT;
2775 if (likely(index < NEIGH_NR_TABLES)) {
2776 struct neigh_table *tbl;
2777 struct neighbour *neigh;
2779 tbl = neigh_tables[index];
2783 neigh = __neigh_lookup_noref(tbl, addr, dev);
2785 neigh = __neigh_create(tbl, addr, dev, false);
2786 err = PTR_ERR(neigh);
2787 if (IS_ERR(neigh)) {
2788 rcu_read_unlock_bh();
2791 err = neigh->output(neigh, skb);
2792 rcu_read_unlock_bh();
2794 else if (index == NEIGH_LINK_TABLE) {
2795 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2796 addr, NULL, skb->len);
2799 err = dev_queue_xmit(skb);
2807 EXPORT_SYMBOL(neigh_xmit);
2809 #ifdef CONFIG_PROC_FS
/* neigh_get_first - seq_file helper: first visible neighbour entry.
 *
 * Scans buckets from 0, skipping entries from other namespaces,
 * applying the optional neigh_sub_iter sub-iteration, and honouring
 * NEIGH_SEQ_SKIP_NOARP.  Records the bucket reached in state->bucket.
 * Clears the IS_PNEIGH flag since this walks the real table.
 * NOTE(review): fragmented extract -- the per-entry "next:" style
 * loop labels, break/goto lines and the final return n are elided.
 */
2811 static struct neighbour *neigh_get_first(struct seq_file *seq)
2813 struct neigh_seq_state *state = seq->private;
2814 struct net *net = seq_file_net(seq);
2815 struct neigh_hash_table *nht = state->nht;
2816 struct neighbour *n = NULL;
2817 int bucket = state->bucket;
2819 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2820 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2821 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2824 if (!net_eq(dev_net(n->dev), net))
2826 if (state->neigh_sub_iter) {
2830 v = state->neigh_sub_iter(state, n, &fakep);
2834 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2836 if (n->nud_state & ~NUD_NOARP)
2839 n = rcu_dereference_bh(n->next);
2845 state->bucket = bucket;
/* neigh_get_next - seq_file helper: advance from entry @n.
 *
 * Consults neigh_sub_iter first (an address-family sub-walk), then
 * moves along the RCU chain applying the same namespace and SKIP_NOARP
 * filters as neigh_get_first, rolling over to the next bucket when a
 * chain is exhausted.
 * NOTE(review): fragmented extract -- loop labels, position-counter
 * (*pos) updates and the final return are elided.
 */
2850 static struct neighbour *neigh_get_next(struct seq_file *seq,
2851 struct neighbour *n,
2854 struct neigh_seq_state *state = seq->private;
2855 struct net *net = seq_file_net(seq);
2856 struct neigh_hash_table *nht = state->nht;
2858 if (state->neigh_sub_iter) {
2859 void *v = state->neigh_sub_iter(state, n, pos);
2863 n = rcu_dereference_bh(n->next);
2867 if (!net_eq(dev_net(n->dev), net))
2869 if (state->neigh_sub_iter) {
2870 void *v = state->neigh_sub_iter(state, n, pos);
2875 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2878 if (n->nud_state & ~NUD_NOARP)
2881 n = rcu_dereference_bh(n->next);
2887 if (++state->bucket >= (1 << nht->hash_shift))
2890 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* Position-based lookup: step neigh_get_first/next *pos times; returns
 * NULL when the position runs past the end.
 * NOTE(review): fragmented extract -- the while-loop scaffolding and
 * --(*pos) bookkeeping between the retained calls are elided.
 */
2898 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2900 struct neighbour *n = neigh_get_first(seq);
2905 n = neigh_get_next(seq, n, pos);
2910 return *pos ? NULL : n;
/* pneigh_get_first - seq_file helper: first proxy entry visible in this
 * namespace.  Sets the IS_PNEIGH flag so neigh_seq_next() knows which
 * walk it is continuing; records the bucket in state->bucket.
 * NOTE(review): fragmented extract -- the chain-advance line inside the
 * while and the break/return lines are elided.
 */
2913 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2915 struct neigh_seq_state *state = seq->private;
2916 struct net *net = seq_file_net(seq);
2917 struct neigh_table *tbl = state->tbl;
2918 struct pneigh_entry *pn = NULL;
2919 int bucket = state->bucket;
2921 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2922 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2923 pn = tbl->phash_buckets[bucket];
2924 while (pn && !net_eq(pneigh_net(pn), net))
2929 state->bucket = bucket;
/*
 * Advance the walk from proxy-neighbour @pn to the next entry in the
 * same netns, moving to subsequent phash buckets when the current
 * chain runs out.
 *
 * NOTE(review): excerpt is elided; the statements between the visible
 * lines (chain advance, loop structure) are missing here.
 */
2934 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2935 struct pneigh_entry *pn,
2938 struct neigh_seq_state *state = seq->private;
2939 struct net *net = seq_file_net(seq);
2940 struct neigh_table *tbl = state->tbl;
2944 } while (pn && !net_eq(pneigh_net(pn), net));
/* Current bucket done: step forward, stop after the last bucket. */
2947 if (++state->bucket > PNEIGH_HASHMASK)
2949 pn = tbl->phash_buckets[state->bucket];
2950 while (pn && !net_eq(pneigh_net(pn), net))
/*
 * Position-based lookup for proxy entries, mirroring neigh_get_idx():
 * walk forward from the first pneigh entry, consuming *pos; NULL if the
 * position is past the end.
 */
2962 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2964 struct pneigh_entry *pn = pneigh_get_first(seq);
2969 pn = pneigh_get_next(seq, pn, pos);
2974 return *pos ? NULL : pn;
/*
 * Combined position lookup: try the neighbour table first; if that is
 * exhausted and the caller did not request NEIGH_SEQ_NEIGH_ONLY, fall
 * through to the proxy-neighbour entries with the remaining offset.
 */
2977 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2979 struct neigh_seq_state *state = seq->private;
2981 loff_t idxpos = *pos;
2983 rc = neigh_get_idx(seq, &idxpos);
2984 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2985 rc = pneigh_get_idx(seq, &idxpos);
/*
 * seq_file ->start() helper shared by protocol backends (ARP/NDISC).
 *
 * Initialises the walk state, caches the table's current hash table via
 * rcu_dereference_bh() (the matching unlock is in neigh_seq_stop()),
 * and returns SEQ_START_TOKEN for position 0 or the entry at *pos
 * otherwise.  NEIGH_SEQ_IS_PNEIGH is an internal marker, hence masked
 * out of the caller-supplied flags.
 */
2990 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2993 struct neigh_seq_state *state = seq->private;
2997 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3000 state->nht = rcu_dereference_bh(tbl->nht);
3002 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3004 EXPORT_SYMBOL(neigh_seq_start);
/*
 * seq_file ->next() helper: advance from @v.
 *
 * After the start token, begin with the first neighbour.  While in the
 * neighbour phase, step via neigh_get_next(); once neighbours are
 * exhausted, switch to the proxy entries (unless NEIGH_SEQ_NEIGH_ONLY).
 * In the pneigh phase NEIGH_ONLY must not be set (BUG_ON), and stepping
 * uses pneigh_get_next().
 *
 * NOTE(review): excerpt is elided; the *pos increment and returns are
 * among the missing lines.
 */
3006 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3008 struct neigh_seq_state *state;
3011 if (v == SEQ_START_TOKEN) {
3012 rc = neigh_get_first(seq);
3016 state = seq->private;
3017 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3018 rc = neigh_get_next(seq, v, NULL);
3021 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3022 rc = pneigh_get_first(seq);
3024 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3025 rc = pneigh_get_next(seq, v, NULL);
3031 EXPORT_SYMBOL(neigh_seq_next);
/*
 * seq_file ->stop() helper: drop the BH-disabled RCU read lock taken
 * for the duration of the table walk.
 */
3033 void neigh_seq_stop(struct seq_file *seq, void *v)
3036 rcu_read_unlock_bh();
3038 EXPORT_SYMBOL(neigh_seq_stop);
3040 /* statistics via seq_file */
/*
 * ->start() for the per-table statistics file: emit SEQ_START_TOKEN
 * (presumably for *pos == 0 — that branch is elided here), then map the
 * position onto the per-CPU stats of the next possible CPU.  *pos - 1
 * is used because position 0 is taken by the header token.
 */
3042 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3044 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3048 return SEQ_START_TOKEN;
3050 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3051 if (!cpu_possible(cpu))
3054 return per_cpu_ptr(tbl->stats, cpu);
/*
 * ->next() for the statistics file: advance to the stats of the next
 * possible CPU, skipping CPU ids that are not possible.  The *pos
 * update is among the elided lines.
 */
3059 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3061 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3064 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3065 if (!cpu_possible(cpu))
3068 return per_cpu_ptr(tbl->stats, cpu);
/*
 * ->stop() for the statistics file.  Body is elided in this excerpt;
 * nothing to release here — presumably a no-op (TODO confirm against
 * the full source).
 */
3073 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/*
 * ->show() for the statistics file: print the column header for the
 * start token, otherwise one line of per-CPU counters.  Several of the
 * printed fields (hits, res_failed, forced_gc_runs, ...) are elided
 * from this excerpt but are implied by the 13-column format string.
 */
3078 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3080 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3081 struct neigh_statistics *st = v;
3083 if (v == SEQ_START_TOKEN) {
3084 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3088 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3089 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3090 atomic_read(&tbl->entries),
3101 st->rcv_probes_mcast,
3102 st->rcv_probes_ucast,
3104 st->periodic_gc_runs,
/* seq_file operations backing the per-table statistics /proc file. */
3113 static const struct seq_operations neigh_stat_seq_ops = {
3114 .start = neigh_stat_seq_start,
3115 .next = neigh_stat_seq_next,
3116 .stop = neigh_stat_seq_stop,
3117 .show = neigh_stat_seq_show,
3119 #endif /* CONFIG_PROC_FS */
/*
 * Worst-case payload size of a neighbour netlink message, used to size
 * the skb in __neigh_notify().  Must cover every attribute
 * neigh_fill_info() can emit; an undersized result shows up there as
 * -EMSGSIZE.
 */
3121 static inline size_t neigh_nlmsg_size(void)
3123 return NLMSG_ALIGN(sizeof(struct ndmsg))
3124 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3125 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3126 + nla_total_size(sizeof(struct nda_cacheinfo))
3127 + nla_total_size(4) /* NDA_PROBES */
3128 + nla_total_size(1); /* NDA_PROTOCOL */
/*
 * Broadcast a netlink notification about neighbour @n to the
 * RTNLGRP_NEIGH multicast group.  Allocates with GFP_ATOMIC since this
 * can run from timer/softirq context; on fill failure the error is
 * reported via rtnl_set_sk_err().  The error-path labels are elided in
 * this excerpt.
 */
3131 static void __neigh_notify(struct neighbour *n, int type, int flags,
3134 struct net *net = dev_net(n->dev);
3135 struct sk_buff *skb;
3138 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3142 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3144 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3145 WARN_ON(err == -EMSGSIZE);
3149 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3153 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/*
 * Ask userspace resolvers for help: emit an RTM_GETNEIGH request
 * notification for @n (app-level neighbour solicitation).
 */
3156 void neigh_app_ns(struct neighbour *n)
3158 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3160 EXPORT_SYMBOL(neigh_app_ns);
3162 #ifdef CONFIG_SYSCTL
/* Upper bounds handed to proc_dointvec_minmax() via tmp.extra2. */
3164 static int int_max = INT_MAX;
/* unres_qlen is user-visible in packets but stored in bytes; cap so the
 * bytes conversion below cannot overflow an int. */
3165 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/*
 * sysctl handler for the legacy "unres_qlen" knob: the stored value is
 * in bytes (QUEUE_LEN_BYTES) but the user reads/writes a packet count,
 * so convert using SKB_TRUESIZE(ETH_FRAME_LEN) in both directions.  A
 * shadow ctl_table with unres_qlen_max as the upper bound is passed to
 * proc_dointvec_minmax().  The write-path guard is elided here.
 */
3167 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3168 void __user *buffer, size_t *lenp, loff_t *ppos)
3171 struct ctl_table tmp = *ctl;
3174 tmp.extra2 = &unres_qlen_max;
3177 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3178 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3181 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/*
 * Fetch @dev's per-device neigh_parms for the given address family
 * (ARP for IPv4, NDISC for IPv6).  Caller must hold the RCU read lock.
 * The family dispatch (switch/if) is elided from this excerpt.
 */
3185 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3190 return __in_dev_arp_parms_get_rcu(dev);
3192 return __in6_dev_nd_parms_get_rcu(dev);
/*
 * Propagate a change of a "default" neigh parameter to every device in
 * @net whose own copy of that parameter has not been explicitly set
 * (data_state bit clear).  Runs under RCU for the netdev walk.
 */
3197 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3200 struct net_device *dev;
3201 int family = neigh_parms_family(p);
3204 for_each_netdev_rcu(net, dev) {
3205 struct neigh_parms *dst_p =
3206 neigh_get_dev_parms_rcu(dev, family);
/* Only overwrite values the device hasn't customised itself. */
3208 if (dst_p && !test_bit(index, dst_p->data_state))
3209 dst_p->data[index] = p->data[index];
/*
 * Common post-write bookkeeping for all neigh sysctl handlers:
 * mark the parameter as explicitly set, fire the netevent when the
 * delay-probe time changed, and — for the per-family "default" table
 * (ctl->extra1 == NULL) — push the new value to all devices that still
 * track the default.  ctl->data points into p->data[], so pointer
 * arithmetic recovers the NEIGH_VAR index.
 */
3214 static void neigh_proc_update(struct ctl_table *ctl, int write)
3216 struct net_device *dev = ctl->extra1;
3217 struct neigh_parms *p = ctl->extra2;
3218 struct net *net = neigh_parms_net(p);
3219 int index = (int *) ctl->data - p->data;
3224 set_bit(index, p->data_state);
3225 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3226 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3227 if (!dev) /* NULL dev means this is default value */
3228 neigh_copy_dflt_parms(net, p, index);
/*
 * Integer sysctl handler clamped to [0, INT_MAX] via a shadow table,
 * followed by the shared neigh_proc_update() bookkeeping.  The lower
 * bound (tmp.extra1) assignment is elided from this excerpt.
 */
3231 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3232 void __user *buffer,
3233 size_t *lenp, loff_t *ppos)
3235 struct ctl_table tmp = *ctl;
3239 tmp.extra2 = &int_max;
3241 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3242 neigh_proc_update(ctl, write);
/*
 * Plain integer sysctl handler plus neigh_proc_update() bookkeeping.
 * Exported for protocol backends that install custom handlers.
 */
3246 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3247 void __user *buffer, size_t *lenp, loff_t *ppos)
3249 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3251 neigh_proc_update(ctl, write);
3254 EXPORT_SYMBOL(neigh_proc_dointvec);
/*
 * Seconds-to-jiffies sysctl handler plus neigh_proc_update()
 * bookkeeping.  Exported for protocol backends.
 */
3256 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3257 void __user *buffer,
3258 size_t *lenp, loff_t *ppos)
3260 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3262 neigh_proc_update(ctl, write);
3265 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/*
 * USER_HZ-ticks-to-jiffies sysctl handler plus neigh_proc_update()
 * bookkeeping (used by retrans_time, anycast_delay, etc.).
 */
3267 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3268 void __user *buffer,
3269 size_t *lenp, loff_t *ppos)
3271 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3273 neigh_proc_update(ctl, write);
/*
 * Milliseconds-to-jiffies sysctl handler plus neigh_proc_update()
 * bookkeeping.  Exported for protocol backends.
 */
3277 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3278 void __user *buffer,
3279 size_t *lenp, loff_t *ppos)
3281 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3283 neigh_proc_update(ctl, write);
3286 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/*
 * Handler for the legacy packet-count "unres_qlen" knob: delegates the
 * bytes<->packets conversion to proc_unres_qlen(), then runs the shared
 * neigh_proc_update() bookkeeping.
 */
3288 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3289 void __user *buffer,
3290 size_t *lenp, loff_t *ppos)
3292 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3294 neigh_proc_update(ctl, write);
/*
 * Handler shared by "base_reachable_time" (seconds) and
 * "base_reachable_time_ms" (milliseconds): dispatch on procname to the
 * matching jiffies converter, and on a successful write immediately
 * re-randomise p->reachable_time so the new base takes effect without
 * waiting for neigh_periodic_work.  The assignment target of the
 * neigh_rand_reach_time() call is among the elided lines.
 */
3298 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3299 void __user *buffer,
3300 size_t *lenp, loff_t *ppos)
3302 struct neigh_parms *p = ctl->extra2;
3305 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3306 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3307 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3308 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3312 if (write && ret == 0) {
3313 /* update reachable_time as well, otherwise, the change will
3314 * only be effective after the next time neigh_periodic_work
3315 * decides to recompute it
3318 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Offset-of trick: the template table stores the offset of data[index]
 * within struct neigh_parms; neigh_sysctl_register() later adds the
 * actual parms pointer to turn it into a real address. */
3323 #define NEIGH_PARMS_DATA_OFFSET(index) \
3324 (&((struct neigh_parms *) 0)->data[index])
/* Generic ctl_table initialiser for one NEIGH_VAR slot; data_attr may
 * differ from attr for legacy aliases (e.g. unres_qlen ->
 * QUEUE_LEN_BYTES).  Some initialiser lines are elided here. */
3326 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3327 [NEIGH_VAR_ ## attr] = { \
3329 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3330 .maxlen = sizeof(int), \
3332 .proc_handler = proc, \
/* Convenience wrappers selecting the matching proc handler. */
3335 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3336 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3338 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3339 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3341 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3342 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3344 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3345 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
/* "Reused" variants: a second sysctl name backed by another variable's
 * storage (ms alias or byte-based queue length). */
3347 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3348 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3350 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3351 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/*
 * Template sysctl table, kmemdup'ed per neigh_parms in
 * neigh_sysctl_register().  Per-parms entries use offset-encoded .data
 * (fixed up at registration); the trailing GC entries point at
 * table-wide fields and only exist in the "default" instance.  Several
 * initialiser lines (.procname/.mode/extra bounds) are elided from
 * this excerpt.
 */
3353 static struct neigh_sysctl_table {
3354 struct ctl_table_header *sysctl_header;
3355 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3356 } neigh_sysctl_template __read_mostly = {
3358 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3359 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3360 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3361 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3362 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3363 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3364 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3365 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3366 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3367 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3368 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3369 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3370 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3371 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3372 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3373 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3374 [NEIGH_VAR_GC_INTERVAL] = {
3375 .procname = "gc_interval",
3376 .maxlen = sizeof(int),
3378 .proc_handler = proc_dointvec_jiffies,
3380 [NEIGH_VAR_GC_THRESH1] = {
3381 .procname = "gc_thresh1",
3382 .maxlen = sizeof(int),
3386 .proc_handler = proc_dointvec_minmax,
3388 [NEIGH_VAR_GC_THRESH2] = {
3389 .procname = "gc_thresh2",
3390 .maxlen = sizeof(int),
3394 .proc_handler = proc_dointvec_minmax,
3396 [NEIGH_VAR_GC_THRESH3] = {
3397 .procname = "gc_thresh3",
3398 .maxlen = sizeof(int),
3402 .proc_handler = proc_dointvec_minmax,
/*
 * Register the per-device (or per-family "default") neigh sysctl tree.
 *
 * Duplicates the template, rebases the offset-encoded .data pointers
 * onto @p, and stashes dev/p in extra1/extra2 for neigh_proc_update().
 * For a real device the table is truncated before the GC entries (those
 * are table-wide, not per-device); for the default instance the GC
 * entries are wired to the neigh_table fields.  @handler, when given,
 * overrides the four time-related knobs; otherwise the base reachable
 * time handlers are swapped for neigh_proc_base_reachable_time so
 * p->reachable_time is refreshed immediately on writes.  Error/cleanup
 * paths and the family switch arms are elided from this excerpt.
 */
3408 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3409 proc_handler *handler)
3412 struct neigh_sysctl_table *t;
3413 const char *dev_name_source;
3414 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3417 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3421 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
/* .data held an offset into struct neigh_parms; rebase onto p. */
3422 t->neigh_vars[i].data += (long) p;
3423 t->neigh_vars[i].extra1 = dev;
3424 t->neigh_vars[i].extra2 = p;
3428 dev_name_source = dev->name;
3429 /* Terminate the table early */
3430 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3431 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3433 struct neigh_table *tbl = p->tbl;
3434 dev_name_source = "default";
3435 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3436 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3437 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3438 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3443 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3445 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3446 /* RetransTime (in milliseconds)*/
3447 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3448 /* ReachableTime (in milliseconds) */
3449 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3451 /* Those handlers will update p->reachable_time after
3452 * base_reachable_time(_ms) is set to ensure the new timer starts being
3453 * applied after the next neighbour update instead of waiting for
3454 * neigh_periodic_work to update its value (can be multiple minutes)
3455 * So any handler that replaces them should do this as well
3458 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3459 neigh_proc_base_reachable_time;
3460 /* ReachableTime (in milliseconds) */
3461 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3462 neigh_proc_base_reachable_time;
3465 /* Don't export sysctls to unprivileged users */
3466 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3467 t->neigh_vars[0].procname = NULL;
3469 switch (neigh_parms_family(p)) {
/* Path is net/<ipv4|ipv6>/neigh/<devname or "default">. */
3480 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3481 p_name, dev_name_source);
3483 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3484 if (!t->sysctl_header)
3487 p->sysctl_table = t;
3495 EXPORT_SYMBOL(neigh_sysctl_register);
/*
 * Tear down the sysctl tree registered by neigh_sysctl_register().
 * p->sysctl_table is cleared before unregistering; the kfree of the
 * duplicated table is among the elided lines.
 */
3497 void neigh_sysctl_unregister(struct neigh_parms *p)
3499 if (p->sysctl_table) {
3500 struct neigh_sysctl_table *t = p->sysctl_table;
3501 p->sysctl_table = NULL;
3502 unregister_net_sysctl_table(t->sysctl_header);
3506 EXPORT_SYMBOL(neigh_sysctl_unregister);
3508 #endif /* CONFIG_SYSCTL */
/*
 * Module init: register the PF_UNSPEC rtnetlink handlers for neighbour
 * add/delete/dump and the neighbour-table get/set/dump operations.
 */
3510 static int __init neigh_init(void)
3512 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3513 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3514 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3516 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3518 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3523 subsys_initcall(neigh_init);