bridge: implement multicast fast leave
[linux-2.6-block.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
#define NEIGH_DEBUG 1

/* Debug print helpers: NEIGH_PRINTKn expands to printk() only when
 * NEIGH_DEBUG is >= n; otherwise it compiles down to a no-op.
 */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/* The proxy-neighbour hash has PNEIGH_HASHMASK + 1 buckets */
#define PNEIGH_HASHMASK         0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

/* NOTE(review): presumably min/max bounds for a sysctl table defined
 * later in the file -- confirm against the sysctl setup code.
 */
static int zero;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

/* Head of the global list of registered neighbour tables */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
72
73 /*
74    Neighbour hash table buckets are protected with rwlock tbl->lock.
75
76    - All the scans/updates to hash buckets MUST be made under this lock.
77    - NOTHING clever should be made under this lock: no callbacks
78      to protocol backends, no attempts to send something to network.
79      It will result in deadlocks, if backend/driver wants to use neighbour
80      cache.
81    - If the entry requires some non-trivial actions, increase
82      its reference count and release table lock.
83
84    Neighbour entries are protected:
85    - with reference count.
86    - with rwlock neigh->lock
87
88    Reference count prevents destruction.
89
90    neigh->lock mainly serializes ll address data and its validity state.
91    However, the same lock is used to protect another entry fields:
92     - timer
93     - resolution queue
94
95    Again, nothing clever shall be made under neigh->lock,
96    the most complicated procedure, which we allow is dev->hard_header.
97    It is supposed, that dev->hard_header is simplistic and does
98    not make callbacks to neighbour tables.
99
100    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
101    list of neighbour tables. This list is used only in process context.
102  */
103
/* Protects the neigh_tables list (see the block comment above) */
static DEFINE_RWLOCK(neigh_tbl_lock);
105
/* Dead-end output handler: drop the packet and report the network as
 * down.  Installed as the initial output hook of a fresh entry
 * (neigh_alloc) and while an entry is being strayed (neigh_flush_dev).
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}
111
/* Final teardown of an unlinked entry: run the protocol's optional
 * per-entry cleanup hook, broadcast an RTM_DELNEIGH netlink event and
 * drop the table's reference (which may free the entry).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);

        __neigh_notify(neigh, RTM_DELNEIGH, 0);
        neigh_release(neigh);
}
120
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

/* Returns a uniform random value in [base/2, 3*base/2), or 0 when
 * @base is 0.  Used to jitter per-parms reachable_time.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
        return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
132
133
/* Synchronous shrink of the table: drop every entry that is referenced
 * only by the table itself (refcnt == 1) and is not NUD_PERMANENT.
 * Triggered from neigh_alloc() when the gc thresholds are exceeded.
 * Returns 1 if at least one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Neighbour record may be discarded if:
                         * - nobody refers to it.
                         * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
                                /* Unlink with RCU publication so lockless
                                 * readers still see a consistent chain.
                                 */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
179
180 static void neigh_add_timer(struct neighbour *n, unsigned long when)
181 {
182         neigh_hold(n);
183         if (unlikely(mod_timer(&n->timer, when))) {
184                 printk("NEIGH: BUG, double timer add, state is %x\n",
185                        n->nud_state);
186                 dump_stack();
187         }
188 }
189
/* Stop a pending state-machine timer.  Returns 1 (after dropping the
 * timer's reference on @n) if the timer was actually pending, else 0.
 * NB: uses del_timer(), so a handler already running on another CPU is
 * not waited for.
 */
static int neigh_del_timer(struct neighbour *n)
{
        if ((n->nud_state & NUD_IN_TIMER) &&
            del_timer(&n->timer)) {
                neigh_release(n);
                return 1;
        }
        return 0;
}
199
/* Drain the proxy delay queue, dropping the device reference held by
 * each queued skb and freeing the skb itself.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(list)) != NULL) {
                dev_put(skb->dev);
                kfree_skb(skb);
        }
}
209
/* Unlink every entry bound to @dev (every entry when @dev is NULL).
 * Entries still referenced elsewhere cannot be freed yet: they are
 * "strayed" -- queue purged, output blackholed, state downgraded --
 * and get destroyed when the last reference is released.
 * Caller must hold tbl->lock for writing (see neigh_changeaddr()).
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
259
/* Flush all neighbour entries bound to @dev (e.g. after its link-layer
 * address changed), under the table write lock.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
267
/* Device is going down: flush its neighbour and proxy entries, then
 * stop the proxy timer and drain anything still in the proxy queue.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);

        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
280
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May trigger a synchronous forced GC when the table is over its
 * gc_thresh2/gc_thresh3 limits.  Returns NULL on pressure or OOM.
 * The new entry starts with dead == 1 -- it only becomes live once
 * linked into the hash by __neigh_create().
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        entries = atomic_inc_return(&tbl->entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                /* Over thresh3, or over thresh2 with the last flush more
                 * than 5s ago: reclaim first; fail only if still over
                 * thresh3 and nothing could be freed.
                 */
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3)
                        goto out_entries;
        }

        if (tbl->entry_size)
                n = kzalloc(tbl->entry_size, GFP_ATOMIC);
        else {
                /* No fixed entry size: struct + key, aligned, plus room
                 * for the device's private per-neighbour data.
                 */
                int sz = sizeof(*n) + tbl->key_len;

                sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
                sz += dev->neigh_priv_len;
                n = kzalloc(sz, GFP_ATOMIC);
        }
        if (!n)
                goto out_entries;

        skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        seqlock_init(&n->hh.hh_lock);
        n->parms          = neigh_parms_clone(&tbl->parms);
        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;  /* not yet inserted into the hash */
out:
        return n;

out_entries:
        atomic_dec(&tbl->entries);
        goto out;
}
329
/* Fill @x with random bits for hash keying; force bit 0 so the value
 * is never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
        get_random_bytes(x, sizeof(*x));
        *x |= 1;
}
335
/* Allocate a hash table with 2^shift buckets.  Bucket arrays up to a
 * page come from kzalloc; larger ones straight from the page allocator.
 * Fresh hash_rnd values are drawn each time, so a replacement table
 * redistributes the entries.  Returns NULL on OOM.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
        size_t size = (1 << shift) * sizeof(struct neighbour *);
        struct neigh_hash_table *ret;
        struct neighbour __rcu **buckets;
        int i;

        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (!ret)
                return NULL;
        if (size <= PAGE_SIZE)
                buckets = kzalloc(size, GFP_ATOMIC);
        else
                buckets = (struct neighbour __rcu **)
                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                           get_order(size));
        if (!buckets) {
                kfree(ret);
                return NULL;
        }
        ret->hash_buckets = buckets;
        ret->hash_shift = shift;
        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
                neigh_get_hash_rnd(&ret->hash_rnd[i]);
        return ret;
}
362
/* RCU callback: free a replaced hash table once no reader can still be
 * traversing it.  Mirrors the allocation split in neigh_hash_alloc()
 * (kzalloc vs. page allocator, chosen by bucket-array size).
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
        struct neigh_hash_table *nht = container_of(head,
                                                    struct neigh_hash_table,
                                                    rcu);
        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
        struct neighbour __rcu **buckets = nht->hash_buckets;

        if (size <= PAGE_SIZE)
                kfree(buckets);
        else
                free_pages((unsigned long)buckets, get_order(size));
        kfree(nht);
}
377
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry with the new table's hash_rnd.  The old table is freed via RCU,
 * so concurrent lockless readers see either the old or the new table.
 * On allocation failure the old table is kept and returned.
 * Caller must hold tbl->lock for writing.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        /* top hash_shift bits select the bucket */
                        hash >>= (32 - new_nht->hash_shift);
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
418
/* Find the entry for @pkey on @dev, taking a reference on success.
 * Runs locklessly under rcu_read_lock_bh(); a matching entry whose
 * refcount already hit zero is treated as absent (NULL returned).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        /* hits is bumped on a key match even when the
                         * refcount race was lost above
                         */
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
448
/* Like neigh_lookup(), but matches by key and network namespace only,
 * ignoring which device the entry is bound to (hash computed with a
 * NULL dev).  Takes a reference on the returned entry.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
479
/* Create and hash a new entry for @pkey/@dev, or return the existing
 * one if another CPU inserted it first.  On success the entry is live
 * (dead == 0); a reference is taken for the caller when @want_ref.
 * Returns ERR_PTR() on allocation/constructor failure or when the
 * parms block has already been marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* backdate confirmation so the entry starts unconfirmed */
        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /* grow the hash once entries outnumber buckets */
        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* re-scan under the lock in case we lost a creation race */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        if (want_ref)
                neigh_hold(n);
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(__neigh_create);
567
568 static u32 pneigh_hash(const void *pkey, int key_len)
569 {
570         u32 hash_val = *(u32 *)(pkey + key_len - 4);
571         hash_val ^= (hash_val >> 16);
572         hash_val ^= hash_val >> 8;
573         hash_val ^= hash_val >> 4;
574         hash_val &= PNEIGH_HASHMASK;
575         return hash_val;
576 }
577
578 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
579                                               struct net *net,
580                                               const void *pkey,
581                                               int key_len,
582                                               struct net_device *dev)
583 {
584         while (n) {
585                 if (!memcmp(n->key, pkey, key_len) &&
586                     net_eq(pneigh_net(n), net) &&
587                     (n->dev == dev || !n->dev))
588                         return n;
589                 n = n->next;
590         }
591         return NULL;
592 }
593
/* Pure proxy lookup, no creation and no locking here.
 * NOTE(review): presumably callers hold tbl->lock or RTNL to keep the
 * chain stable -- verify at the call sites.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
604
/* Find a proxy-neighbour entry for @pkey/@dev in @net.  When @creat is
 * nonzero and nothing matches, allocate one (GFP_KERNEL; RTNL must be
 * held for the create path) and insert it.  Returns NULL on a miss
 * with !creat, on OOM, or when the pconstructor hook fails.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, hold_net(net));
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                /* constructor failed: undo the refs taken above */
                if (dev)
                        dev_put(dev);
                release_net(net);
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);
650
651
/* Remove and free the proxy entry matching @pkey/@net with exactly
 * n->dev == @dev (no wildcard-device match here, unlike lookup).
 * Returns 0 on success, -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        /* drop the lock before the destructor callback */
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        release_net(pneigh_net(n));
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}
678
/* Drop every proxy entry bound to @dev (all entries when @dev is NULL).
 * Caller holds tbl->lock for writing (see neigh_ifdown()).
 * Always returns -ENOENT; the only caller in view ignores the value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
                                if (n->dev)
                                        dev_put(n->dev);
                                release_net(pneigh_net(n));
                                kfree(n);
                                continue;
                        }
                        np = &n->next;
                }
        }
        return -ENOENT;
}
702
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop a reference on a parms block, destroying it on the last put */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (atomic_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
710
/*
 *      neighbour must already be out of the table
 *      (unlinked, dead set) when this runs: releases the queue,
 *      device and parms references and frees the entry via RCU.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                /* refuse to free a live entry -- would corrupt the hash */
                pr_warn("Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        /* a still-pending timer here means a reference was leaked */
        if (neigh_del_timer(neigh))
                pr_warn("Impossible event\n");

        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
745
/* Neighbour state is suspicious;
   disable fast path by routing all traffic through the generic
   (resolving) output handler.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

        neigh->output = neigh->ops->output;
}
757
/* Neighbour state is OK;
   enable fast path by switching output to the connected handler,
   which can skip resolution.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

        neigh->output = neigh->ops->connected_output;
}
769
770 static void neigh_periodic_work(struct work_struct *work)
771 {
772         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
773         struct neighbour *n;
774         struct neighbour __rcu **np;
775         unsigned int i;
776         struct neigh_hash_table *nht;
777
778         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
779
780         write_lock_bh(&tbl->lock);
781         nht = rcu_dereference_protected(tbl->nht,
782                                         lockdep_is_held(&tbl->lock));
783
784         /*
785          *      periodically recompute ReachableTime from random function
786          */
787
788         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
789                 struct neigh_parms *p;
790                 tbl->last_rand = jiffies;
791                 for (p = &tbl->parms; p; p = p->next)
792                         p->reachable_time =
793                                 neigh_rand_reach_time(p->base_reachable_time);
794         }
795
796         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
797                 np = &nht->hash_buckets[i];
798
799                 while ((n = rcu_dereference_protected(*np,
800                                 lockdep_is_held(&tbl->lock))) != NULL) {
801                         unsigned int state;
802
803                         write_lock(&n->lock);
804
805                         state = n->nud_state;
806                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
807                                 write_unlock(&n->lock);
808                                 goto next_elt;
809                         }
810
811                         if (time_before(n->used, n->confirmed))
812                                 n->used = n->confirmed;
813
814                         if (atomic_read(&n->refcnt) == 1 &&
815                             (state == NUD_FAILED ||
816                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
817                                 *np = n->next;
818                                 n->dead = 1;
819                                 write_unlock(&n->lock);
820                                 neigh_cleanup_and_release(n);
821                                 continue;
822                         }
823                         write_unlock(&n->lock);
824
825 next_elt:
826                         np = &n->next;
827                 }
828                 /*
829                  * It's fine to release lock here, even if hash table
830                  * grows while we are preempted.
831                  */
832                 write_unlock_bh(&tbl->lock);
833                 cond_resched();
834                 write_lock_bh(&tbl->lock);
835                 nht = rcu_dereference_protected(tbl->nht,
836                                                 lockdep_is_held(&tbl->lock));
837         }
838         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
839          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
840          * base_reachable_time.
841          */
842         schedule_delayed_work(&tbl->gc_work,
843                               tbl->parms.base_reachable_time >> 1);
844         write_unlock_bh(&tbl->lock);
845 }
846
847 static __inline__ int neigh_max_probes(struct neighbour *n)
848 {
849         struct neigh_parms *p = n->parms;
850         return (n->nud_state & NUD_PROBE) ?
851                 p->ucast_probes :
852                 p->ucast_probes + p->app_probes + p->mcast_probes;
853 }
854
/* Resolution has failed for @neigh: bump the failure stat, report each
 * queued skb as unreachable via the protocol's error_report callback
 * (dropping neigh->lock around every call, since the callback may hit
 * this very entry again), then purge whatever remains queued.
 */
static void neigh_invalidate(struct neighbour *neigh)
        __releases(neigh->lock)
        __acquires(neigh->lock)
{
        struct sk_buff *skb;

        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
        neigh->updated = jiffies;

        /* It is very thin place. report_unreachable is very complicated
           routine. Particularly, it can hit the same neighbour entry!

           So that, we try to be accurate and avoid dead loop. --ANK
         */
        while (neigh->nud_state == NUD_FAILED &&
               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                write_unlock(&neigh->lock);
                neigh->ops->error_report(neigh, skb);
                write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;
}
879
/* Send one solicitation for @neigh and bump its probe counter.
 * A copy of the head-of-queue skb is handed to ops->solicit() so the
 * original survives if arp_queue is trimmed concurrently.
 * Called with neigh->lock write-held; drops it before soliciting.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
892
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: REACHABLE decays to DELAY or STALE,
 * DELAY either confirms back to REACHABLE or escalates to PROBE, and
 * INCOMPLETE/PROBE entries are re-solicited until neigh_max_probes()
 * is exhausted, at which point the entry becomes FAILED.
 * Runs in timer context, hence plain write_lock (BHs already off).
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The timer may fire after a state change already cleared
	 * the in-timer states; nothing to do then.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp re-arming to at most twice a second. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		/* mod_timer() returning 0 means the timer was inactive,
		 * so take an extra reference for the pending timer.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
976
/* Kick resolution for a neighbour on packet output.
 *
 * Returns 0 when the entry is usable and the caller may transmit now;
 * returns 1 when @skb was queued on arp_queue pending resolution, or
 * when resolution is impossible (no probes configured) in which case
 * @skb has been freed.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to solicit this neighbour: fail now. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Evict oldest queued skbs until this one fits
			 * within the queue_len_bytes budget.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases only the write lock, so the BH-disable
	 * taken by write_lock_bh() above is dropped separately below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1042
1043 static void neigh_update_hhs(struct neighbour *neigh)
1044 {
1045         struct hh_cache *hh;
1046         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1047                 = NULL;
1048
1049         if (neigh->dev->header_ops)
1050                 update = neigh->dev->header_ops->cache_update;
1051
1052         if (update) {
1053                 hh = &neigh->hh;
1054                 if (hh->hh_len) {
1055                         write_seqlock_bh(&hh->hh_lock);
1056                         update(hh, neigh->dev, neigh->ha);
1057                         write_sequnlock_bh(&hh->hh_lock);
1058                 }
1059         }
1060 }
1061
1062
1063
1064 /* Generic update routine.
1065    -- lladdr is new lladdr or NULL, if it is not supplied.
1066    -- new    is new state.
1067    -- flags
1068         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1069                                 if it is different.
1070         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1071                                 lladdr instead of overriding it
1072                                 if it is different.
1073                                 It also allows to retain current state
1074                                 if lladdr is unchanged.
1075         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1076
1077         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1078                                 NTF_ROUTER flag.
1079         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1080                                 a router.
1081
1082    Caller MUST hold reference count on the entry.
1083  */
1084
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	/* Transition to an invalid state: stop timers, flush the queue
	 * if the entry is being failed, and notify if it was valid.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but demote the
				 * entry to STALE instead of overriding.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the ha seqlock so readers
		 * never see a torn copy, then refresh cached headers.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1245
1246 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1247                                  u8 *lladdr, void *saddr,
1248                                  struct net_device *dev)
1249 {
1250         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1251                                                  lladdr || !dev->addr_len);
1252         if (neigh)
1253                 neigh_update(neigh, lladdr, NUD_STALE,
1254                              NEIGH_UPDATE_F_OVERRIDE);
1255         return neigh;
1256 }
1257 EXPORT_SYMBOL(neigh_event_ns);
1258
1259 /* called with read_lock_bh(&n->lock); */
1260 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1261 {
1262         struct net_device *dev = dst->dev;
1263         __be16 prot = dst->ops->protocol;
1264         struct hh_cache *hh = &n->hh;
1265
1266         write_lock_bh(&n->lock);
1267
1268         /* Only one thread can come in here and initialize the
1269          * hh_cache entry.
1270          */
1271         if (!hh->hh_len)
1272                 dev->header_ops->cache(n, hh, prot);
1273
1274         write_unlock_bh(&n->lock);
1275 }
1276
1277 /* This function can be used in contexts, where only old dev_queue_xmit
1278  * worked, f.e. if you want to override normal output path (eql, shaper),
1279  * but resolution is not made yet.
1280  */
1281
1282 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1283 {
1284         struct net_device *dev = skb->dev;
1285
1286         __skb_pull(skb, skb_network_offset(skb));
1287
1288         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1289                             skb->len) < 0 &&
1290             dev->header_ops->rebuild(skb))
1291                 return 0;
1292
1293         return dev_queue_xmit(skb);
1294 }
1295 EXPORT_SYMBOL(neigh_compat_output);
1296
/* Slow and careful. */

/* Output path for entries that may still need resolution: queue the
 * packet via neigh_event_send() when unresolved; otherwise build the
 * hardware header (retrying under the ha_lock seqlock if the address
 * changes mid-copy) and transmit.  Consumes @skb on error.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	/* neigh_event_send() returns 0 when the entry is usable now;
	 * non-zero means the skb was queued (or freed) for us.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
	/* deliberate fall-through: discard also frees the skb */
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1338
1339 /* As fast as possible without hh cache */
1340
1341 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1342 {
1343         struct net_device *dev = neigh->dev;
1344         unsigned int seq;
1345         int err;
1346
1347         do {
1348                 __skb_pull(skb, skb_network_offset(skb));
1349                 seq = read_seqbegin(&neigh->ha_lock);
1350                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1351                                       neigh->ha, NULL, skb->len);
1352         } while (read_seqretry(&neigh->ha_lock, seq));
1353
1354         if (err >= 0)
1355                 err = dev_queue_xmit(skb);
1356         else {
1357                 err = -EINVAL;
1358                 kfree_skb(skb);
1359         }
1360         return err;
1361 }
1362 EXPORT_SYMBOL(neigh_connected_output);
1363
/* Output helper for neighbours that need no link-layer header work:
 * hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1369
/* Timer callback for tbl->proxy_timer: process every queued proxy skb
 * whose scheduled time has arrived (re-running it via tbl->proxy_redo
 * if the device is still up, else dropping it), then re-arm the timer
 * for the earliest remaining entry, if any.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* Signed jiffies delta: <= 0 means this skb is due. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1403
/* Queue @skb for delayed proxy handling, spreading replies over a
 * random delay of up to p->proxy_delay jiffies; drops the skb when the
 * queue already exceeds p->proxy_qlen.
 * NOTE(review): net_random() % p->proxy_delay assumes proxy_delay is
 * non-zero here -- confirm callers guarantee that.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* If an earlier expiry is already pending, keep it. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1430
1431 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1432                                                       struct net *net, int ifindex)
1433 {
1434         struct neigh_parms *p;
1435
1436         for (p = &tbl->parms; p; p = p->next) {
1437                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1438                     (!p->dev && !ifindex))
1439                         return p;
1440         }
1441
1442         return NULL;
1443 }
1444
/* Create per-device neighbour parameters for @dev by cloning @tbl's
 * netns default parms, letting the driver's ndo_neigh_setup() adjust
 * them, then linking the clone into the table's parms list.
 * Holds references on @dev and its netns; returns NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* ifindex 0 selects the netns-wide default entry to clone. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Give the driver a chance to veto or tune the parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1480
/* RCU callback: drop the reference on a neigh_parms that was unlinked
 * from its table's list by neigh_parms_release().
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1488
/* Unlink @parms from @tbl's list, mark it dead, and defer dropping
 * its reference until after an RCU grace period.  The table's
 * built-in default parms (&tbl->parms) are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			/* Drop the device ref taken in neigh_parms_alloc(). */
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1511
/* Free a neigh_parms together with the netns reference it holds. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1517
1518 static struct lock_class_key neigh_table_proxy_queue_class;
1519
/* Core setup of a neighbour table: default parms, per-cpu statistics,
 * /proc stat entry, neighbour and proxy hash tables, locks, the
 * garbage-collection work and the proxy timer.  Panics on allocation
 * failure, as tables are created during (module) initialisation.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Small initial neighbour hash; presumably grown on demand
	 * elsewhere (see the grow note in neigh_periodic_work).
	 */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1558
/* Initialise @tbl and link it at the head of the global neigh_tables
 * list, complaining loudly if a table for the same address family has
 * already been registered.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	/* tmp != NULL here means a duplicate family was found above. */
	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1580
/* Tear down @tbl when its protocol module unloads: stop the GC work
 * and proxy timer, flush all entries, unlink the table from the
 * global list, then free the hashes, /proc entry and per-cpu stats.
 * Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash only after any RCU readers are done with it. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1616
/* RTM_DELNEIGH netlink handler: find the table matching the message's
 * address family and delete the given entry -- proxy entries via
 * pneigh_delete(), normal entries by forcing them to NUD_FAILED with
 * administrative override through neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the family's table; dropping the list lock is
		 * safe since every path below leaves the loop.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1680
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * described by a netlink request.  Called with the RTNL lock held.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a malformed
 * request, -ENODEV for an unknown ifindex, -ENOENT when the entry is
 * missing and NLM_F_CREATE is not set, -EEXIST when it exists and
 * NLM_F_EXCL is set, -EAFNOSUPPORT when no table matches the family.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* Reject link-layer addresses shorter than the device's. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the table for this family; the list lock is no
		 * longer needed (every path below leaves via "out"). */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			/* Proxy entries live in a separate hash; note that
			 * dev may still be NULL here (device-agnostic
			 * proxy entry). */
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE, keep an existing address. */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE just marks the entry used, triggering
			 * resolution if necessary; no state is forced. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1778
1779 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1780 {
1781         struct nlattr *nest;
1782
1783         nest = nla_nest_start(skb, NDTA_PARMS);
1784         if (nest == NULL)
1785                 return -ENOBUFS;
1786
1787         if ((parms->dev &&
1788              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1789             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1790             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1791             /* approximative value for deprecated QUEUE_LEN (in packets) */
1792             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1793                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1794             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1795             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1796             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1797             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1798             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1799             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1800                           parms->base_reachable_time) ||
1801             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1802             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1803                           parms->delay_probe_time) ||
1804             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1805             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1806             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1807             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1808                 goto nla_put_failure;
1809         return nla_nest_end(skb, nest);
1810
1811 nla_put_failure:
1812         nla_nest_cancel(skb, nest);
1813         return -EMSGSIZE;
1814 }
1815
/* Fill one RTM_NEWNEIGHTBL message describing @tbl: GC thresholds and
 * interval, the runtime configuration (NDTA_CONFIG), the per-CPU
 * statistics summed into NDTA_STATS, and the default parameter set
 * (NDTA_PARMS).
 *
 * Returns the message length on success or -EMSGSIZE if @skb is full.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold the table lock so all reported values are consistent. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed (it can be
		 * resized concurrently), so read it under RCU. */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Statistics are kept per CPU; report their sum. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parameter set is never device-bound. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1901
1902 static int neightbl_fill_param_info(struct sk_buff *skb,
1903                                     struct neigh_table *tbl,
1904                                     struct neigh_parms *parms,
1905                                     u32 pid, u32 seq, int type,
1906                                     unsigned int flags)
1907 {
1908         struct ndtmsg *ndtmsg;
1909         struct nlmsghdr *nlh;
1910
1911         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1912         if (nlh == NULL)
1913                 return -EMSGSIZE;
1914
1915         ndtmsg = nlmsg_data(nlh);
1916
1917         read_lock_bh(&tbl->lock);
1918         ndtmsg->ndtm_family = tbl->family;
1919         ndtmsg->ndtm_pad1   = 0;
1920         ndtmsg->ndtm_pad2   = 0;
1921
1922         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1923             neightbl_fill_parms(skb, parms) < 0)
1924                 goto errout;
1925
1926         read_unlock_bh(&tbl->lock);
1927         return nlmsg_end(skb, nlh);
1928 errout:
1929         read_unlock_bh(&tbl->lock);
1930         nlmsg_cancel(skb, nlh);
1931         return -EMSGSIZE;
1932 }
1933
/* Validation policy for RTM_SETNEIGHTBL top-level attributes; enforced
 * by nlmsg_parse() in neightbl_set(). */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1942
1943 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1944         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1945         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1946         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1947         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1948         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1949         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1950         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1951         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1952         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1953         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1954         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1955         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1956         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1957 };
1958
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC thresholds
 * and interval, and/or one of its parameter sets (selected by
 * NDTPA_IFINDEX inside NDTA_PARMS; ifindex 0 means the default set).
 * The table is selected by NDTA_NAME, optionally narrowed by family.
 *
 * Returns 0 on success, a nlmsg/nla parse error, -EINVAL when
 * NDTA_NAME is missing, or -ENOENT when the table or parameter set
 * does not exist.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	/* Find the target table by name (and family, if given). */
	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied tunable to the parameter set.
		 * The deprecated NDTPA_QUEUE_LEN (packets) is converted
		 * to bytes using a typical Ethernet frame's truesize.
		 */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2087
/* Dump all neighbour tables and their per-device parameter sets as a
 * series of RTM_NEWNEIGHTBL messages.  Resume state across dump
 * invocations: cb->args[0] = table index, cb->args[1] = parameter-set
 * index within that table.  An optional family filter comes from the
 * request's rtgenmsg.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		/* First the table itself (with its default parms)... */
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* ...then each additional parameter set, skipping those
		 * belonging to other network namespaces. */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	/* Record where to resume on the next invocation. */
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2136
/* Fill one netlink message of @type (normally RTM_NEWNEIGH) describing
 * @neigh: family, flags, state, the primary key (NDA_DST), the
 * hardware address (NDA_LLADDR) when the entry is valid, probe count
 * and cache timestamps (NDA_CACHEINFO).
 *
 * Returns the message length on success or -EMSGSIZE if @skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* Snapshot state, address and timestamps consistently under the
	 * entry's lock. */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Ages are reported in clock_t relative to now; the refcount is
	 * reduced by one to hide the reference held by the caller. */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2188
2189 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2190                             u32 pid, u32 seq, int type, unsigned int flags,
2191                             struct neigh_table *tbl)
2192 {
2193         struct nlmsghdr *nlh;
2194         struct ndmsg *ndm;
2195
2196         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2197         if (nlh == NULL)
2198                 return -EMSGSIZE;
2199
2200         ndm = nlmsg_data(nlh);
2201         ndm->ndm_family  = tbl->family;
2202         ndm->ndm_pad1    = 0;
2203         ndm->ndm_pad2    = 0;
2204         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2205         ndm->ndm_type    = NDA_DST;
2206         ndm->ndm_ifindex = pn->dev->ifindex;
2207         ndm->ndm_state   = NUD_NONE;
2208
2209         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2210                 goto nla_put_failure;
2211
2212         return nlmsg_end(skb, nlh);
2213
2214 nla_put_failure:
2215         nlmsg_cancel(skb, nlh);
2216         return -EMSGSIZE;
2217 }
2218
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink broadcast. */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2224
/* Dump the entries of one neighbour table into @skb as RTM_NEWNEIGH
 * messages.  Resume state: cb->args[1] = hash bucket, cb->args[2] =
 * index within the bucket.  The walk runs under rcu_read_lock_bh();
 * entries belonging to other network namespaces are skipped.
 *
 * Returns skb->len when the table is exhausted, or -1 when the skb
 * filled up mid-dump (the saved args let the next call resume).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* The in-bucket skip count only applies to the bucket
		 * the previous dump stopped in. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Record where to resume on the next invocation. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2265
2266 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2267                              struct netlink_callback *cb)
2268 {
2269         struct pneigh_entry *n;
2270         struct net *net = sock_net(skb->sk);
2271         int rc, h, s_h = cb->args[3];
2272         int idx, s_idx = idx = cb->args[4];
2273
2274         read_lock_bh(&tbl->lock);
2275
2276         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2277                 if (h > s_h)
2278                         s_idx = 0;
2279                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2280                         if (dev_net(n->dev) != net)
2281                                 continue;
2282                         if (idx < s_idx)
2283                                 goto next;
2284                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2285                                             cb->nlh->nlmsg_seq,
2286                                             RTM_NEWNEIGH,
2287                                             NLM_F_MULTI, tbl) <= 0) {
2288                                 read_unlock_bh(&tbl->lock);
2289                                 rc = -1;
2290                                 goto out;
2291                         }
2292                 next:
2293                         idx++;
2294                 }
2295         }
2296
2297         read_unlock_bh(&tbl->lock);
2298         rc = skb->len;
2299 out:
2300         cb->args[3] = h;
2301         cb->args[4] = idx;
2302         return rc;
2303
2304 }
2305
/* RTM_GETNEIGH dump handler: walk every neighbour table matching the
 * requested family and dump either its proxy entries (when the request
 * carries a full ndmsg with ndm_flags == NTF_PROXY) or its neighbour
 * entries.  cb->args[0] is the table index to resume from; args[1..]
 * are the per-table resume state cleared when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: wipe the per-table resume state
		 * left over from the previous one. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2344
2345 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2346 {
2347         int chain;
2348         struct neigh_hash_table *nht;
2349
2350         rcu_read_lock_bh();
2351         nht = rcu_dereference_bh(tbl->nht);
2352
2353         read_lock(&tbl->lock); /* avoid resizes */
2354         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2355                 struct neighbour *n;
2356
2357                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2358                      n != NULL;
2359                      n = rcu_dereference_bh(n->next))
2360                         cb(n, cookie);
2361         }
2362         read_unlock(&tbl->lock);
2363         rcu_read_unlock_bh();
2364 }
2365 EXPORT_SYMBOL(neigh_for_each);
2366
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and call @cb on each entry under its own lock;
 * entries for which @cb returns nonzero are unlinked from the chain,
 * marked dead, and passed to neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that leads to n, so the
		 * unlink below is a single pointer update. */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release outside n->lock: cleanup may drop the
			 * final reference. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2401
2402 #ifdef CONFIG_PROC_FS
2403
/* seq_file helper: return the first neighbour entry visible to this
 * iteration — same net namespace, passing the optional neigh_sub_iter
 * hook, and (with NEIGH_SEQ_SKIP_NOARP) not in a pure NOARP state.
 * Updates state->bucket to the bucket the entry was found in.  Entries
 * are rcu-dereferenced, so the caller must be in an rcu_read_lock_bh
 * section.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	/* NOTE(review): this initializer is dead — the loop below
	 * immediately restarts bucket at 0. */
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2442
/* seq_file helper: advance from @n to the next visible neighbour entry
 * (same filters as neigh_get_first), moving to later hash buckets as
 * needed and updating state->bucket.  When a sub-iterator is present
 * it is given the chance to advance within @n first.  Decrements *pos
 * when a next entry exists.  Returns NULL at the end of the table.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Current bucket exhausted; move on to the next one. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2490
2491 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2492 {
2493         struct neighbour *n = neigh_get_first(seq);
2494
2495         if (n) {
2496                 --(*pos);
2497                 while (*pos) {
2498                         n = neigh_get_next(seq, n, pos);
2499                         if (!n)
2500                                 break;
2501                 }
2502         }
2503         return *pos ? NULL : n;
2504 }
2505
2506 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2507 {
2508         struct neigh_seq_state *state = seq->private;
2509         struct net *net = seq_file_net(seq);
2510         struct neigh_table *tbl = state->tbl;
2511         struct pneigh_entry *pn = NULL;
2512         int bucket = state->bucket;
2513
2514         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2515         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2516                 pn = tbl->phash_buckets[bucket];
2517                 while (pn && !net_eq(pneigh_net(pn), net))
2518                         pn = pn->next;
2519                 if (pn)
2520                         break;
2521         }
2522         state->bucket = bucket;
2523
2524         return pn;
2525 }
2526
2527 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2528                                             struct pneigh_entry *pn,
2529                                             loff_t *pos)
2530 {
2531         struct neigh_seq_state *state = seq->private;
2532         struct net *net = seq_file_net(seq);
2533         struct neigh_table *tbl = state->tbl;
2534
2535         do {
2536                 pn = pn->next;
2537         } while (pn && !net_eq(pneigh_net(pn), net));
2538
2539         while (!pn) {
2540                 if (++state->bucket > PNEIGH_HASHMASK)
2541                         break;
2542                 pn = tbl->phash_buckets[state->bucket];
2543                 while (pn && !net_eq(pneigh_net(pn), net))
2544                         pn = pn->next;
2545                 if (pn)
2546                         break;
2547         }
2548
2549         if (pn && pos)
2550                 --(*pos);
2551
2552         return pn;
2553 }
2554
2555 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2556 {
2557         struct pneigh_entry *pn = pneigh_get_first(seq);
2558
2559         if (pn) {
2560                 --(*pos);
2561                 while (*pos) {
2562                         pn = pneigh_get_next(seq, pn, pos);
2563                         if (!pn)
2564                                 break;
2565                 }
2566         }
2567         return *pos ? NULL : pn;
2568 }
2569
2570 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2571 {
2572         struct neigh_seq_state *state = seq->private;
2573         void *rc;
2574         loff_t idxpos = *pos;
2575
2576         rc = neigh_get_idx(seq, &idxpos);
2577         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2578                 rc = pneigh_get_idx(seq, &idxpos);
2579
2580         return rc;
2581 }
2582
2583 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2584         __acquires(rcu_bh)
2585 {
2586         struct neigh_seq_state *state = seq->private;
2587
2588         state->tbl = tbl;
2589         state->bucket = 0;
2590         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2591
2592         rcu_read_lock_bh();
2593         state->nht = rcu_dereference_bh(tbl->nht);
2594
2595         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2596 }
2597 EXPORT_SYMBOL(neigh_seq_start);
2598
/* seq_file ->next for a neighbour dump.  After the SEQ_START_TOKEN
 * header comes the main hash table; once it is exhausted (and the
 * caller did not request NEIGH_SEQ_NEIGH_ONLY) iteration continues
 * into the proxy table, tracked via the NEIGH_SEQ_IS_PNEIGH flag set
 * by pneigh_get_first(). */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* Already in the pneigh phase; NEIGH_ONLY dumps never get here. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2625
/* seq_file ->stop: drop the BH-RCU read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2632
2633 /* statistics via seq_file */
2634
2635 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2636 {
2637         struct neigh_table *tbl = seq->private;
2638         int cpu;
2639
2640         if (*pos == 0)
2641                 return SEQ_START_TOKEN;
2642
2643         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2644                 if (!cpu_possible(cpu))
2645                         continue;
2646                 *pos = cpu+1;
2647                 return per_cpu_ptr(tbl->stats, cpu);
2648         }
2649         return NULL;
2650 }
2651
2652 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2653 {
2654         struct neigh_table *tbl = seq->private;
2655         int cpu;
2656
2657         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2658                 if (!cpu_possible(cpu))
2659                         continue;
2660                 *pos = cpu+1;
2661                 return per_cpu_ptr(tbl->stats, cpu);
2662         }
2663         return NULL;
2664 }
2665
/* Nothing to release: the stats iterator takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2670
2671 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2672 {
2673         struct neigh_table *tbl = seq->private;
2674         struct neigh_statistics *st = v;
2675
2676         if (v == SEQ_START_TOKEN) {
2677                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2678                 return 0;
2679         }
2680
2681         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2682                         "%08lx %08lx  %08lx %08lx %08lx\n",
2683                    atomic_read(&tbl->entries),
2684
2685                    st->allocs,
2686                    st->destroys,
2687                    st->hash_grows,
2688
2689                    st->lookups,
2690                    st->hits,
2691
2692                    st->res_failed,
2693
2694                    st->rcv_probes_mcast,
2695                    st->rcv_probes_ucast,
2696
2697                    st->periodic_gc_runs,
2698                    st->forced_gc_runs,
2699                    st->unres_discards
2700                    );
2701
2702         return 0;
2703 }
2704
/* seq_file iterator over one table's per-CPU neigh_statistics; wired
 * up through neigh_stat_seq_open() below. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2711
2712 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2713 {
2714         int ret = seq_open(file, &neigh_stat_seq_ops);
2715
2716         if (!ret) {
2717                 struct seq_file *sf = file->private_data;
2718                 sf->private = PDE(inode)->data;
2719         }
2720         return ret;
2721 };
2722
/* file_operations backing the per-table neighbour statistics proc file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2730
2731 #endif /* CONFIG_PROC_FS */
2732
/* Worst-case netlink message size for one neighbour notification:
 * the ndmsg header plus NDA_DST and NDA_LLADDR (each sized for
 * MAX_ADDR_LEN), the cache-info struct and the u32 NDA_PROBES. */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2741
/* Broadcast a netlink message of @type about @n on RTNLGRP_NEIGH in
 * the device's netns.  On allocation or fill failure the error is
 * recorded against the group via rtnl_set_sk_err() so listeners see
 * the lost event.  GFP_ATOMIC: may be called from softirq context. */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2765
2766 #ifdef CONFIG_ARPD
/* Ask a user-space ARP daemon to resolve @n by emitting a
 * kernel-originated RTM_GETNEIGH request on RTNLGRP_NEIGH. */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2772 #endif /* CONFIG_ARPD */
2773
2774 #ifdef CONFIG_SYSCTL
2775
/* sysctl handler for the legacy "unres_qlen" knob.  The stored value
 * is queue_len_bytes, but userspace reads/writes a packet count, so
 * convert by SKB_TRUESIZE(ETH_FRAME_LEN) per packet both ways, clamped
 * to [zero, unres_qlen_max].
 * NOTE(review): the read-side division rounds down, so a value set via
 * unres_qlen_bytes may read back one packet smaller here — presumed
 * acceptable for a legacy alias; confirm. */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2793
/* Indices into neigh_sysctl_template.neigh_vars[].  Order matters:
 * the per-device table is truncated at NEIGH_VAR_GC_INTERVAL by
 * neigh_sysctl_register(), since the gc_* knobs exist only in the
 * per-table "default" directory. */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	/* The entries below exist only in the "default" directory. */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2816
/* Template sysctl table, kmemdup()'d per registration; the ->data
 * pointers are patched to the caller's neigh_parms fields in
 * neigh_sysctl_register().  Indexed by the NEIGH_VAR_* enum and
 * terminated by the empty sentinel entry. */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		/* Legacy packet-count view of unres_qlen_bytes; the handler
		 * converts between packets and bytes. */
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Per-table knobs; zeroed out for per-device registrations. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2940
/* Register the net/<p_name>/neigh/<dev|default> sysctl directory for
 * parms @p.  A copy of the template is patched to point at @p's fields.
 * @handler, when non-NULL, overrides the handlers of the four time
 * knobs so protocols (e.g. NDISC) can react to writes; @dev is passed
 * via extra1 for those handlers.  Returns 0 or -ENOBUFS. */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* QUEUE_LEN intentionally aliases queue_len_bytes: proc_unres_qlen
	 * converts the legacy packet count to/from bytes. */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		/* NOTE(review): assumes gc_interval/gc_thresh1..3 are laid
		 * out as four consecutive ints immediately after the default
		 * parms in struct neigh_table — confirm against the struct
		 * definition before changing either layout. */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3017
3018 void neigh_sysctl_unregister(struct neigh_parms *p)
3019 {
3020         if (p->sysctl_table) {
3021                 struct neigh_sysctl_table *t = p->sysctl_table;
3022                 p->sysctl_table = NULL;
3023                 unregister_net_sysctl_table(t->sysctl_header);
3024                 kfree(t);
3025         }
3026 }
3027 EXPORT_SYMBOL(neigh_sysctl_unregister);
3028
3029 #endif  /* CONFIG_SYSCTL */
3030
/* Register the PF_UNSPEC rtnetlink handlers for neighbour entries
 * (new/delete/dump) and neighbour tables (dump/set).  Runs at subsys
 * initcall time, before protocol modules register their tables. */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3045