neigh: Create mechanism for generic neigh private areas.
[linux-2.6-block.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
/*
 * Debug verbosity for this file.  NEIGH_PRINTK1 prints when NEIGH_DEBUG is
 * at least 1 and NEIGH_PRINTK2 when it is at least 2; otherwise each one
 * expands to an empty statement.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif
55
/*
 * Mask applied to proxy-neighbour hash values by pneigh_hash(); the proxy
 * hash buckets are indexed 0..PNEIGH_HASHMASK.
 */
#define PNEIGH_HASHMASK         0xF

/* Forward declarations of static helpers referenced before their definitions. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

/* NOTE(review): presumably the head of the list of neighbour tables guarded
 * by neigh_tbl_lock -- confirm against the registration code. */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
/* Only declared when procfs support is configured. */
static const struct file_operations neigh_stat_seq_fops;
#endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86    However, the same lock is also used to protect other fields of the entry:
87     - timer
88     - resolution queue
89
90    Again, nothing clever should be done under neigh->lock;
91    the most complicated procedure we allow is dev->hard_header.
92    It is assumed that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting the
96    list of neighbour tables. This list is used only in process context.
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103         kfree_skb(skb);
104         return -ENETDOWN;
105 }
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * Return a random value distributed over the interval
 * (1/2)*base ... (3/2)*base, or 0 when @base is 0.
 * This corresponds to the default IPv6 settings and is deliberately not
 * overridable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131         int shrunk = 0;
132         int i;
133         struct neigh_hash_table *nht;
134
135         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136
137         write_lock_bh(&tbl->lock);
138         nht = rcu_dereference_protected(tbl->nht,
139                                         lockdep_is_held(&tbl->lock));
140         for (i = 0; i < (1 << nht->hash_shift); i++) {
141                 struct neighbour *n;
142                 struct neighbour __rcu **np;
143
144                 np = &nht->hash_buckets[i];
145                 while ((n = rcu_dereference_protected(*np,
146                                         lockdep_is_held(&tbl->lock))) != NULL) {
147                         /* Neighbour record may be discarded if:
148                          * - nobody refers to it.
149                          * - it is not permanent
150                          */
151                         write_lock(&n->lock);
152                         if (atomic_read(&n->refcnt) == 1 &&
153                             !(n->nud_state & NUD_PERMANENT)) {
154                                 rcu_assign_pointer(*np,
155                                         rcu_dereference_protected(n->next,
156                                                   lockdep_is_held(&tbl->lock)));
157                                 n->dead = 1;
158                                 shrunk  = 1;
159                                 write_unlock(&n->lock);
160                                 neigh_cleanup_and_release(n);
161                                 continue;
162                         }
163                         write_unlock(&n->lock);
164                         np = &n->next;
165                 }
166         }
167
168         tbl->last_flush = jiffies;
169
170         write_unlock_bh(&tbl->lock);
171
172         return shrunk;
173 }
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207         int i;
208         struct neigh_hash_table *nht;
209
210         nht = rcu_dereference_protected(tbl->nht,
211                                         lockdep_is_held(&tbl->lock));
212
213         for (i = 0; i < (1 << nht->hash_shift); i++) {
214                 struct neighbour *n;
215                 struct neighbour __rcu **np = &nht->hash_buckets[i];
216
217                 while ((n = rcu_dereference_protected(*np,
218                                         lockdep_is_held(&tbl->lock))) != NULL) {
219                         if (dev && n->dev != dev) {
220                                 np = &n->next;
221                                 continue;
222                         }
223                         rcu_assign_pointer(*np,
224                                    rcu_dereference_protected(n->next,
225                                                 lockdep_is_held(&tbl->lock)));
226                         write_lock(&n->lock);
227                         neigh_del_timer(n);
228                         n->dead = 1;
229
230                         if (atomic_read(&n->refcnt) != 1) {
231                                 /* The most unpleasant situation.
232                                    We must destroy neighbour entry,
233                                    but someone still uses it.
234
235                                    The destroy will be delayed until
236                                    the last user releases us, but
237                                    we must kill timers etc. and move
238                                    it to safe state.
239                                  */
240                                 skb_queue_purge(&n->arp_queue);
241                                 n->arp_queue_len_bytes = 0;
242                                 n->output = neigh_blackhole;
243                                 if (n->nud_state & NUD_VALID)
244                                         n->nud_state = NUD_NOARP;
245                                 else
246                                         n->nud_state = NUD_NONE;
247                                 NEIGH_PRINTK2("neigh %p is stray.\n", n);
248                         }
249                         write_unlock(&n->lock);
250                         neigh_cleanup_and_release(n);
251                 }
252         }
253 }
254
255 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
256 {
257         write_lock_bh(&tbl->lock);
258         neigh_flush_dev(tbl, dev);
259         write_unlock_bh(&tbl->lock);
260 }
261 EXPORT_SYMBOL(neigh_changeaddr);
262
263 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
264 {
265         write_lock_bh(&tbl->lock);
266         neigh_flush_dev(tbl, dev);
267         pneigh_ifdown(tbl, dev);
268         write_unlock_bh(&tbl->lock);
269
270         del_timer_sync(&tbl->proxy_timer);
271         pneigh_queue_purge(&tbl->proxy_queue);
272         return 0;
273 }
274 EXPORT_SYMBOL(neigh_ifdown);
275
276 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
277 {
278         struct neighbour *n = NULL;
279         unsigned long now = jiffies;
280         int entries;
281
282         entries = atomic_inc_return(&tbl->entries) - 1;
283         if (entries >= tbl->gc_thresh3 ||
284             (entries >= tbl->gc_thresh2 &&
285              time_after(now, tbl->last_flush + 5 * HZ))) {
286                 if (!neigh_forced_gc(tbl) &&
287                     entries >= tbl->gc_thresh3)
288                         goto out_entries;
289         }
290
291         n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
292         if (!n)
293                 goto out_entries;
294
295         skb_queue_head_init(&n->arp_queue);
296         rwlock_init(&n->lock);
297         seqlock_init(&n->ha_lock);
298         n->updated        = n->used = now;
299         n->nud_state      = NUD_NONE;
300         n->output         = neigh_blackhole;
301         seqlock_init(&n->hh.hh_lock);
302         n->parms          = neigh_parms_clone(&tbl->parms);
303         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
304
305         NEIGH_CACHE_STAT_INC(tbl, allocs);
306         n->tbl            = tbl;
307         atomic_set(&n->refcnt, 1);
308         n->dead           = 1;
309 out:
310         return n;
311
312 out_entries:
313         atomic_dec(&tbl->entries);
314         goto out;
315 }
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322
323         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
324         if (!ret)
325                 return NULL;
326         if (size <= PAGE_SIZE)
327                 buckets = kzalloc(size, GFP_ATOMIC);
328         else
329                 buckets = (struct neighbour __rcu **)
330                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
331                                            get_order(size));
332         if (!buckets) {
333                 kfree(ret);
334                 return NULL;
335         }
336         ret->hash_buckets = buckets;
337         ret->hash_shift = shift;
338         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
339         ret->hash_rnd |= 1;
340         return ret;
341 }
342
343 static void neigh_hash_free_rcu(struct rcu_head *head)
344 {
345         struct neigh_hash_table *nht = container_of(head,
346                                                     struct neigh_hash_table,
347                                                     rcu);
348         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
349         struct neighbour __rcu **buckets = nht->hash_buckets;
350
351         if (size <= PAGE_SIZE)
352                 kfree(buckets);
353         else
354                 free_pages((unsigned long)buckets, get_order(size));
355         kfree(nht);
356 }
357
358 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
359                                                 unsigned long new_shift)
360 {
361         unsigned int i, hash;
362         struct neigh_hash_table *new_nht, *old_nht;
363
364         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
365
366         old_nht = rcu_dereference_protected(tbl->nht,
367                                             lockdep_is_held(&tbl->lock));
368         new_nht = neigh_hash_alloc(new_shift);
369         if (!new_nht)
370                 return old_nht;
371
372         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
373                 struct neighbour *n, *next;
374
375                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
376                                                    lockdep_is_held(&tbl->lock));
377                      n != NULL;
378                      n = next) {
379                         hash = tbl->hash(n->primary_key, n->dev,
380                                          new_nht->hash_rnd);
381
382                         hash >>= (32 - new_nht->hash_shift);
383                         next = rcu_dereference_protected(n->next,
384                                                 lockdep_is_held(&tbl->lock));
385
386                         rcu_assign_pointer(n->next,
387                                            rcu_dereference_protected(
388                                                 new_nht->hash_buckets[hash],
389                                                 lockdep_is_held(&tbl->lock)));
390                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
391                 }
392         }
393
394         rcu_assign_pointer(tbl->nht, new_nht);
395         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
396         return new_nht;
397 }
398
399 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
400                                struct net_device *dev)
401 {
402         struct neighbour *n;
403         int key_len = tbl->key_len;
404         u32 hash_val;
405         struct neigh_hash_table *nht;
406
407         NEIGH_CACHE_STAT_INC(tbl, lookups);
408
409         rcu_read_lock_bh();
410         nht = rcu_dereference_bh(tbl->nht);
411         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
412
413         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
414              n != NULL;
415              n = rcu_dereference_bh(n->next)) {
416                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
417                         if (!atomic_inc_not_zero(&n->refcnt))
418                                 n = NULL;
419                         NEIGH_CACHE_STAT_INC(tbl, hits);
420                         break;
421                 }
422         }
423
424         rcu_read_unlock_bh();
425         return n;
426 }
427 EXPORT_SYMBOL(neigh_lookup);
428
429 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
430                                      const void *pkey)
431 {
432         struct neighbour *n;
433         int key_len = tbl->key_len;
434         u32 hash_val;
435         struct neigh_hash_table *nht;
436
437         NEIGH_CACHE_STAT_INC(tbl, lookups);
438
439         rcu_read_lock_bh();
440         nht = rcu_dereference_bh(tbl->nht);
441         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
442
443         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
444              n != NULL;
445              n = rcu_dereference_bh(n->next)) {
446                 if (!memcmp(n->primary_key, pkey, key_len) &&
447                     net_eq(dev_net(n->dev), net)) {
448                         if (!atomic_inc_not_zero(&n->refcnt))
449                                 n = NULL;
450                         NEIGH_CACHE_STAT_INC(tbl, hits);
451                         break;
452                 }
453         }
454
455         rcu_read_unlock_bh();
456         return n;
457 }
458 EXPORT_SYMBOL(neigh_lookup_nodev);
459
460 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
461                                struct net_device *dev)
462 {
463         u32 hash_val;
464         int key_len = tbl->key_len;
465         int error;
466         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
467         struct neigh_hash_table *nht;
468
469         if (!n) {
470                 rc = ERR_PTR(-ENOBUFS);
471                 goto out;
472         }
473
474         memcpy(n->primary_key, pkey, key_len);
475         n->dev = dev;
476         dev_hold(dev);
477
478         /* Protocol specific setup. */
479         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
480                 rc = ERR_PTR(error);
481                 goto out_neigh_release;
482         }
483
484         /* Device specific setup. */
485         if (n->parms->neigh_setup &&
486             (error = n->parms->neigh_setup(n)) < 0) {
487                 rc = ERR_PTR(error);
488                 goto out_neigh_release;
489         }
490
491         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
492
493         write_lock_bh(&tbl->lock);
494         nht = rcu_dereference_protected(tbl->nht,
495                                         lockdep_is_held(&tbl->lock));
496
497         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
498                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
499
500         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
501
502         if (n->parms->dead) {
503                 rc = ERR_PTR(-EINVAL);
504                 goto out_tbl_unlock;
505         }
506
507         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
508                                             lockdep_is_held(&tbl->lock));
509              n1 != NULL;
510              n1 = rcu_dereference_protected(n1->next,
511                         lockdep_is_held(&tbl->lock))) {
512                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
513                         neigh_hold(n1);
514                         rc = n1;
515                         goto out_tbl_unlock;
516                 }
517         }
518
519         n->dead = 0;
520         neigh_hold(n);
521         rcu_assign_pointer(n->next,
522                            rcu_dereference_protected(nht->hash_buckets[hash_val],
523                                                      lockdep_is_held(&tbl->lock)));
524         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
525         write_unlock_bh(&tbl->lock);
526         NEIGH_PRINTK2("neigh %p is created.\n", n);
527         rc = n;
528 out:
529         return rc;
530 out_tbl_unlock:
531         write_unlock_bh(&tbl->lock);
532 out_neigh_release:
533         neigh_release(n);
534         goto out;
535 }
536 EXPORT_SYMBOL(neigh_create);
537
538 static u32 pneigh_hash(const void *pkey, int key_len)
539 {
540         u32 hash_val = *(u32 *)(pkey + key_len - 4);
541         hash_val ^= (hash_val >> 16);
542         hash_val ^= hash_val >> 8;
543         hash_val ^= hash_val >> 4;
544         hash_val &= PNEIGH_HASHMASK;
545         return hash_val;
546 }
547
548 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
549                                               struct net *net,
550                                               const void *pkey,
551                                               int key_len,
552                                               struct net_device *dev)
553 {
554         while (n) {
555                 if (!memcmp(n->key, pkey, key_len) &&
556                     net_eq(pneigh_net(n), net) &&
557                     (n->dev == dev || !n->dev))
558                         return n;
559                 n = n->next;
560         }
561         return NULL;
562 }
563
564 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
565                 struct net *net, const void *pkey, struct net_device *dev)
566 {
567         int key_len = tbl->key_len;
568         u32 hash_val = pneigh_hash(pkey, key_len);
569
570         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
571                                  net, pkey, key_len, dev);
572 }
573 EXPORT_SYMBOL_GPL(__pneigh_lookup);
574
575 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
576                                     struct net *net, const void *pkey,
577                                     struct net_device *dev, int creat)
578 {
579         struct pneigh_entry *n;
580         int key_len = tbl->key_len;
581         u32 hash_val = pneigh_hash(pkey, key_len);
582
583         read_lock_bh(&tbl->lock);
584         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
585                               net, pkey, key_len, dev);
586         read_unlock_bh(&tbl->lock);
587
588         if (n || !creat)
589                 goto out;
590
591         ASSERT_RTNL();
592
593         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
594         if (!n)
595                 goto out;
596
597         write_pnet(&n->net, hold_net(net));
598         memcpy(n->key, pkey, key_len);
599         n->dev = dev;
600         if (dev)
601                 dev_hold(dev);
602
603         if (tbl->pconstructor && tbl->pconstructor(n)) {
604                 if (dev)
605                         dev_put(dev);
606                 release_net(net);
607                 kfree(n);
608                 n = NULL;
609                 goto out;
610         }
611
612         write_lock_bh(&tbl->lock);
613         n->next = tbl->phash_buckets[hash_val];
614         tbl->phash_buckets[hash_val] = n;
615         write_unlock_bh(&tbl->lock);
616 out:
617         return n;
618 }
619 EXPORT_SYMBOL(pneigh_lookup);
620
621
622 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
623                   struct net_device *dev)
624 {
625         struct pneigh_entry *n, **np;
626         int key_len = tbl->key_len;
627         u32 hash_val = pneigh_hash(pkey, key_len);
628
629         write_lock_bh(&tbl->lock);
630         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
631              np = &n->next) {
632                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
633                     net_eq(pneigh_net(n), net)) {
634                         *np = n->next;
635                         write_unlock_bh(&tbl->lock);
636                         if (tbl->pdestructor)
637                                 tbl->pdestructor(n);
638                         if (n->dev)
639                                 dev_put(n->dev);
640                         release_net(pneigh_net(n));
641                         kfree(n);
642                         return 0;
643                 }
644         }
645         write_unlock_bh(&tbl->lock);
646         return -ENOENT;
647 }
648
649 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
650 {
651         struct pneigh_entry *n, **np;
652         u32 h;
653
654         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
655                 np = &tbl->phash_buckets[h];
656                 while ((n = *np) != NULL) {
657                         if (!dev || n->dev == dev) {
658                                 *np = n->next;
659                                 if (tbl->pdestructor)
660                                         tbl->pdestructor(n);
661                                 if (n->dev)
662                                         dev_put(n->dev);
663                                 release_net(pneigh_net(n));
664                                 kfree(n);
665                                 continue;
666                         }
667                         np = &n->next;
668                 }
669         }
670         return -ENOENT;
671 }
672
673 static void neigh_parms_destroy(struct neigh_parms *parms);
674
675 static inline void neigh_parms_put(struct neigh_parms *parms)
676 {
677         if (atomic_dec_and_test(&parms->refcnt))
678                 neigh_parms_destroy(parms);
679 }
680
681 static void neigh_destroy_rcu(struct rcu_head *head)
682 {
683         struct neighbour *neigh = container_of(head, struct neighbour, rcu);
684
685         kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
686 }
687 /*
688  *      neighbour must already be out of the table;
689  *
690  */
691 void neigh_destroy(struct neighbour *neigh)
692 {
693         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
694
695         if (!neigh->dead) {
696                 printk(KERN_WARNING
697                        "Destroying alive neighbour %p\n", neigh);
698                 dump_stack();
699                 return;
700         }
701
702         if (neigh_del_timer(neigh))
703                 printk(KERN_WARNING "Impossible event.\n");
704
705         skb_queue_purge(&neigh->arp_queue);
706         neigh->arp_queue_len_bytes = 0;
707
708         dev_put(neigh->dev);
709         neigh_parms_put(neigh->parms);
710
711         NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
712
713         atomic_dec(&neigh->tbl->entries);
714         call_rcu(&neigh->rcu, neigh_destroy_rcu);
715 }
716 EXPORT_SYMBOL(neigh_destroy);
717
718 /* Neighbour state is suspicious;
719    disable fast path.
720
721    Called with write_locked neigh.
722  */
723 static void neigh_suspect(struct neighbour *neigh)
724 {
725         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
726
727         neigh->output = neigh->ops->output;
728 }
729
730 /* Neighbour state is OK;
731    enable fast path.
732
733    Called with write_locked neigh.
734  */
735 static void neigh_connect(struct neighbour *neigh)
736 {
737         NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
738
739         neigh->output = neigh->ops->connected_output;
740 }
741
742 static void neigh_periodic_work(struct work_struct *work)
743 {
744         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
745         struct neighbour *n;
746         struct neighbour __rcu **np;
747         unsigned int i;
748         struct neigh_hash_table *nht;
749
750         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
751
752         write_lock_bh(&tbl->lock);
753         nht = rcu_dereference_protected(tbl->nht,
754                                         lockdep_is_held(&tbl->lock));
755
756         /*
757          *      periodically recompute ReachableTime from random function
758          */
759
760         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
761                 struct neigh_parms *p;
762                 tbl->last_rand = jiffies;
763                 for (p = &tbl->parms; p; p = p->next)
764                         p->reachable_time =
765                                 neigh_rand_reach_time(p->base_reachable_time);
766         }
767
768         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
769                 np = &nht->hash_buckets[i];
770
771                 while ((n = rcu_dereference_protected(*np,
772                                 lockdep_is_held(&tbl->lock))) != NULL) {
773                         unsigned int state;
774
775                         write_lock(&n->lock);
776
777                         state = n->nud_state;
778                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
779                                 write_unlock(&n->lock);
780                                 goto next_elt;
781                         }
782
783                         if (time_before(n->used, n->confirmed))
784                                 n->used = n->confirmed;
785
786                         if (atomic_read(&n->refcnt) == 1 &&
787                             (state == NUD_FAILED ||
788                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
789                                 *np = n->next;
790                                 n->dead = 1;
791                                 write_unlock(&n->lock);
792                                 neigh_cleanup_and_release(n);
793                                 continue;
794                         }
795                         write_unlock(&n->lock);
796
797 next_elt:
798                         np = &n->next;
799                 }
800                 /*
801                  * It's fine to release lock here, even if hash table
802                  * grows while we are preempted.
803                  */
804                 write_unlock_bh(&tbl->lock);
805                 cond_resched();
806                 write_lock_bh(&tbl->lock);
807         }
808         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
809          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
810          * base_reachable_time.
811          */
812         schedule_delayed_work(&tbl->gc_work,
813                               tbl->parms.base_reachable_time >> 1);
814         write_unlock_bh(&tbl->lock);
815 }
816
817 static __inline__ int neigh_max_probes(struct neighbour *n)
818 {
819         struct neigh_parms *p = n->parms;
820         return (n->nud_state & NUD_PROBE) ?
821                 p->ucast_probes :
822                 p->ucast_probes + p->app_probes + p->mcast_probes;
823 }
824
825 static void neigh_invalidate(struct neighbour *neigh)
826         __releases(neigh->lock)
827         __acquires(neigh->lock)
828 {
829         struct sk_buff *skb;
830
831         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
832         NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
833         neigh->updated = jiffies;
834
835         /* It is very thin place. report_unreachable is very complicated
836            routine. Particularly, it can hit the same neighbour entry!
837
838            So that, we try to be accurate and avoid dead loop. --ANK
839          */
840         while (neigh->nud_state == NUD_FAILED &&
841                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
842                 write_unlock(&neigh->lock);
843                 neigh->ops->error_report(neigh, skb);
844                 write_lock(&neigh->lock);
845         }
846         skb_queue_purge(&neigh->arp_queue);
847         neigh->arp_queue_len_bytes = 0;
848 }
849
850 static void neigh_probe(struct neighbour *neigh)
851         __releases(neigh->lock)
852 {
853         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
854         /* keep skb alive even if arp_queue overflows */
855         if (skb)
856                 skb = skb_copy(skb, GFP_ATOMIC);
857         write_unlock(&neigh->lock);
858         neigh->ops->solicit(neigh, skb);
859         atomic_inc(&neigh->probes);
860         kfree_skb(skb);
861 }
862
863 /* Called when a timer expires for a neighbour entry.
 *
 * arg is the struct neighbour the timer belongs to, cast to unsigned long.
 *
 * Under neigh->lock the handler advances the NUD state of the entry
 * (REACHABLE -> DELAY or STALE, DELAY -> REACHABLE or PROBE), marks the entry
 * NUD_FAILED once the probe counter reaches neigh_max_probes(), re-arms the
 * timer while the state is still one of NUD_IN_TIMER, and sends a probe for
 * entries in NUD_INCOMPLETE or NUD_PROBE.  After the lock is released,
 * listeners are notified if the state change asked for it, and one reference
 * on the entry is dropped (presumably the reference that was held for the
 * pending timer -- confirm against neigh_add_timer()).
 */
864
865 static void neigh_timer_handler(unsigned long arg)
866 {
867         unsigned long now, next;
868         struct neighbour *neigh = (struct neighbour *)arg;
869         unsigned state;
870         int notify = 0;
871
872         write_lock(&neigh->lock);
873
874         state = neigh->nud_state;
875         now = jiffies;
        /* Default expiry, used when no branch below picks a better one. */
876         next = now + HZ;
877
        /* The entry is no longer in a timer-driven state: just unlock and release. */
878         if (!(state & NUD_IN_TIMER))
879                 goto out;
880
881         if (state & NUD_REACHABLE) {
                /* Still confirmed recently enough: stay REACHABLE until the deadline. */
882                 if (time_before_eq(now,
883                                    neigh->confirmed + neigh->parms->reachable_time)) {
884                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
885                         next = neigh->confirmed + neigh->parms->reachable_time;
                /* Confirmation expired but the entry was used recently: go to DELAY. */
886                 } else if (time_before_eq(now,
887                                           neigh->used + neigh->parms->delay_probe_time)) {
888                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
889                         neigh->nud_state = NUD_DELAY;
890                         neigh->updated = jiffies;
891                         neigh_suspect(neigh);
892                         next = now + neigh->parms->delay_probe_time;
                /* Neither confirmed nor used recently: go to STALE and notify. */
893                 } else {
894                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
895                         neigh->nud_state = NUD_STALE;
896                         neigh->updated = jiffies;
897                         neigh_suspect(neigh);
898                         notify = 1;
899                 }
900         } else if (state & NUD_DELAY) {
                /* A confirmation arrived during the delay: back to REACHABLE. */
901                 if (time_before_eq(now,
902                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
903                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
904                         neigh->nud_state = NUD_REACHABLE;
905                         neigh->updated = jiffies;
906                         neigh_connect(neigh);
907                         notify = 1;
908                         next = neigh->confirmed + neigh->parms->reachable_time;
                /* No confirmation: start probing with a fresh probe counter. */
909                 } else {
910                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
911                         neigh->nud_state = NUD_PROBE;
912                         neigh->updated = jiffies;
913                         atomic_set(&neigh->probes, 0);
914                         next = now + neigh->parms->retrans_time;
915                 }
916         } else {
917                 /* NUD_PROBE|NUD_INCOMPLETE */
918                 next = now + neigh->parms->retrans_time;
919         }
920
        /* Probe budget used up: fail the entry and report the queued packets. */
921         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
922             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
923                 neigh->nud_state = NUD_FAILED;
924                 notify = 1;
925                 neigh_invalidate(neigh);
926         }
927
928         if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm for sooner than HZ/2 from now. */
929                 if (time_before(next, jiffies + HZ/2))
930                         next = jiffies + HZ/2;
                /* mod_timer() returning 0 means the timer was not pending; take a reference then. */
931                 if (!mod_timer(&neigh->timer, next))
932                         neigh_hold(neigh);
933         }
934         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() releases neigh->lock itself. */
935                 neigh_probe(neigh);
936         } else {
937 out:
938                 write_unlock(&neigh->lock);
939         }
940
941         if (notify)
942                 neigh_update_notify(neigh);
943
944         neigh_release(neigh);
945 }
946
947 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
948 {
949         int rc;
950         bool immediate_probe = false;
951
952         write_lock_bh(&neigh->lock);
953
954         rc = 0;
955         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
956                 goto out_unlock_bh;
957
958         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
959                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
960                         unsigned long next, now = jiffies;
961
962                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
963                         neigh->nud_state     = NUD_INCOMPLETE;
964                         neigh->updated = now;
965                         next = now + max(neigh->parms->retrans_time, HZ/2);
966                         neigh_add_timer(neigh, next);
967                         immediate_probe = true;
968                 } else {
969                         neigh->nud_state = NUD_FAILED;
970                         neigh->updated = jiffies;
971                         write_unlock_bh(&neigh->lock);
972
973                         kfree_skb(skb);
974                         return 1;
975                 }
976         } else if (neigh->nud_state & NUD_STALE) {
977                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
978                 neigh->nud_state = NUD_DELAY;
979                 neigh->updated = jiffies;
980                 neigh_add_timer(neigh,
981                                 jiffies + neigh->parms->delay_probe_time);
982         }
983
984         if (neigh->nud_state == NUD_INCOMPLETE) {
985                 if (skb) {
986                         while (neigh->arp_queue_len_bytes + skb->truesize >
987                                neigh->parms->queue_len_bytes) {
988                                 struct sk_buff *buff;
989
990                                 buff = __skb_dequeue(&neigh->arp_queue);
991                                 if (!buff)
992                                         break;
993                                 neigh->arp_queue_len_bytes -= buff->truesize;
994                                 kfree_skb(buff);
995                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
996                         }
997                         skb_dst_force(skb);
998                         __skb_queue_tail(&neigh->arp_queue, skb);
999                         neigh->arp_queue_len_bytes += skb->truesize;
1000                 }
1001                 rc = 1;
1002         }
1003 out_unlock_bh:
1004         if (immediate_probe)
1005                 neigh_probe(neigh);
1006         else
1007                 write_unlock(&neigh->lock);
1008         local_bh_enable();
1009         return rc;
1010 }
1011 EXPORT_SYMBOL(__neigh_event_send);
1012
1013 static void neigh_update_hhs(struct neighbour *neigh)
1014 {
1015         struct hh_cache *hh;
1016         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1017                 = NULL;
1018
1019         if (neigh->dev->header_ops)
1020                 update = neigh->dev->header_ops->cache_update;
1021
1022         if (update) {
1023                 hh = &neigh->hh;
1024                 if (hh->hh_len) {
1025                         write_seqlock_bh(&hh->hh_lock);
1026                         update(hh, neigh->dev, neigh->ha);
1027                         write_sequnlock_bh(&hh->hh_lock);
1028                 }
1029         }
1030 }
1031
1032
1033
1034 /* Generic update routine.
1035    -- lladdr is the new link-layer address, or NULL if none is supplied.
1036    -- new    is the new NUD state.
1037    -- flags
1038         NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr
1039                                 if it is different.
1040         NEIGH_UPDATE_F_WEAK_OVERRIDE will mark an existing "connected"
1041                                 lladdr as suspect instead of overriding it
1042                                 if it is different.
1043                                 It also allows the current state to be
1044                                 retained if lladdr is unchanged.
1045         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1046
1047         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
1048                                 NTF_ROUTER flag.
1049         NEIGH_UPDATE_F_ISROUTER indicates that the neighbour is known to be
1050                                 a router.
1051
   Returns 0 on success; -EPERM when a non-administrative update targets a
   NUD_NOARP or NUD_PERMANENT entry; -EINVAL when the device uses addresses,
   no lladdr is supplied and the entry holds no valid one.  A differing
   lladdr that may not be overridden is ignored and still returns 0.

   The entry's lock is taken here (write_lock_bh) and dropped temporarily
   while queued packets are sent out.

1052    Caller MUST hold reference count on the entry.
1053  */
1054
1055 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1056                  u32 flags)
1057 {
1058         u8 old;
1059         int err;
1060         int notify = 0;
1061         struct net_device *dev;
1062         int update_isrouter = 0;
1063
1064         write_lock_bh(&neigh->lock);
1065
1066         dev    = neigh->dev;
1067         old    = neigh->nud_state;
1068         err    = -EPERM;
1069
        /* Only administrative updates may touch NOARP/PERMANENT entries. */
1070         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1071             (old & (NUD_NOARP | NUD_PERMANENT)))
1072                 goto out;
1073
        /* New state is not a valid one: stop the timer and switch state;
           no link-layer address handling is done on this path. */
1074         if (!(new & NUD_VALID)) {
1075                 neigh_del_timer(neigh);
1076                 if (old & NUD_CONNECTED)
1077                         neigh_suspect(neigh);
1078                 neigh->nud_state = new;
1079                 err = 0;
                /* Notify only if the entry was valid before ... */
1080                 notify = old & NUD_VALID;
                /* ... or if a resolution in progress has just been failed. */
1081                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1082                     (new & NUD_FAILED)) {
1083                         neigh_invalidate(neigh);
1084                         notify = 1;
1085                 }
1086                 goto out;
1087         }
1088
        /* From here on, "lladdr == neigh->ha" (pointer identity) encodes
           "the address does not change". */
1089         /* Compare new lladdr with cached one */
1090         if (!dev->addr_len) {
1091                 /* First case: device needs no address. */
1092                 lladdr = neigh->ha;
1093         } else if (lladdr) {
1094                 /* The second case: if something is already cached
1095                    and a new address is proposed:
1096                    - compare new & old
1097                    - if they are different, check override flag
1098                  */
1099                 if ((old & NUD_VALID) &&
1100                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1101                         lladdr = neigh->ha;
1102         } else {
1103                 /* No address is supplied; if we know something,
1104                    use it, otherwise discard the request.
1105                  */
1106                 err = -EINVAL;
1107                 if (!(old & NUD_VALID))
1108                         goto out;
1109                 lladdr = neigh->ha;
1110         }
1111
1112         if (new & NUD_CONNECTED)
1113                 neigh->confirmed = jiffies;
1114         neigh->updated = jiffies;
1115
1116         /* If entry was valid and address is not changed,
1117            do not change entry state, if new one is STALE.
1118          */
1119         err = 0;
1120         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1121         if (old & NUD_VALID) {
                /* Address differs but overriding is not allowed. */
1122                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1123                         update_isrouter = 0;
                        /* Weak override of a connected entry: keep the old
                           address and only downgrade the state to STALE. */
1124                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1125                             (old & NUD_CONNECTED)) {
1126                                 lladdr = neigh->ha;
1127                                 new = NUD_STALE;
1128                         } else
1129                                 goto out;
1130                 } else {
1131                         if (lladdr == neigh->ha && new == NUD_STALE &&
1132                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1133                              (old & NUD_CONNECTED))
1134                             )
1135                                 new = old;
1136                 }
1137         }
1138
        /* State changes: drop the old timer and, for timer-driven states,
           arm a new one (reachable_time ahead for REACHABLE, otherwise for
           the current jiffies). */
1139         if (new != old) {
1140                 neigh_del_timer(neigh);
1141                 if (new & NUD_IN_TIMER)
1142                         neigh_add_timer(neigh, (jiffies +
1143                                                 ((new & NUD_REACHABLE) ?
1144                                                  neigh->parms->reachable_time :
1145                                                  0)));
1146                 neigh->nud_state = new;
1147         }
1148
        /* Address changes: store it under ha_lock and refresh the cached
           hardware header. */
1149         if (lladdr != neigh->ha) {
1150                 write_seqlock(&neigh->ha_lock);
1151                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1152                 write_sequnlock(&neigh->ha_lock);
1153                 neigh_update_hhs(neigh);
                /* New address without a connected state: move "confirmed"
                   two base_reachable_time periods into the past. */
1154                 if (!(new & NUD_CONNECTED))
1155                         neigh->confirmed = jiffies -
1156                                       (neigh->parms->base_reachable_time << 1);
1157                 notify = 1;
1158         }
1159         if (new == old)
1160                 goto out;
1161         if (new & NUD_CONNECTED)
1162                 neigh_connect(neigh);
1163         else
1164                 neigh_suspect(neigh);
        /* The entry has just become valid: send out the packets that were
           queued while it was unresolved. */
1165         if (!(old & NUD_VALID)) {
1166                 struct sk_buff *skb;
1167
1168                 /* Again: avoid dead loop if something went wrong */
1169
1170                 while (neigh->nud_state & NUD_VALID &&
1171                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1172                         struct dst_entry *dst = skb_dst(skb);
1173                         struct neighbour *n2, *n1 = neigh;
                        /* The lock is dropped while the packet is output. */
1174                         write_unlock_bh(&neigh->lock);
1175
1176                         rcu_read_lock();
1177                         /* On shaper/eql skb->dst->neighbour != neigh :( */
1178                         if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
1179                                 n1 = n2;
1180                         n1->output(n1, skb);
1181                         rcu_read_unlock();
1182
1183                         write_lock_bh(&neigh->lock);
1184                 }
1185                 skb_queue_purge(&neigh->arp_queue);
1186                 neigh->arp_queue_len_bytes = 0;
1187         }
1188 out:
        /* Set or clear NTF_ROUTER only when the caller asked for it and the
           update was not rejected above. */
1189         if (update_isrouter) {
1190                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1191                         (neigh->flags | NTF_ROUTER) :
1192                         (neigh->flags & ~NTF_ROUTER);
1193         }
1194         write_unlock_bh(&neigh->lock);
1195
1196         if (notify)
1197                 neigh_update_notify(neigh);
1198
1199         return err;
1200 }
1201 EXPORT_SYMBOL(neigh_update);
1202
1203 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1204                                  u8 *lladdr, void *saddr,
1205                                  struct net_device *dev)
1206 {
1207         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1208                                                  lladdr || !dev->addr_len);
1209         if (neigh)
1210                 neigh_update(neigh, lladdr, NUD_STALE,
1211                              NEIGH_UPDATE_F_OVERRIDE);
1212         return neigh;
1213 }
1214 EXPORT_SYMBOL(neigh_event_ns);
1215
1216 /* called with read_lock_bh(&n->lock); */
1217 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1218 {
1219         struct net_device *dev = dst->dev;
1220         __be16 prot = dst->ops->protocol;
1221         struct hh_cache *hh = &n->hh;
1222
1223         write_lock_bh(&n->lock);
1224
1225         /* Only one thread can come in here and initialize the
1226          * hh_cache entry.
1227          */
1228         if (!hh->hh_len)
1229                 dev->header_ops->cache(n, hh, prot);
1230
1231         write_unlock_bh(&n->lock);
1232 }
1233
1234 /* This function can be used in contexts, where only old dev_queue_xmit
1235  * worked, f.e. if you want to override normal output path (eql, shaper),
1236  * but resolution is not made yet.
1237  */
1238
1239 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1240 {
1241         struct net_device *dev = skb->dev;
1242
1243         __skb_pull(skb, skb_network_offset(skb));
1244
1245         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1246                             skb->len) < 0 &&
1247             dev->header_ops->rebuild(skb))
1248                 return 0;
1249
1250         return dev_queue_xmit(skb);
1251 }
1252 EXPORT_SYMBOL(neigh_compat_output);
1253
1254 /* Slow and careful. */
1255
1256 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1257 {
1258         struct dst_entry *dst = skb_dst(skb);
1259         int rc = 0;
1260
1261         if (!dst)
1262                 goto discard;
1263
1264         __skb_pull(skb, skb_network_offset(skb));
1265
1266         if (!neigh_event_send(neigh, skb)) {
1267                 int err;
1268                 struct net_device *dev = neigh->dev;
1269                 unsigned int seq;
1270
1271                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1272                         neigh_hh_init(neigh, dst);
1273
1274                 do {
1275                         seq = read_seqbegin(&neigh->ha_lock);
1276                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1277                                               neigh->ha, NULL, skb->len);
1278                 } while (read_seqretry(&neigh->ha_lock, seq));
1279
1280                 if (err >= 0)
1281                         rc = dev_queue_xmit(skb);
1282                 else
1283                         goto out_kfree_skb;
1284         }
1285 out:
1286         return rc;
1287 discard:
1288         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1289                       dst, neigh);
1290 out_kfree_skb:
1291         rc = -EINVAL;
1292         kfree_skb(skb);
1293         goto out;
1294 }
1295 EXPORT_SYMBOL(neigh_resolve_output);
1296
1297 /* As fast as possible without hh cache */
1298
1299 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1300 {
1301         struct net_device *dev = neigh->dev;
1302         unsigned int seq;
1303         int err;
1304
1305         __skb_pull(skb, skb_network_offset(skb));
1306
1307         do {
1308                 seq = read_seqbegin(&neigh->ha_lock);
1309                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1310                                       neigh->ha, NULL, skb->len);
1311         } while (read_seqretry(&neigh->ha_lock, seq));
1312
1313         if (err >= 0)
1314                 err = dev_queue_xmit(skb);
1315         else {
1316                 err = -EINVAL;
1317                 kfree_skb(skb);
1318         }
1319         return err;
1320 }
1321 EXPORT_SYMBOL(neigh_connected_output);
1322
/*
 * Output routine that passes @skb straight to dev_queue_xmit() and returns
 * its result.  @neigh is not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1328
1329 static void neigh_proxy_process(unsigned long arg)
1330 {
1331         struct neigh_table *tbl = (struct neigh_table *)arg;
1332         long sched_next = 0;
1333         unsigned long now = jiffies;
1334         struct sk_buff *skb, *n;
1335
1336         spin_lock(&tbl->proxy_queue.lock);
1337
1338         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1339                 long tdif = NEIGH_CB(skb)->sched_next - now;
1340
1341                 if (tdif <= 0) {
1342                         struct net_device *dev = skb->dev;
1343
1344                         __skb_unlink(skb, &tbl->proxy_queue);
1345                         if (tbl->proxy_redo && netif_running(dev)) {
1346                                 rcu_read_lock();
1347                                 tbl->proxy_redo(skb);
1348                                 rcu_read_unlock();
1349                         } else {
1350                                 kfree_skb(skb);
1351                         }
1352
1353                         dev_put(dev);
1354                 } else if (!sched_next || tdif < sched_next)
1355                         sched_next = tdif;
1356         }
1357         del_timer(&tbl->proxy_timer);
1358         if (sched_next)
1359                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1360         spin_unlock(&tbl->proxy_queue.lock);
1361 }
1362
1363 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1364                     struct sk_buff *skb)
1365 {
1366         unsigned long now = jiffies;
1367         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1368
1369         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1370                 kfree_skb(skb);
1371                 return;
1372         }
1373
1374         NEIGH_CB(skb)->sched_next = sched_next;
1375         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1376
1377         spin_lock(&tbl->proxy_queue.lock);
1378         if (del_timer(&tbl->proxy_timer)) {
1379                 if (time_before(tbl->proxy_timer.expires, sched_next))
1380                         sched_next = tbl->proxy_timer.expires;
1381         }
1382         skb_dst_drop(skb);
1383         dev_hold(skb->dev);
1384         __skb_queue_tail(&tbl->proxy_queue, skb);
1385         mod_timer(&tbl->proxy_timer, sched_next);
1386         spin_unlock(&tbl->proxy_queue.lock);
1387 }
1388 EXPORT_SYMBOL(pneigh_enqueue);
1389
1390 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1391                                                       struct net *net, int ifindex)
1392 {
1393         struct neigh_parms *p;
1394
1395         for (p = &tbl->parms; p; p = p->next) {
1396                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1397                     (!p->dev && !ifindex))
1398                         return p;
1399         }
1400
1401         return NULL;
1402 }
1403
1404 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1405                                       struct neigh_table *tbl)
1406 {
1407         struct neigh_parms *p, *ref;
1408         struct net *net = dev_net(dev);
1409         const struct net_device_ops *ops = dev->netdev_ops;
1410
1411         ref = lookup_neigh_parms(tbl, net, 0);
1412         if (!ref)
1413                 return NULL;
1414
1415         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1416         if (p) {
1417                 p->tbl            = tbl;
1418                 atomic_set(&p->refcnt, 1);
1419                 p->reachable_time =
1420                                 neigh_rand_reach_time(p->base_reachable_time);
1421
1422                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1423                         kfree(p);
1424                         return NULL;
1425                 }
1426
1427                 dev_hold(dev);
1428                 p->dev = dev;
1429                 write_pnet(&p->net, hold_net(net));
1430                 p->sysctl_table = NULL;
1431                 write_lock_bh(&tbl->lock);
1432                 p->next         = tbl->parms.next;
1433                 tbl->parms.next = p;
1434                 write_unlock_bh(&tbl->lock);
1435         }
1436         return p;
1437 }
1438 EXPORT_SYMBOL(neigh_parms_alloc);
1439
1440 static void neigh_rcu_free_parms(struct rcu_head *head)
1441 {
1442         struct neigh_parms *parms =
1443                 container_of(head, struct neigh_parms, rcu_head);
1444
1445         neigh_parms_put(parms);
1446 }
1447
1448 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1449 {
1450         struct neigh_parms **p;
1451
1452         if (!parms || parms == &tbl->parms)
1453                 return;
1454         write_lock_bh(&tbl->lock);
1455         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1456                 if (*p == parms) {
1457                         *p = parms->next;
1458                         parms->dead = 1;
1459                         write_unlock_bh(&tbl->lock);
1460                         if (parms->dev)
1461                                 dev_put(parms->dev);
1462                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1463                         return;
1464                 }
1465         }
1466         write_unlock_bh(&tbl->lock);
1467         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1468 }
1469 EXPORT_SYMBOL(neigh_parms_release);
1470
/*
 * Final teardown of @parms: release its network namespace reference and free
 * the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1476
1477 static struct lock_class_key neigh_table_proxy_queue_class;
1478
1479 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1480 {
1481         unsigned long now = jiffies;
1482         unsigned long phsize;
1483
1484         write_pnet(&tbl->parms.net, &init_net);
1485         atomic_set(&tbl->parms.refcnt, 1);
1486         tbl->parms.reachable_time =
1487                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1488
1489         if (!tbl->kmem_cachep)
1490                 tbl->kmem_cachep =
1491                         kmem_cache_create(tbl->id, tbl->entry_size, 0,
1492                                           SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1493                                           NULL);
1494         tbl->stats = alloc_percpu(struct neigh_statistics);
1495         if (!tbl->stats)
1496                 panic("cannot create neighbour cache statistics");
1497
1498 #ifdef CONFIG_PROC_FS
1499         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1500                               &neigh_stat_seq_fops, tbl))
1501                 panic("cannot create neighbour proc dir entry");
1502 #endif
1503
1504         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1505
1506         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1507         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1508
1509         if (!tbl->nht || !tbl->phash_buckets)
1510                 panic("cannot allocate neighbour cache hashes");
1511
1512         rwlock_init(&tbl->lock);
1513         INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1514         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1515         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1516         skb_queue_head_init_class(&tbl->proxy_queue,
1517                         &neigh_table_proxy_queue_class);
1518
1519         tbl->last_flush = now;
1520         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1521 }
1522 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1523
1524 void neigh_table_init(struct neigh_table *tbl)
1525 {
1526         struct neigh_table *tmp;
1527
1528         neigh_table_init_no_netlink(tbl);
1529         write_lock(&neigh_tbl_lock);
1530         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1531                 if (tmp->family == tbl->family)
1532                         break;
1533         }
1534         tbl->next       = neigh_tables;
1535         neigh_tables    = tbl;
1536         write_unlock(&neigh_tbl_lock);
1537
1538         if (unlikely(tmp)) {
1539                 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1540                        "family %d\n", tbl->family);
1541                 dump_stack();
1542         }
1543 }
1544 EXPORT_SYMBOL(neigh_table_init);
1545
1546 int neigh_table_clear(struct neigh_table *tbl)
1547 {
1548         struct neigh_table **tp;
1549
1550         /* It is not clean... Fix it to unload IPv6 module safely */
1551         cancel_delayed_work_sync(&tbl->gc_work);
1552         del_timer_sync(&tbl->proxy_timer);
1553         pneigh_queue_purge(&tbl->proxy_queue);
1554         neigh_ifdown(tbl, NULL);
1555         if (atomic_read(&tbl->entries))
1556                 printk(KERN_CRIT "neighbour leakage\n");
1557         write_lock(&neigh_tbl_lock);
1558         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1559                 if (*tp == tbl) {
1560                         *tp = tbl->next;
1561                         break;
1562                 }
1563         }
1564         write_unlock(&neigh_tbl_lock);
1565
1566         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1567                  neigh_hash_free_rcu);
1568         tbl->nht = NULL;
1569
1570         kfree(tbl->phash_buckets);
1571         tbl->phash_buckets = NULL;
1572
1573         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1574
1575         free_percpu(tbl->stats);
1576         tbl->stats = NULL;
1577
1578         kmem_cache_destroy(tbl->kmem_cachep);
1579         tbl->kmem_cachep = NULL;
1580
1581         return 0;
1582 }
1583 EXPORT_SYMBOL(neigh_table_clear);
1584
1585 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1586 {
1587         struct net *net = sock_net(skb->sk);
1588         struct ndmsg *ndm;
1589         struct nlattr *dst_attr;
1590         struct neigh_table *tbl;
1591         struct net_device *dev = NULL;
1592         int err = -EINVAL;
1593
1594         ASSERT_RTNL();
1595         if (nlmsg_len(nlh) < sizeof(*ndm))
1596                 goto out;
1597
1598         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1599         if (dst_attr == NULL)
1600                 goto out;
1601
1602         ndm = nlmsg_data(nlh);
1603         if (ndm->ndm_ifindex) {
1604                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1605                 if (dev == NULL) {
1606                         err = -ENODEV;
1607                         goto out;
1608                 }
1609         }
1610
1611         read_lock(&neigh_tbl_lock);
1612         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1613                 struct neighbour *neigh;
1614
1615                 if (tbl->family != ndm->ndm_family)
1616                         continue;
1617                 read_unlock(&neigh_tbl_lock);
1618
1619                 if (nla_len(dst_attr) < tbl->key_len)
1620                         goto out;
1621
1622                 if (ndm->ndm_flags & NTF_PROXY) {
1623                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1624                         goto out;
1625                 }
1626
1627                 if (dev == NULL)
1628                         goto out;
1629
1630                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1631                 if (neigh == NULL) {
1632                         err = -ENOENT;
1633                         goto out;
1634                 }
1635
1636                 err = neigh_update(neigh, NULL, NUD_FAILED,
1637                                    NEIGH_UPDATE_F_OVERRIDE |
1638                                    NEIGH_UPDATE_F_ADMIN);
1639                 neigh_release(neigh);
1640                 goto out;
1641         }
1642         read_unlock(&neigh_tbl_lock);
1643         err = -EAFNOSUPPORT;
1644
1645 out:
1646         return err;
1647 }
1648
1649 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1650 {
1651         struct net *net = sock_net(skb->sk);
1652         struct ndmsg *ndm;
1653         struct nlattr *tb[NDA_MAX+1];
1654         struct neigh_table *tbl;
1655         struct net_device *dev = NULL;
1656         int err;
1657
1658         ASSERT_RTNL();
1659         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1660         if (err < 0)
1661                 goto out;
1662
1663         err = -EINVAL;
1664         if (tb[NDA_DST] == NULL)
1665                 goto out;
1666
1667         ndm = nlmsg_data(nlh);
1668         if (ndm->ndm_ifindex) {
1669                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1670                 if (dev == NULL) {
1671                         err = -ENODEV;
1672                         goto out;
1673                 }
1674
1675                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1676                         goto out;
1677         }
1678
1679         read_lock(&neigh_tbl_lock);
1680         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1681                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1682                 struct neighbour *neigh;
1683                 void *dst, *lladdr;
1684
1685                 if (tbl->family != ndm->ndm_family)
1686                         continue;
1687                 read_unlock(&neigh_tbl_lock);
1688
1689                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1690                         goto out;
1691                 dst = nla_data(tb[NDA_DST]);
1692                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1693
1694                 if (ndm->ndm_flags & NTF_PROXY) {
1695                         struct pneigh_entry *pn;
1696
1697                         err = -ENOBUFS;
1698                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1699                         if (pn) {
1700                                 pn->flags = ndm->ndm_flags;
1701                                 err = 0;
1702                         }
1703                         goto out;
1704                 }
1705
1706                 if (dev == NULL)
1707                         goto out;
1708
1709                 neigh = neigh_lookup(tbl, dst, dev);
1710                 if (neigh == NULL) {
1711                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1712                                 err = -ENOENT;
1713                                 goto out;
1714                         }
1715
1716                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1717                         if (IS_ERR(neigh)) {
1718                                 err = PTR_ERR(neigh);
1719                                 goto out;
1720                         }
1721                 } else {
1722                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1723                                 err = -EEXIST;
1724                                 neigh_release(neigh);
1725                                 goto out;
1726                         }
1727
1728                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1729                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1730                 }
1731
1732                 if (ndm->ndm_flags & NTF_USE) {
1733                         neigh_event_send(neigh, NULL);
1734                         err = 0;
1735                 } else
1736                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1737                 neigh_release(neigh);
1738                 goto out;
1739         }
1740
1741         read_unlock(&neigh_tbl_lock);
1742         err = -EAFNOSUPPORT;
1743 out:
1744         return err;
1745 }
1746
1747 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1748 {
1749         struct nlattr *nest;
1750
1751         nest = nla_nest_start(skb, NDTA_PARMS);
1752         if (nest == NULL)
1753                 return -ENOBUFS;
1754
1755         if (parms->dev)
1756                 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1757
1758         NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1759         NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
1760         /* approximative value for deprecated QUEUE_LEN (in packets) */
1761         NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
1762                     DIV_ROUND_UP(parms->queue_len_bytes,
1763                                  SKB_TRUESIZE(ETH_FRAME_LEN)));
1764         NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1765         NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1766         NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1767         NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1768         NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1769         NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1770                       parms->base_reachable_time);
1771         NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1772         NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1773         NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1774         NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1775         NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1776         NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1777
1778         return nla_nest_end(skb, nest);
1779
1780 nla_put_failure:
1781         nla_nest_cancel(skb, nest);
1782         return -EMSGSIZE;
1783 }
1784
1785 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1786                               u32 pid, u32 seq, int type, int flags)
1787 {
1788         struct nlmsghdr *nlh;
1789         struct ndtmsg *ndtmsg;
1790
1791         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1792         if (nlh == NULL)
1793                 return -EMSGSIZE;
1794
1795         ndtmsg = nlmsg_data(nlh);
1796
1797         read_lock_bh(&tbl->lock);
1798         ndtmsg->ndtm_family = tbl->family;
1799         ndtmsg->ndtm_pad1   = 0;
1800         ndtmsg->ndtm_pad2   = 0;
1801
1802         NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1803         NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1804         NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1805         NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1806         NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1807
1808         {
1809                 unsigned long now = jiffies;
1810                 unsigned int flush_delta = now - tbl->last_flush;
1811                 unsigned int rand_delta = now - tbl->last_rand;
1812                 struct neigh_hash_table *nht;
1813                 struct ndt_config ndc = {
1814                         .ndtc_key_len           = tbl->key_len,
1815                         .ndtc_entry_size        = tbl->entry_size,
1816                         .ndtc_entries           = atomic_read(&tbl->entries),
1817                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1818                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1819                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1820                 };
1821
1822                 rcu_read_lock_bh();
1823                 nht = rcu_dereference_bh(tbl->nht);
1824                 ndc.ndtc_hash_rnd = nht->hash_rnd;
1825                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1826                 rcu_read_unlock_bh();
1827
1828                 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1829         }
1830
1831         {
1832                 int cpu;
1833                 struct ndt_stats ndst;
1834
1835                 memset(&ndst, 0, sizeof(ndst));
1836
1837                 for_each_possible_cpu(cpu) {
1838                         struct neigh_statistics *st;
1839
1840                         st = per_cpu_ptr(tbl->stats, cpu);
1841                         ndst.ndts_allocs                += st->allocs;
1842                         ndst.ndts_destroys              += st->destroys;
1843                         ndst.ndts_hash_grows            += st->hash_grows;
1844                         ndst.ndts_res_failed            += st->res_failed;
1845                         ndst.ndts_lookups               += st->lookups;
1846                         ndst.ndts_hits                  += st->hits;
1847                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1848                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1849                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1850                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1851                 }
1852
1853                 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1854         }
1855
1856         BUG_ON(tbl->parms.dev);
1857         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1858                 goto nla_put_failure;
1859
1860         read_unlock_bh(&tbl->lock);
1861         return nlmsg_end(skb, nlh);
1862
1863 nla_put_failure:
1864         read_unlock_bh(&tbl->lock);
1865         nlmsg_cancel(skb, nlh);
1866         return -EMSGSIZE;
1867 }
1868
1869 static int neightbl_fill_param_info(struct sk_buff *skb,
1870                                     struct neigh_table *tbl,
1871                                     struct neigh_parms *parms,
1872                                     u32 pid, u32 seq, int type,
1873                                     unsigned int flags)
1874 {
1875         struct ndtmsg *ndtmsg;
1876         struct nlmsghdr *nlh;
1877
1878         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1879         if (nlh == NULL)
1880                 return -EMSGSIZE;
1881
1882         ndtmsg = nlmsg_data(nlh);
1883
1884         read_lock_bh(&tbl->lock);
1885         ndtmsg->ndtm_family = tbl->family;
1886         ndtmsg->ndtm_pad1   = 0;
1887         ndtmsg->ndtm_pad2   = 0;
1888
1889         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1890             neightbl_fill_parms(skb, parms) < 0)
1891                 goto errout;
1892
1893         read_unlock_bh(&tbl->lock);
1894         return nlmsg_end(skb, nlh);
1895 errout:
1896         read_unlock_bh(&tbl->lock);
1897         nlmsg_cancel(skb, nlh);
1898         return -EMSGSIZE;
1899 }
1900
1901 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1902         [NDTA_NAME]             = { .type = NLA_STRING },
1903         [NDTA_THRESH1]          = { .type = NLA_U32 },
1904         [NDTA_THRESH2]          = { .type = NLA_U32 },
1905         [NDTA_THRESH3]          = { .type = NLA_U32 },
1906         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1907         [NDTA_PARMS]            = { .type = NLA_NESTED },
1908 };
1909
1910 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1911         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1912         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1913         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1914         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1915         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1916         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1917         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1918         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1919         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1920         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1921         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1922         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1923         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1924 };
1925
1926 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1927 {
1928         struct net *net = sock_net(skb->sk);
1929         struct neigh_table *tbl;
1930         struct ndtmsg *ndtmsg;
1931         struct nlattr *tb[NDTA_MAX+1];
1932         int err;
1933
1934         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1935                           nl_neightbl_policy);
1936         if (err < 0)
1937                 goto errout;
1938
1939         if (tb[NDTA_NAME] == NULL) {
1940                 err = -EINVAL;
1941                 goto errout;
1942         }
1943
1944         ndtmsg = nlmsg_data(nlh);
1945         read_lock(&neigh_tbl_lock);
1946         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1947                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1948                         continue;
1949
1950                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1951                         break;
1952         }
1953
1954         if (tbl == NULL) {
1955                 err = -ENOENT;
1956                 goto errout_locked;
1957         }
1958
1959         /*
1960          * We acquire tbl->lock to be nice to the periodic timers and
1961          * make sure they always see a consistent set of values.
1962          */
1963         write_lock_bh(&tbl->lock);
1964
1965         if (tb[NDTA_PARMS]) {
1966                 struct nlattr *tbp[NDTPA_MAX+1];
1967                 struct neigh_parms *p;
1968                 int i, ifindex = 0;
1969
1970                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1971                                        nl_ntbl_parm_policy);
1972                 if (err < 0)
1973                         goto errout_tbl_lock;
1974
1975                 if (tbp[NDTPA_IFINDEX])
1976                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1977
1978                 p = lookup_neigh_parms(tbl, net, ifindex);
1979                 if (p == NULL) {
1980                         err = -ENOENT;
1981                         goto errout_tbl_lock;
1982                 }
1983
1984                 for (i = 1; i <= NDTPA_MAX; i++) {
1985                         if (tbp[i] == NULL)
1986                                 continue;
1987
1988                         switch (i) {
1989                         case NDTPA_QUEUE_LEN:
1990                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
1991                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
1992                                 break;
1993                         case NDTPA_QUEUE_LENBYTES:
1994                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
1995                                 break;
1996                         case NDTPA_PROXY_QLEN:
1997                                 p->proxy_qlen = nla_get_u32(tbp[i]);
1998                                 break;
1999                         case NDTPA_APP_PROBES:
2000                                 p->app_probes = nla_get_u32(tbp[i]);
2001                                 break;
2002                         case NDTPA_UCAST_PROBES:
2003                                 p->ucast_probes = nla_get_u32(tbp[i]);
2004                                 break;
2005                         case NDTPA_MCAST_PROBES:
2006                                 p->mcast_probes = nla_get_u32(tbp[i]);
2007                                 break;
2008                         case NDTPA_BASE_REACHABLE_TIME:
2009                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2010                                 break;
2011                         case NDTPA_GC_STALETIME:
2012                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2013                                 break;
2014                         case NDTPA_DELAY_PROBE_TIME:
2015                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2016                                 break;
2017                         case NDTPA_RETRANS_TIME:
2018                                 p->retrans_time = nla_get_msecs(tbp[i]);
2019                                 break;
2020                         case NDTPA_ANYCAST_DELAY:
2021                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2022                                 break;
2023                         case NDTPA_PROXY_DELAY:
2024                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2025                                 break;
2026                         case NDTPA_LOCKTIME:
2027                                 p->locktime = nla_get_msecs(tbp[i]);
2028                                 break;
2029                         }
2030                 }
2031         }
2032
2033         if (tb[NDTA_THRESH1])
2034                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2035
2036         if (tb[NDTA_THRESH2])
2037                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2038
2039         if (tb[NDTA_THRESH3])
2040                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2041
2042         if (tb[NDTA_GC_INTERVAL])
2043                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2044
2045         err = 0;
2046
2047 errout_tbl_lock:
2048         write_unlock_bh(&tbl->lock);
2049 errout_locked:
2050         read_unlock(&neigh_tbl_lock);
2051 errout:
2052         return err;
2053 }
2054
2055 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2056 {
2057         struct net *net = sock_net(skb->sk);
2058         int family, tidx, nidx = 0;
2059         int tbl_skip = cb->args[0];
2060         int neigh_skip = cb->args[1];
2061         struct neigh_table *tbl;
2062
2063         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2064
2065         read_lock(&neigh_tbl_lock);
2066         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2067                 struct neigh_parms *p;
2068
2069                 if (tidx < tbl_skip || (family && tbl->family != family))
2070                         continue;
2071
2072                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2073                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2074                                        NLM_F_MULTI) <= 0)
2075                         break;
2076
2077                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2078                         if (!net_eq(neigh_parms_net(p), net))
2079                                 continue;
2080
2081                         if (nidx < neigh_skip)
2082                                 goto next;
2083
2084                         if (neightbl_fill_param_info(skb, tbl, p,
2085                                                      NETLINK_CB(cb->skb).pid,
2086                                                      cb->nlh->nlmsg_seq,
2087                                                      RTM_NEWNEIGHTBL,
2088                                                      NLM_F_MULTI) <= 0)
2089                                 goto out;
2090                 next:
2091                         nidx++;
2092                 }
2093
2094                 neigh_skip = 0;
2095         }
2096 out:
2097         read_unlock(&neigh_tbl_lock);
2098         cb->args[0] = tidx;
2099         cb->args[1] = nidx;
2100
2101         return skb->len;
2102 }
2103
2104 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2105                            u32 pid, u32 seq, int type, unsigned int flags)
2106 {
2107         unsigned long now = jiffies;
2108         struct nda_cacheinfo ci;
2109         struct nlmsghdr *nlh;
2110         struct ndmsg *ndm;
2111
2112         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2113         if (nlh == NULL)
2114                 return -EMSGSIZE;
2115
2116         ndm = nlmsg_data(nlh);
2117         ndm->ndm_family  = neigh->ops->family;
2118         ndm->ndm_pad1    = 0;
2119         ndm->ndm_pad2    = 0;
2120         ndm->ndm_flags   = neigh->flags;
2121         ndm->ndm_type    = neigh->type;
2122         ndm->ndm_ifindex = neigh->dev->ifindex;
2123
2124         NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2125
2126         read_lock_bh(&neigh->lock);
2127         ndm->ndm_state   = neigh->nud_state;
2128         if (neigh->nud_state & NUD_VALID) {
2129                 char haddr[MAX_ADDR_LEN];
2130
2131                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2132                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2133                         read_unlock_bh(&neigh->lock);
2134                         goto nla_put_failure;
2135                 }
2136         }
2137
2138         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2139         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2140         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2141         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2142         read_unlock_bh(&neigh->lock);
2143
2144         NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2145         NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2146
2147         return nlmsg_end(skb, nlh);
2148
2149 nla_put_failure:
2150         nlmsg_cancel(skb, nlh);
2151         return -EMSGSIZE;
2152 }
2153
2154 static void neigh_update_notify(struct neighbour *neigh)
2155 {
2156         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2157         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2158 }
2159
2160 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2161                             struct netlink_callback *cb)
2162 {
2163         struct net *net = sock_net(skb->sk);
2164         struct neighbour *n;
2165         int rc, h, s_h = cb->args[1];
2166         int idx, s_idx = idx = cb->args[2];
2167         struct neigh_hash_table *nht;
2168
2169         rcu_read_lock_bh();
2170         nht = rcu_dereference_bh(tbl->nht);
2171
2172         for (h = 0; h < (1 << nht->hash_shift); h++) {
2173                 if (h < s_h)
2174                         continue;
2175                 if (h > s_h)
2176                         s_idx = 0;
2177                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2178                      n != NULL;
2179                      n = rcu_dereference_bh(n->next)) {
2180                         if (!net_eq(dev_net(n->dev), net))
2181                                 continue;
2182                         if (idx < s_idx)
2183                                 goto next;
2184                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2185                                             cb->nlh->nlmsg_seq,
2186                                             RTM_NEWNEIGH,
2187                                             NLM_F_MULTI) <= 0) {
2188                                 rc = -1;
2189                                 goto out;
2190                         }
2191 next:
2192                         idx++;
2193                 }
2194         }
2195         rc = skb->len;
2196 out:
2197         rcu_read_unlock_bh();
2198         cb->args[1] = h;
2199         cb->args[2] = idx;
2200         return rc;
2201 }
2202
2203 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2204 {
2205         struct neigh_table *tbl;
2206         int t, family, s_t;
2207
2208         read_lock(&neigh_tbl_lock);
2209         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2210         s_t = cb->args[0];
2211
2212         for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2213                 if (t < s_t || (family && tbl->family != family))
2214                         continue;
2215                 if (t > s_t)
2216                         memset(&cb->args[1], 0, sizeof(cb->args) -
2217                                                 sizeof(cb->args[0]));
2218                 if (neigh_dump_table(tbl, skb, cb) < 0)
2219                         break;
2220         }
2221         read_unlock(&neigh_tbl_lock);
2222
2223         cb->args[0] = t;
2224         return skb->len;
2225 }
2226
2227 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2228 {
2229         int chain;
2230         struct neigh_hash_table *nht;
2231
2232         rcu_read_lock_bh();
2233         nht = rcu_dereference_bh(tbl->nht);
2234
2235         read_lock(&tbl->lock); /* avoid resizes */
2236         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2237                 struct neighbour *n;
2238
2239                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2240                      n != NULL;
2241                      n = rcu_dereference_bh(n->next))
2242                         cb(n, cookie);
2243         }
2244         read_unlock(&tbl->lock);
2245         rcu_read_unlock_bh();
2246 }
2247 EXPORT_SYMBOL(neigh_for_each);
2248
2249 /* The tbl->lock must be held as a writer and BH disabled. */
2250 void __neigh_for_each_release(struct neigh_table *tbl,
2251                               int (*cb)(struct neighbour *))
2252 {
2253         int chain;
2254         struct neigh_hash_table *nht;
2255
2256         nht = rcu_dereference_protected(tbl->nht,
2257                                         lockdep_is_held(&tbl->lock));
2258         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2259                 struct neighbour *n;
2260                 struct neighbour __rcu **np;
2261
2262                 np = &nht->hash_buckets[chain];
2263                 while ((n = rcu_dereference_protected(*np,
2264                                         lockdep_is_held(&tbl->lock))) != NULL) {
2265                         int release;
2266
2267                         write_lock(&n->lock);
2268                         release = cb(n);
2269                         if (release) {
2270                                 rcu_assign_pointer(*np,
2271                                         rcu_dereference_protected(n->next,
2272                                                 lockdep_is_held(&tbl->lock)));
2273                                 n->dead = 1;
2274                         } else
2275                                 np = &n->next;
2276                         write_unlock(&n->lock);
2277                         if (release)
2278                                 neigh_cleanup_and_release(n);
2279                 }
2280         }
2281 }
2282 EXPORT_SYMBOL(__neigh_for_each_release);
2283
2284 #ifdef CONFIG_PROC_FS
2285
2286 static struct neighbour *neigh_get_first(struct seq_file *seq)
2287 {
2288         struct neigh_seq_state *state = seq->private;
2289         struct net *net = seq_file_net(seq);
2290         struct neigh_hash_table *nht = state->nht;
2291         struct neighbour *n = NULL;
2292         int bucket = state->bucket;
2293
2294         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2295         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2296                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2297
2298                 while (n) {
2299                         if (!net_eq(dev_net(n->dev), net))
2300                                 goto next;
2301                         if (state->neigh_sub_iter) {
2302                                 loff_t fakep = 0;
2303                                 void *v;
2304
2305                                 v = state->neigh_sub_iter(state, n, &fakep);
2306                                 if (!v)
2307                                         goto next;
2308                         }
2309                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2310                                 break;
2311                         if (n->nud_state & ~NUD_NOARP)
2312                                 break;
2313 next:
2314                         n = rcu_dereference_bh(n->next);
2315                 }
2316
2317                 if (n)
2318                         break;
2319         }
2320         state->bucket = bucket;
2321
2322         return n;
2323 }
2324
2325 static struct neighbour *neigh_get_next(struct seq_file *seq,
2326                                         struct neighbour *n,
2327                                         loff_t *pos)
2328 {
2329         struct neigh_seq_state *state = seq->private;
2330         struct net *net = seq_file_net(seq);
2331         struct neigh_hash_table *nht = state->nht;
2332
2333         if (state->neigh_sub_iter) {
2334                 void *v = state->neigh_sub_iter(state, n, pos);
2335                 if (v)
2336                         return n;
2337         }
2338         n = rcu_dereference_bh(n->next);
2339
2340         while (1) {
2341                 while (n) {
2342                         if (!net_eq(dev_net(n->dev), net))
2343                                 goto next;
2344                         if (state->neigh_sub_iter) {
2345                                 void *v = state->neigh_sub_iter(state, n, pos);
2346                                 if (v)
2347                                         return n;
2348                                 goto next;
2349                         }
2350                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2351                                 break;
2352
2353                         if (n->nud_state & ~NUD_NOARP)
2354                                 break;
2355 next:
2356                         n = rcu_dereference_bh(n->next);
2357                 }
2358
2359                 if (n)
2360                         break;
2361
2362                 if (++state->bucket >= (1 << nht->hash_shift))
2363                         break;
2364
2365                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2366         }
2367
2368         if (n && pos)
2369                 --(*pos);
2370         return n;
2371 }
2372
2373 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2374 {
2375         struct neighbour *n = neigh_get_first(seq);
2376
2377         if (n) {
2378                 --(*pos);
2379                 while (*pos) {
2380                         n = neigh_get_next(seq, n, pos);
2381                         if (!n)
2382                                 break;
2383                 }
2384         }
2385         return *pos ? NULL : n;
2386 }
2387
2388 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2389 {
2390         struct neigh_seq_state *state = seq->private;
2391         struct net *net = seq_file_net(seq);
2392         struct neigh_table *tbl = state->tbl;
2393         struct pneigh_entry *pn = NULL;
2394         int bucket = state->bucket;
2395
2396         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2397         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2398                 pn = tbl->phash_buckets[bucket];
2399                 while (pn && !net_eq(pneigh_net(pn), net))
2400                         pn = pn->next;
2401                 if (pn)
2402                         break;
2403         }
2404         state->bucket = bucket;
2405
2406         return pn;
2407 }
2408
2409 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2410                                             struct pneigh_entry *pn,
2411                                             loff_t *pos)
2412 {
2413         struct neigh_seq_state *state = seq->private;
2414         struct net *net = seq_file_net(seq);
2415         struct neigh_table *tbl = state->tbl;
2416
2417         do {
2418                 pn = pn->next;
2419         } while (pn && !net_eq(pneigh_net(pn), net));
2420
2421         while (!pn) {
2422                 if (++state->bucket > PNEIGH_HASHMASK)
2423                         break;
2424                 pn = tbl->phash_buckets[state->bucket];
2425                 while (pn && !net_eq(pneigh_net(pn), net))
2426                         pn = pn->next;
2427                 if (pn)
2428                         break;
2429         }
2430
2431         if (pn && pos)
2432                 --(*pos);
2433
2434         return pn;
2435 }
2436
2437 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2438 {
2439         struct pneigh_entry *pn = pneigh_get_first(seq);
2440
2441         if (pn) {
2442                 --(*pos);
2443                 while (*pos) {
2444                         pn = pneigh_get_next(seq, pn, pos);
2445                         if (!pn)
2446                                 break;
2447                 }
2448         }
2449         return *pos ? NULL : pn;
2450 }
2451
2452 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2453 {
2454         struct neigh_seq_state *state = seq->private;
2455         void *rc;
2456         loff_t idxpos = *pos;
2457
2458         rc = neigh_get_idx(seq, &idxpos);
2459         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2460                 rc = pneigh_get_idx(seq, &idxpos);
2461
2462         return rc;
2463 }
2464
2465 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2466         __acquires(rcu_bh)
2467 {
2468         struct neigh_seq_state *state = seq->private;
2469
2470         state->tbl = tbl;
2471         state->bucket = 0;
2472         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2473
2474         rcu_read_lock_bh();
2475         state->nht = rcu_dereference_bh(tbl->nht);
2476
2477         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2478 }
2479 EXPORT_SYMBOL(neigh_seq_start);
2480
2481 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2482 {
2483         struct neigh_seq_state *state;
2484         void *rc;
2485
2486         if (v == SEQ_START_TOKEN) {
2487                 rc = neigh_get_first(seq);
2488                 goto out;
2489         }
2490
2491         state = seq->private;
2492         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2493                 rc = neigh_get_next(seq, v, NULL);
2494                 if (rc)
2495                         goto out;
2496                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2497                         rc = pneigh_get_first(seq);
2498         } else {
2499                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2500                 rc = pneigh_get_next(seq, v, NULL);
2501         }
2502 out:
2503         ++(*pos);
2504         return rc;
2505 }
2506 EXPORT_SYMBOL(neigh_seq_next);
2507
2508 void neigh_seq_stop(struct seq_file *seq, void *v)
2509         __releases(rcu_bh)
2510 {
2511         rcu_read_unlock_bh();
2512 }
2513 EXPORT_SYMBOL(neigh_seq_stop);
2514
2515 /* statistics via seq_file */
2516
2517 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2518 {
2519         struct neigh_table *tbl = seq->private;
2520         int cpu;
2521
2522         if (*pos == 0)
2523                 return SEQ_START_TOKEN;
2524
2525         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2526                 if (!cpu_possible(cpu))
2527                         continue;
2528                 *pos = cpu+1;
2529                 return per_cpu_ptr(tbl->stats, cpu);
2530         }
2531         return NULL;
2532 }
2533
2534 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2535 {
2536         struct neigh_table *tbl = seq->private;
2537         int cpu;
2538
2539         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2540                 if (!cpu_possible(cpu))
2541                         continue;
2542                 *pos = cpu+1;
2543                 return per_cpu_ptr(tbl->stats, cpu);
2544         }
2545         return NULL;
2546 }
2547
/* ->stop for the statistics file: intentionally a no-op. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2552
2553 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2554 {
2555         struct neigh_table *tbl = seq->private;
2556         struct neigh_statistics *st = v;
2557
2558         if (v == SEQ_START_TOKEN) {
2559                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2560                 return 0;
2561         }
2562
2563         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2564                         "%08lx %08lx  %08lx %08lx %08lx\n",
2565                    atomic_read(&tbl->entries),
2566
2567                    st->allocs,
2568                    st->destroys,
2569                    st->hash_grows,
2570
2571                    st->lookups,
2572                    st->hits,
2573
2574                    st->res_failed,
2575
2576                    st->rcv_probes_mcast,
2577                    st->rcv_probes_ucast,
2578
2579                    st->periodic_gc_runs,
2580                    st->forced_gc_runs,
2581                    st->unres_discards
2582                    );
2583
2584         return 0;
2585 }
2586
2587 static const struct seq_operations neigh_stat_seq_ops = {
2588         .start  = neigh_stat_seq_start,
2589         .next   = neigh_stat_seq_next,
2590         .stop   = neigh_stat_seq_stop,
2591         .show   = neigh_stat_seq_show,
2592 };
2593
2594 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2595 {
2596         int ret = seq_open(file, &neigh_stat_seq_ops);
2597
2598         if (!ret) {
2599                 struct seq_file *sf = file->private_data;
2600                 sf->private = PDE(inode)->data;
2601         }
2602         return ret;
2603 };
2604
2605 static const struct file_operations neigh_stat_seq_fops = {
2606         .owner   = THIS_MODULE,
2607         .open    = neigh_stat_seq_open,
2608         .read    = seq_read,
2609         .llseek  = seq_lseek,
2610         .release = seq_release,
2611 };
2612
2613 #endif /* CONFIG_PROC_FS */
2614
2615 static inline size_t neigh_nlmsg_size(void)
2616 {
2617         return NLMSG_ALIGN(sizeof(struct ndmsg))
2618                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2619                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2620                + nla_total_size(sizeof(struct nda_cacheinfo))
2621                + nla_total_size(4); /* NDA_PROBES */
2622 }
2623
2624 static void __neigh_notify(struct neighbour *n, int type, int flags)
2625 {
2626         struct net *net = dev_net(n->dev);
2627         struct sk_buff *skb;
2628         int err = -ENOBUFS;
2629
2630         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2631         if (skb == NULL)
2632                 goto errout;
2633
2634         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2635         if (err < 0) {
2636                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2637                 WARN_ON(err == -EMSGSIZE);
2638                 kfree_skb(skb);
2639                 goto errout;
2640         }
2641         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2642         return;
2643 errout:
2644         if (err < 0)
2645                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2646 }
2647
2648 #ifdef CONFIG_ARPD
2649 void neigh_app_ns(struct neighbour *n)
2650 {
2651         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2652 }
2653 EXPORT_SYMBOL(neigh_app_ns);
2654 #endif /* CONFIG_ARPD */
2655
2656 #ifdef CONFIG_SYSCTL
2657
2658 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2659                            size_t *lenp, loff_t *ppos)
2660 {
2661         int size, ret;
2662         ctl_table tmp = *ctl;
2663
2664         tmp.data = &size;
2665         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2666         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2667         if (write && !ret)
2668                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2669         return ret;
2670 }
2671
/*
 * Indices into neigh_sysctl_table.neigh_vars[].  The order fixes the
 * layout of the sysctl table; the entries from NEIGH_VAR_GC_INTERVAL
 * onwards are the ones neigh_sysctl_register() cuts off for per-device
 * tables.
 */
enum {
	/* entries backed by struct neigh_parms fields */
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,

	/* entries only present in the "default" (no device) table */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,

	/* number of real entries; neigh_vars[] has one extra terminator */
	NEIGH_VAR_MAX
};
2694
2695 static struct neigh_sysctl_table {
2696         struct ctl_table_header *sysctl_header;
2697         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2698         char *dev_name;
2699 } neigh_sysctl_template __read_mostly = {
2700         .neigh_vars = {
2701                 [NEIGH_VAR_MCAST_PROBE] = {
2702                         .procname       = "mcast_solicit",
2703                         .maxlen         = sizeof(int),
2704                         .mode           = 0644,
2705                         .proc_handler   = proc_dointvec,
2706                 },
2707                 [NEIGH_VAR_UCAST_PROBE] = {
2708                         .procname       = "ucast_solicit",
2709                         .maxlen         = sizeof(int),
2710                         .mode           = 0644,
2711                         .proc_handler   = proc_dointvec,
2712                 },
2713                 [NEIGH_VAR_APP_PROBE] = {
2714                         .procname       = "app_solicit",
2715                         .maxlen         = sizeof(int),
2716                         .mode           = 0644,
2717                         .proc_handler   = proc_dointvec,
2718                 },
2719                 [NEIGH_VAR_RETRANS_TIME] = {
2720                         .procname       = "retrans_time",
2721                         .maxlen         = sizeof(int),
2722                         .mode           = 0644,
2723                         .proc_handler   = proc_dointvec_userhz_jiffies,
2724                 },
2725                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2726                         .procname       = "base_reachable_time",
2727                         .maxlen         = sizeof(int),
2728                         .mode           = 0644,
2729                         .proc_handler   = proc_dointvec_jiffies,
2730                 },
2731                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2732                         .procname       = "delay_first_probe_time",
2733                         .maxlen         = sizeof(int),
2734                         .mode           = 0644,
2735                         .proc_handler   = proc_dointvec_jiffies,
2736                 },
2737                 [NEIGH_VAR_GC_STALETIME] = {
2738                         .procname       = "gc_stale_time",
2739                         .maxlen         = sizeof(int),
2740                         .mode           = 0644,
2741                         .proc_handler   = proc_dointvec_jiffies,
2742                 },
2743                 [NEIGH_VAR_QUEUE_LEN] = {
2744                         .procname       = "unres_qlen",
2745                         .maxlen         = sizeof(int),
2746                         .mode           = 0644,
2747                         .proc_handler   = proc_unres_qlen,
2748                 },
2749                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2750                         .procname       = "unres_qlen_bytes",
2751                         .maxlen         = sizeof(int),
2752                         .mode           = 0644,
2753                         .proc_handler   = proc_dointvec,
2754                 },
2755                 [NEIGH_VAR_PROXY_QLEN] = {
2756                         .procname       = "proxy_qlen",
2757                         .maxlen         = sizeof(int),
2758                         .mode           = 0644,
2759                         .proc_handler   = proc_dointvec,
2760                 },
2761                 [NEIGH_VAR_ANYCAST_DELAY] = {
2762                         .procname       = "anycast_delay",
2763                         .maxlen         = sizeof(int),
2764                         .mode           = 0644,
2765                         .proc_handler   = proc_dointvec_userhz_jiffies,
2766                 },
2767                 [NEIGH_VAR_PROXY_DELAY] = {
2768                         .procname       = "proxy_delay",
2769                         .maxlen         = sizeof(int),
2770                         .mode           = 0644,
2771                         .proc_handler   = proc_dointvec_userhz_jiffies,
2772                 },
2773                 [NEIGH_VAR_LOCKTIME] = {
2774                         .procname       = "locktime",
2775                         .maxlen         = sizeof(int),
2776                         .mode           = 0644,
2777                         .proc_handler   = proc_dointvec_userhz_jiffies,
2778                 },
2779                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2780                         .procname       = "retrans_time_ms",
2781                         .maxlen         = sizeof(int),
2782                         .mode           = 0644,
2783                         .proc_handler   = proc_dointvec_ms_jiffies,
2784                 },
2785                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2786                         .procname       = "base_reachable_time_ms",
2787                         .maxlen         = sizeof(int),
2788                         .mode           = 0644,
2789                         .proc_handler   = proc_dointvec_ms_jiffies,
2790                 },
2791                 [NEIGH_VAR_GC_INTERVAL] = {
2792                         .procname       = "gc_interval",
2793                         .maxlen         = sizeof(int),
2794                         .mode           = 0644,
2795                         .proc_handler   = proc_dointvec_jiffies,
2796                 },
2797                 [NEIGH_VAR_GC_THRESH1] = {
2798                         .procname       = "gc_thresh1",
2799                         .maxlen         = sizeof(int),
2800                         .mode           = 0644,
2801                         .proc_handler   = proc_dointvec,
2802                 },
2803                 [NEIGH_VAR_GC_THRESH2] = {
2804                         .procname       = "gc_thresh2",
2805                         .maxlen         = sizeof(int),
2806                         .mode           = 0644,
2807                         .proc_handler   = proc_dointvec,
2808                 },
2809                 [NEIGH_VAR_GC_THRESH3] = {
2810                         .procname       = "gc_thresh3",
2811                         .maxlen         = sizeof(int),
2812                         .mode           = 0644,
2813                         .proc_handler   = proc_dointvec,
2814                 },
2815                 {},
2816         },
2817 };
2818
2819 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2820                           char *p_name, proc_handler *handler)
2821 {
2822         struct neigh_sysctl_table *t;
2823         const char *dev_name_source = NULL;
2824
2825 #define NEIGH_CTL_PATH_ROOT     0
2826 #define NEIGH_CTL_PATH_PROTO    1
2827 #define NEIGH_CTL_PATH_NEIGH    2
2828 #define NEIGH_CTL_PATH_DEV      3
2829
2830         struct ctl_path neigh_path[] = {
2831                 { .procname = "net",     },
2832                 { .procname = "proto",   },
2833                 { .procname = "neigh",   },
2834                 { .procname = "default", },
2835                 { },
2836         };
2837
2838         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2839         if (!t)
2840                 goto err;
2841
2842         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2843         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2844         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2845         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2846         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2847         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2848         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2849         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2850         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2851         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2852         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2853         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2854         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2855         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2856         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2857
2858         if (dev) {
2859                 dev_name_source = dev->name;
2860                 /* Terminate the table early */
2861                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2862                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2863         } else {
2864                 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2865                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2866                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2867                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2868                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2869         }
2870
2871
2872         if (handler) {
2873                 /* RetransTime */
2874                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2875                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2876                 /* ReachableTime */
2877                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2878                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2879                 /* RetransTime (in milliseconds)*/
2880                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2881                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2882                 /* ReachableTime (in milliseconds) */
2883                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2884                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2885         }
2886
2887         t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2888         if (!t->dev_name)
2889                 goto free;
2890
2891         neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2892         neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2893
2894         t->sysctl_header =
2895                 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2896         if (!t->sysctl_header)
2897                 goto free_procname;
2898
2899         p->sysctl_table = t;
2900         return 0;
2901
2902 free_procname:
2903         kfree(t->dev_name);
2904 free:
2905         kfree(t);
2906 err:
2907         return -ENOBUFS;
2908 }
2909 EXPORT_SYMBOL(neigh_sysctl_register);
2910
2911 void neigh_sysctl_unregister(struct neigh_parms *p)
2912 {
2913         if (p->sysctl_table) {
2914                 struct neigh_sysctl_table *t = p->sysctl_table;
2915                 p->sysctl_table = NULL;
2916                 unregister_sysctl_table(t->sysctl_header);
2917                 kfree(t->dev_name);
2918                 kfree(t);
2919         }
2920 }
2921 EXPORT_SYMBOL(neigh_sysctl_unregister);
2922
2923 #endif  /* CONFIG_SYSCTL */
2924
2925 static int __init neigh_init(void)
2926 {
2927         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
2928         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
2929         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
2930
2931         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
2932                       NULL);
2933         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
2934
2935         return 0;
2936 }
2937
2938 subsys_initcall(neigh_init);
2939