Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39
40 #define NEIGH_DEBUG 1
41
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55
56 #define PNEIGH_HASHMASK         0xF
57
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70
71    - All the scans/updates to hash buckets MUST be made under this lock.
72    - NOTHING clever should be made under this lock: no callbacks
73      to protocol backends, no attempts to send something to network.
74      It will result in deadlocks, if backend/driver wants to use neighbour
75      cache.
76    - If the entry requires some non-trivial actions, increase
77      its reference count and release table lock.
78
79    Neighbour entries are protected:
80    - with reference count.
81    - with rwlock neigh->lock
82
83    Reference count prevents destruction.
84
85    neigh->lock mainly serializes ll address data and its validity state.
86  *      However, the same lock is used to protect other entry fields:
87     - timer
88     - resolution queue
89
90    Again, nothing clever shall be made under neigh->lock,
91    the most complicated procedure, which we allow is dev->hard_header.
92    It is supposed, that dev->hard_header is simplistic and does
93    not make callbacks to neighbour tables.
94
95    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
96    list of neighbour tables. This list is used only in process context.
97  */
98
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100
/*
 * Output handler installed on dead/shut-down entries: drop the packet
 * and report the network as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}
106
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109         if (neigh->parms->neigh_cleanup)
110                 neigh->parms->neigh_cleanup(neigh);
111
112         __neigh_notify(neigh, RTM_DELNEIGH, 0);
113         neigh_release(neigh);
114 }
115
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        /* uniform in [base/2, 3*base/2) */
        return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
127
128
/*
 * Forced garbage collection, triggered from neigh_alloc() when the table
 * is over its gc thresholds.  Walks every bucket and frees entries that
 * are unreferenced and not permanent.  Returns 1 if anything was freed,
 * 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Neighbour record may be discarded if:
                         * - nobody refers to it.
                         * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
                                /* Unlink with rcu_assign_pointer so that
                                 * lockless readers see either the old or
                                 * the new next pointer, never garbage. */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
174
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177         neigh_hold(n);
178         if (unlikely(mod_timer(&n->timer, when))) {
179                 printk("NEIGH: BUG, double timer add, state is %x\n",
180                        n->nud_state);
181                 dump_stack();
182         }
183 }
184
185 static int neigh_del_timer(struct neighbour *n)
186 {
187         if ((n->nud_state & NUD_IN_TIMER) &&
188             del_timer(&n->timer)) {
189                 neigh_release(n);
190                 return 1;
191         }
192         return 0;
193 }
194
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197         struct sk_buff *skb;
198
199         while ((skb = skb_dequeue(list)) != NULL) {
200                 dev_put(skb->dev);
201                 kfree_skb(skb);
202         }
203 }
204
/*
 * Remove every entry belonging to @dev from the table (all entries when
 * @dev is NULL).  Entries still referenced elsewhere cannot be freed yet:
 * they are neutered instead (queue purged, output blackholed, state forced
 * to NOARP/NONE) and freed when the last reference goes.
 * Caller must hold tbl->lock for writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        /* Unhash first, then mark dead under n->lock. */
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
254
/*
 * Flush all cached entries for @dev, e.g. after its hardware address
 * changed.  Takes tbl->lock itself.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
262
/*
 * Tear down all neighbour and proxy-neighbour state for @dev when the
 * device goes down.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);

        /* proxy timer/queue are handled outside tbl->lock:
         * del_timer_sync() may sleep waiting for the handler. */
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
275
/*
 * Allocate and initialise a new neighbour entry for @tbl/@dev, first
 * forcing a GC pass when the table is above its thresholds.  The entry
 * is returned unhashed, with refcnt 1 and dead == 1 (neigh_create()
 * links it into the table and clears dead).  Returns NULL on allocation
 * failure or unrelieved GC pressure.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        /* Reserve our slot in the entry count up front; undone via
         * out_entries on any failure path. */
        entries = atomic_inc_return(&tbl->entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3)
                        goto out_entries;
        }

        if (tbl->entry_size)
                n = kzalloc(tbl->entry_size, GFP_ATOMIC);
        else {
                /* No fixed entry size: key and device private area are
                 * carved out right behind struct neighbour. */
                int sz = sizeof(*n) + tbl->key_len;

                sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
                sz += dev->neigh_priv_len;
                n = kzalloc(sz, GFP_ATOMIC);
        }
        if (!n)
                goto out_entries;

        skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        seqlock_init(&n->hh.hh_lock);
        n->parms          = neigh_parms_clone(&tbl->parms);
        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;  /* not yet hashed into the table */
out:
        return n;

out_entries:
        atomic_dec(&tbl->entries);
        goto out;
}
324
325 static void neigh_get_hash_rnd(u32 *x)
326 {
327         get_random_bytes(x, sizeof(*x));
328         *x |= 1;
329 }
330
331 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
332 {
333         size_t size = (1 << shift) * sizeof(struct neighbour *);
334         struct neigh_hash_table *ret;
335         struct neighbour __rcu **buckets;
336         int i;
337
338         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
339         if (!ret)
340                 return NULL;
341         if (size <= PAGE_SIZE)
342                 buckets = kzalloc(size, GFP_ATOMIC);
343         else
344                 buckets = (struct neighbour __rcu **)
345                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
346                                            get_order(size));
347         if (!buckets) {
348                 kfree(ret);
349                 return NULL;
350         }
351         ret->hash_buckets = buckets;
352         ret->hash_shift = shift;
353         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
354                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
355         return ret;
356 }
357
358 static void neigh_hash_free_rcu(struct rcu_head *head)
359 {
360         struct neigh_hash_table *nht = container_of(head,
361                                                     struct neigh_hash_table,
362                                                     rcu);
363         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
364         struct neighbour __rcu **buckets = nht->hash_buckets;
365
366         if (size <= PAGE_SIZE)
367                 kfree(buckets);
368         else
369                 free_pages((unsigned long)buckets, get_order(size));
370         kfree(nht);
371 }
372
/*
 * Rehash the table into a new bucket array of 1 << new_shift slots.
 * Caller must hold tbl->lock for writing.  On allocation failure the
 * old table is kept and returned.  The old array is freed only after a
 * grace period (call_rcu), so lockless readers stay safe throughout.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        /* The new table carries fresh hash_rnd seeds, so
                         * every key must be rehashed. */
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        hash >>= (32 - new_nht->hash_shift);
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        /* Push onto the head of the new bucket. */
                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
413
/*
 * Lockless (RCU) lookup of an entry by key and device.  Returns the
 * entry with an extra reference taken, or NULL if absent or if the
 * entry is concurrently being torn down (refcnt already zero).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        /* Dying entry: inc-not-zero fails, report a miss. */
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
443
/*
 * Like neigh_lookup() but matches by key and namespace only, ignoring
 * the device (hashing with dev == NULL).  Returns a referenced entry or
 * NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        /* Dying entry: inc-not-zero fails, report a miss. */
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
474
/*
 * Create a new entry for @pkey/@dev and hash it into @tbl, running the
 * protocol constructor, the driver's ndo_neigh_construct and the parms
 * neigh_setup hooks in that order.  If a concurrent creator won the race,
 * the existing entry is returned (referenced) and ours is released.
 * Returns ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* Backdate confirmed so the entry starts out unconfirmed. */
        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        /* parms went away while we were setting up: bail out. */
        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* Lost a creation race?  Hand back the winner instead. */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        neigh_hold(n);
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(neigh_create);
560
561 static u32 pneigh_hash(const void *pkey, int key_len)
562 {
563         u32 hash_val = *(u32 *)(pkey + key_len - 4);
564         hash_val ^= (hash_val >> 16);
565         hash_val ^= hash_val >> 8;
566         hash_val ^= hash_val >> 4;
567         hash_val &= PNEIGH_HASHMASK;
568         return hash_val;
569 }
570
571 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
572                                               struct net *net,
573                                               const void *pkey,
574                                               int key_len,
575                                               struct net_device *dev)
576 {
577         while (n) {
578                 if (!memcmp(n->key, pkey, key_len) &&
579                     net_eq(pneigh_net(n), net) &&
580                     (n->dev == dev || !n->dev))
581                         return n;
582                 n = n->next;
583         }
584         return NULL;
585 }
586
/*
 * Proxy-entry lookup without taking tbl->lock; the caller must already
 * hold it (either mode).  Returns the matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
597
/*
 * Look up a proxy entry, optionally creating it when @creat is nonzero.
 * The creation path may sleep (GFP_KERNEL) and requires the RTNL, which
 * also serialises against concurrent creators.  Returns the entry or
 * NULL on miss/allocation failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, hold_net(net));
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                /* Constructor refused the entry: undo all references. */
                if (dev)
                        dev_put(dev);
                release_net(net);
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);
643
644
/*
 * Remove and free the proxy entry matching @pkey/@dev/@net exactly
 * (no wildcard matching here, unlike lookup).  Returns 0 on success,
 * -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        /* Drop the lock before calling out: pdestructor
                         * may be arbitrarily complex. */
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        release_net(pneigh_net(n));
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}
671
/*
 * Drop every proxy entry for @dev (all entries when @dev is NULL).
 * Caller holds tbl->lock for writing (see neigh_ifdown()).  The return
 * value is always -ENOENT and is ignored by the caller.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
                                if (n->dev)
                                        dev_put(n->dev);
                                release_net(pneigh_net(n));
                                kfree(n);
                                continue;
                        }
                        np = &n->next;
                }
        }
        return -ENOENT;
}
695
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on @parms; destroy it when the last one goes. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (atomic_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
703
/*
 *      Final destructor, called when the last reference is dropped.
 *      The neighbour must already be out of the table (dead != 0);
 *      destroying a live entry is a bug and is only logged, leaking
 *      the entry rather than corrupting the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                printk(KERN_WARNING
                       "Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        /* A pending timer here would mean a reference we never saw. */
        if (neigh_del_timer(neigh))
                printk(KERN_WARNING "Impossible event.\n");

        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        /* Memory itself is freed after a grace period for RCU readers. */
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
739
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

        /* ops->output is the slow, validating output path. */
        neigh->output = neigh->ops->output;
}
751
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

        /* connected_output skips revalidation for confirmed entries. */
        neigh->output = neigh->ops->connected_output;
}
763
764 static void neigh_periodic_work(struct work_struct *work)
765 {
766         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
767         struct neighbour *n;
768         struct neighbour __rcu **np;
769         unsigned int i;
770         struct neigh_hash_table *nht;
771
772         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
773
774         write_lock_bh(&tbl->lock);
775         nht = rcu_dereference_protected(tbl->nht,
776                                         lockdep_is_held(&tbl->lock));
777
778         /*
779          *      periodically recompute ReachableTime from random function
780          */
781
782         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
783                 struct neigh_parms *p;
784                 tbl->last_rand = jiffies;
785                 for (p = &tbl->parms; p; p = p->next)
786                         p->reachable_time =
787                                 neigh_rand_reach_time(p->base_reachable_time);
788         }
789
790         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
791                 np = &nht->hash_buckets[i];
792
793                 while ((n = rcu_dereference_protected(*np,
794                                 lockdep_is_held(&tbl->lock))) != NULL) {
795                         unsigned int state;
796
797                         write_lock(&n->lock);
798
799                         state = n->nud_state;
800                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
801                                 write_unlock(&n->lock);
802                                 goto next_elt;
803                         }
804
805                         if (time_before(n->used, n->confirmed))
806                                 n->used = n->confirmed;
807
808                         if (atomic_read(&n->refcnt) == 1 &&
809                             (state == NUD_FAILED ||
810                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
811                                 *np = n->next;
812                                 n->dead = 1;
813                                 write_unlock(&n->lock);
814                                 neigh_cleanup_and_release(n);
815                                 continue;
816                         }
817                         write_unlock(&n->lock);
818
819 next_elt:
820                         np = &n->next;
821                 }
822                 /*
823                  * It's fine to release lock here, even if hash table
824                  * grows while we are preempted.
825                  */
826                 write_unlock_bh(&tbl->lock);
827                 cond_resched();
828                 write_lock_bh(&tbl->lock);
829                 nht = rcu_dereference_protected(tbl->nht,
830                                                 lockdep_is_held(&tbl->lock));
831         }
832         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
833          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
834          * base_reachable_time.
835          */
836         schedule_delayed_work(&tbl->gc_work,
837                               tbl->parms.base_reachable_time >> 1);
838         write_unlock_bh(&tbl->lock);
839 }
840
841 static __inline__ int neigh_max_probes(struct neighbour *n)
842 {
843         struct neigh_parms *p = n->parms;
844         return (n->nud_state & NUD_PROBE) ?
845                 p->ucast_probes :
846                 p->ucast_probes + p->app_probes + p->mcast_probes;
847 }
848
/*
 * Entry has transitioned to FAILED: report every queued skb as
 * unreachable and purge the queue.  Called and returns with neigh->lock
 * write-held, but drops it around each error_report callback.
 */
static void neigh_invalidate(struct neighbour *neigh)
        __releases(neigh->lock)
        __acquires(neigh->lock)
{
        struct sk_buff *skb;

        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
        neigh->updated = jiffies;

        /* It is very thin place. report_unreachable is very complicated
           routine. Particularly, it can hit the same neighbour entry!

           So that, we try to be accurate and avoid dead loop. --ANK
         */
        while (neigh->nud_state == NUD_FAILED &&
               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                write_unlock(&neigh->lock);
                neigh->ops->error_report(neigh, skb);
                write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;
}
873
/* Send one solicitation for the neighbour and bump its probe counter.
 * Entered with neigh->lock write-held; the lock is released before the
 * protocol's ->solicit() callback is invoked.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* Our private copy is no longer needed after solicit(). */
	kfree_skb(skb);
}
886
887 /* Called when a timer expires for a neighbour entry. */
888
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* default re-arm: one second from now */

	/* The state may have left the timer-driven set while the timer
	 * was in flight; nothing to do then.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Confirmed recently enough: stay REACHABLE. */
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Used recently but not confirmed: go to DELAY. */
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			/* Neither used nor confirmed: demote to STALE. */
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived during DELAY: promote. */
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* DELAY expired unconfirmed: start active probing. */
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted without an answer: fail the entry and
	 * error out all queued packets.
	 */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp so the timer fires at most about twice per second. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock on our behalf. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	/* Drop the reference the timer held on this entry. */
	neigh_release(neigh);
}
970
/* Drive neighbour resolution for an outgoing packet.
 *
 * Returns 0 if the caller may transmit immediately, 1 if @skb was
 * taken over here (queued pending resolution, or freed on failure).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Entry usable now or already being verified: just transmit. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			/* Start resolution.  probes begins at ucast_probes
			 * so that, per neigh_max_probes(), only the
			 * mcast/app budget applies while INCOMPLETE.
			 */
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing mechanism configured: fail at once. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* STALE entry is being used: schedule verification. */
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue the packet, evicting oldest entries to keep
			 * within the per-neighbour byte budget.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases neigh->lock itself (not the BH disable),
	 * hence the split unlock + local_bh_enable() below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
1035 EXPORT_SYMBOL(__neigh_event_send);
1036
1037 static void neigh_update_hhs(struct neighbour *neigh)
1038 {
1039         struct hh_cache *hh;
1040         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1041                 = NULL;
1042
1043         if (neigh->dev->header_ops)
1044                 update = neigh->dev->header_ops->cache_update;
1045
1046         if (update) {
1047                 hh = &neigh->hh;
1048                 if (hh->hh_len) {
1049                         write_seqlock_bh(&hh->hh_lock);
1050                         update(hh, neigh->dev, neigh->ha);
1051                         write_sequnlock_bh(&hh->hh_lock);
1052                 }
1053         }
1054 }
1055
1056
1057
1058 /* Generic update routine.
1059    -- lladdr is new lladdr or NULL, if it is not supplied.
1060    -- new    is new state.
1061    -- flags
1062         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1063                                 if it is different.
1064         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1065                                 lladdr instead of overriding it
1066                                 if it is different.
1067                                 It also allows to retain current state
1068                                 if lladdr is unchanged.
1069         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1070
1071         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1072                                 NTF_ROUTER flag.
1073         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1074                                 a router.
1075
1076    Caller MUST hold reference count on the entry.
1077  */
1078
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		/* Moving to an invalid state: stop the timer, and when
		 * dropping from INCOMPLETE/PROBE to FAILED also flush the
		 * pending packet queue via neigh_invalidate().
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			/* Address differs but override is not allowed: at
			 * most mark the entry STALE (weak override), else
			 * ignore the update entirely.
			 */
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Install the new hardware address under ha_lock and
		 * refresh the cached hard headers derived from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate confirmed so a non-CONNECTED entry will be
		 * re-verified promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			/* Drop our lock while transmitting: output() may
			 * recurse into this neighbour entry.
			 */
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
1225 EXPORT_SYMBOL(neigh_update);
1226
1227 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1228                                  u8 *lladdr, void *saddr,
1229                                  struct net_device *dev)
1230 {
1231         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1232                                                  lladdr || !dev->addr_len);
1233         if (neigh)
1234                 neigh_update(neigh, lladdr, NUD_STALE,
1235                              NEIGH_UPDATE_F_OVERRIDE);
1236         return neigh;
1237 }
1238 EXPORT_SYMBOL(neigh_event_ns);
1239
1240 /* called with read_lock_bh(&n->lock); */
1241 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1242 {
1243         struct net_device *dev = dst->dev;
1244         __be16 prot = dst->ops->protocol;
1245         struct hh_cache *hh = &n->hh;
1246
1247         write_lock_bh(&n->lock);
1248
1249         /* Only one thread can come in here and initialize the
1250          * hh_cache entry.
1251          */
1252         if (!hh->hh_len)
1253                 dev->header_ops->cache(n, hh, prot);
1254
1255         write_unlock_bh(&n->lock);
1256 }
1257
1258 /* This function can be used in contexts, where only old dev_queue_xmit
1259  * worked, f.e. if you want to override normal output path (eql, shaper),
1260  * but resolution is not made yet.
1261  */
1262
1263 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1264 {
1265         struct net_device *dev = skb->dev;
1266
1267         __skb_pull(skb, skb_network_offset(skb));
1268
1269         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1270                             skb->len) < 0 &&
1271             dev->header_ops->rebuild(skb))
1272                 return 0;
1273
1274         return dev_queue_xmit(skb);
1275 }
1276 EXPORT_SYMBOL(neigh_compat_output);
1277
1278 /* Slow and careful. */
1279
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	__skb_pull(skb, skb_network_offset(skb));

	/* neigh_event_send() returns 0 when the entry is usable now;
	 * otherwise it has taken ownership of the skb (queued/freed)
	 * and we return rc == 0.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Read neigh->ha consistently against concurrent address
		 * updates (seqlock retry loop).
		 */
		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, neigh);
	/* fall through: discard also frees the skb and returns -EINVAL */
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1319 EXPORT_SYMBOL(neigh_resolve_output);
1320
1321 /* As fast as possible without hh cache */
1322
1323 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1324 {
1325         struct net_device *dev = neigh->dev;
1326         unsigned int seq;
1327         int err;
1328
1329         __skb_pull(skb, skb_network_offset(skb));
1330
1331         do {
1332                 seq = read_seqbegin(&neigh->ha_lock);
1333                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1334                                       neigh->ha, NULL, skb->len);
1335         } while (read_seqretry(&neigh->ha_lock, seq));
1336
1337         if (err >= 0)
1338                 err = dev_queue_xmit(skb);
1339         else {
1340                 err = -EINVAL;
1341                 kfree_skb(skb);
1342         }
1343         return err;
1344 }
1345 EXPORT_SYMBOL(neigh_connected_output);
1346
/* Output method for neighbours that need no resolution at all:
 * hand the packet straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1351 EXPORT_SYMBOL(neigh_direct_output);
1352
/* Proxy-queue timer handler: replay matured requests through the
 * protocol's ->proxy_redo() and re-arm for the earliest remaining one.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* smallest remaining delay; 0 = none left */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due now: replay it, or drop it if the device
			 * is no longer running.
			 */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the device ref taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1386
1387 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1388                     struct sk_buff *skb)
1389 {
1390         unsigned long now = jiffies;
1391         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1392
1393         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1394                 kfree_skb(skb);
1395                 return;
1396         }
1397
1398         NEIGH_CB(skb)->sched_next = sched_next;
1399         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1400
1401         spin_lock(&tbl->proxy_queue.lock);
1402         if (del_timer(&tbl->proxy_timer)) {
1403                 if (time_before(tbl->proxy_timer.expires, sched_next))
1404                         sched_next = tbl->proxy_timer.expires;
1405         }
1406         skb_dst_drop(skb);
1407         dev_hold(skb->dev);
1408         __skb_queue_tail(&tbl->proxy_queue, skb);
1409         mod_timer(&tbl->proxy_timer, sched_next);
1410         spin_unlock(&tbl->proxy_queue.lock);
1411 }
1412 EXPORT_SYMBOL(pneigh_enqueue);
1413
1414 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1415                                                       struct net *net, int ifindex)
1416 {
1417         struct neigh_parms *p;
1418
1419         for (p = &tbl->parms; p; p = p->next) {
1420                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1421                     (!p->dev && !ifindex))
1422                         return p;
1423         }
1424
1425         return NULL;
1426 }
1427
/* Clone the table's default parameters for @dev and link the clone
 * into tbl->parms list.  Returns NULL on allocation failure or when
 * the driver's ndo_neigh_setup() rejects the parms.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p, *ref;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Template: the table's default (device-less) parms. */
	ref = lookup_neigh_parms(tbl, net, 0);
	if (!ref)
		return NULL;

	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		atomic_set(&p->refcnt, 1);
		/* Re-derive a fresh reachable_time for the clone. */
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);

		/* Let the driver veto or adjust the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			kfree(p);
			return NULL;
		}

		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next         = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
1462 EXPORT_SYMBOL(neigh_parms_alloc);
1463
/* RCU callback: drop the reference once all readers are done with
 * the unlinked parms (see neigh_parms_release()).
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1471
/* Unlink @parms from @tbl and release it after an RCU grace period.
 * The table's built-in default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			/* Defer the final put until RCU readers finish. */
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
1493 EXPORT_SYMBOL(neigh_parms_release);
1494
/* Free @parms, dropping the netns reference taken when it was
 * allocated (counterpart of neigh_parms_alloc()).
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1500
1501 static struct lock_class_key neigh_table_proxy_queue_class;
1502
/* Base initialization of a neighbour table: per-cpu stats, proc entry,
 * hash tables, locks, timers and the periodic GC work.  Panics on
 * allocation failure, as this runs during protocol initialization.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial main hash: 2^3 buckets. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1541 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1542
/* Initialize @tbl and register it on the global neigh_tables list.
 * A second table for the same family is reported (with a stack dump)
 * but still registered.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Detect an already-registered table of the same family. */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}
1563 EXPORT_SYMBOL(neigh_table_init);
1564
/* Tear down a neighbour table: stop GC/timers, flush entries, unlink
 * from the global list and free its memory.  Reverse of
 * neigh_table_init(); always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	/* Unlink from the global table list. */
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Hash memory is freed via RCU; readers may still traverse it. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
1599 EXPORT_SYMBOL(neigh_table_clear);
1600
/* RTM_DELNEIGH handler: delete the neighbour (or proxy) entry named
 * by the netlink message.  Deletion of a normal entry is performed by
 * forcing NUD_FAILED through an administrative neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found; every branch below ends in
		 * "goto out", so the list lock can be dropped here.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1664
/* RTM_NEWNEIGH handler: create or update a neighbour cache entry, or a
 * proxy (NTF_PROXY) entry, from a netlink request.  Runs under RTNL.
 * Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
        if (err < 0)
                goto out;

        /* A destination address attribute is mandatory. */
        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* A supplied link-layer address must be at least as long
                 * as the device's hardware address. */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
                struct neighbour *neigh;
                void *dst, *lladdr;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching table found: drop the table-list lock before the
                 * lookup/create calls below; we exit via 'out' from here on. */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(tb[NDA_DST]) < tbl->key_len)
                        goto out;
                dst = nla_data(tb[NDA_DST]);
                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

                if (ndm->ndm_flags & NTF_PROXY) {
                        struct pneigh_entry *pn;

                        err = -ENOBUFS;
                        /* creat=1: create the proxy entry if not present.
                         * Note dev may be NULL here (device-agnostic proxy). */
                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
                        if (pn) {
                                pn->flags = ndm->ndm_flags;
                                err = 0;
                        }
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, dst, dev);
                if (neigh == NULL) {
                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                                err = -ENOENT;
                                goto out;
                        }

                        neigh = __neigh_lookup_errno(tbl, dst, dev);
                        if (IS_ERR(neigh)) {
                                err = PTR_ERR(neigh);
                                goto out;
                        }
                } else {
                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
                                err = -EEXIST;
                                neigh_release(neigh);
                                goto out;
                        }

                        /* Without NLM_F_REPLACE an existing entry's
                         * link-layer address may not be overridden. */
                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
                }

                if (ndm->ndm_flags & NTF_USE) {
                        /* NTF_USE: only kick resolution; do not change the
                         * entry administratively. */
                        neigh_event_send(neigh, NULL);
                        err = 0;
                } else
                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
                neigh_release(neigh);
                goto out;
        }

        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;
out:
        return err;
}
1762
/* Dump one neigh_parms set as a nested NDTA_PARMS attribute.
 * The NLA_PUT* macros jump to the nla_put_failure label when @skb runs
 * out of tailroom.  Returns 0-or-positive on success, -EMSGSIZE/-ENOBUFS
 * on failure (attribute cancelled).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, NDTA_PARMS);
        if (nest == NULL)
                return -ENOBUFS;

        /* Only per-device parms carry an ifindex; table defaults do not. */
        if (parms->dev)
                NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

        NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
        NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
        /* approximative value for deprecated QUEUE_LEN (in packets) */
        NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
                    DIV_ROUND_UP(parms->queue_len_bytes,
                                 SKB_TRUESIZE(ETH_FRAME_LEN)));
        NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
        NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
        NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
        NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
        NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
        NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
                      parms->base_reachable_time);
        NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
        NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
        NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
        NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
        NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
        NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -EMSGSIZE;
}
1800
/* Build a full RTM_NEWNEIGHTBL message describing @tbl: thresholds, GC
 * interval, runtime configuration, aggregated per-CPU statistics and
 * the table's default parameter set.  Sampled under tbl->lock.
 * Returns the message length, or -EMSGSIZE if @skb is full.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
        struct nlmsghdr *nlh;
        struct ndtmsg *ndtmsg;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndtmsg = nlmsg_data(nlh);

        read_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;

        NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
        NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
        NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
        NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
        NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

        {
                unsigned long now = jiffies;
                unsigned int flush_delta = now - tbl->last_flush;
                unsigned int rand_delta = now - tbl->last_rand;
                struct neigh_hash_table *nht;
                struct ndt_config ndc = {
                        .ndtc_key_len           = tbl->key_len,
                        .ndtc_entry_size        = tbl->entry_size,
                        .ndtc_entries           = atomic_read(&tbl->entries),
                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };

                /* The hash parameters live in an RCU-managed structure. */
                rcu_read_lock_bh();
                nht = rcu_dereference_bh(tbl->nht);
                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
                rcu_read_unlock_bh();

                NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
        }

        {
                int cpu;
                struct ndt_stats ndst;

                memset(&ndst, 0, sizeof(ndst));

                /* Aggregate the per-CPU counters into one ndt_stats. */
                for_each_possible_cpu(cpu) {
                        struct neigh_statistics *st;

                        st = per_cpu_ptr(tbl->stats, cpu);
                        ndst.ndts_allocs                += st->allocs;
                        ndst.ndts_destroys              += st->destroys;
                        ndst.ndts_hash_grows            += st->hash_grows;
                        ndst.ndts_res_failed            += st->res_failed;
                        ndst.ndts_lookups               += st->lookups;
                        ndst.ndts_hits                  += st->hits;
                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
                }

                NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
        }

        /* Only the table's default parms belong here; per-device parms are
         * sent separately via neightbl_fill_param_info(). */
        BUG_ON(tbl->parms.dev);
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;

        read_unlock_bh(&tbl->lock);
        return nlmsg_end(skb, nlh);

nla_put_failure:
        read_unlock_bh(&tbl->lock);
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1884
1885 static int neightbl_fill_param_info(struct sk_buff *skb,
1886                                     struct neigh_table *tbl,
1887                                     struct neigh_parms *parms,
1888                                     u32 pid, u32 seq, int type,
1889                                     unsigned int flags)
1890 {
1891         struct ndtmsg *ndtmsg;
1892         struct nlmsghdr *nlh;
1893
1894         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1895         if (nlh == NULL)
1896                 return -EMSGSIZE;
1897
1898         ndtmsg = nlmsg_data(nlh);
1899
1900         read_lock_bh(&tbl->lock);
1901         ndtmsg->ndtm_family = tbl->family;
1902         ndtmsg->ndtm_pad1   = 0;
1903         ndtmsg->ndtm_pad2   = 0;
1904
1905         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1906             neightbl_fill_parms(skb, parms) < 0)
1907                 goto errout;
1908
1909         read_unlock_bh(&tbl->lock);
1910         return nlmsg_end(skb, nlh);
1911 errout:
1912         read_unlock_bh(&tbl->lock);
1913         nlmsg_cancel(skb, nlh);
1914         return -EMSGSIZE;
1915 }
1916
/* Netlink validation policy for RTM_SETNEIGHTBL top-level attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
        [NDTA_NAME]             = { .type = NLA_STRING },
        [NDTA_THRESH1]          = { .type = NLA_U32 },
        [NDTA_THRESH2]          = { .type = NLA_U32 },
        [NDTA_THRESH3]          = { .type = NLA_U32 },
        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
        [NDTA_PARMS]            = { .type = NLA_NESTED },
};
1925
/* Netlink validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
};
1941
/* RTM_SETNEIGHTBL handler: update table-wide settings (GC thresholds
 * and interval) and/or one neigh_parms set, selected by name via
 * NDTA_NAME and optionally by NDTPA_IFINDEX inside a nested NDTA_PARMS.
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct neigh_table *tbl;
        struct ndtmsg *ndtmsg;
        struct nlattr *tb[NDTA_MAX+1];
        int err;

        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
                          nl_neightbl_policy);
        if (err < 0)
                goto errout;

        /* The target table is identified by name; it is mandatory. */
        if (tb[NDTA_NAME] == NULL) {
                err = -EINVAL;
                goto errout;
        }

        ndtmsg = nlmsg_data(nlh);
        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
                        continue;

                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
                        break;
        }

        if (tbl == NULL) {
                err = -ENOENT;
                goto errout_locked;
        }

        /*
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
        write_lock_bh(&tbl->lock);

        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
                struct neigh_parms *p;
                int i, ifindex = 0;

                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
                                       nl_ntbl_parm_policy);
                if (err < 0)
                        goto errout_tbl_lock;

                /* ifindex 0 selects the table's default parms. */
                if (tbp[NDTPA_IFINDEX])
                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

                p = lookup_neigh_parms(tbl, net, ifindex);
                if (p == NULL) {
                        err = -ENOENT;
                        goto errout_tbl_lock;
                }

                /* Apply each supplied parameter attribute in turn. */
                for (i = 1; i <= NDTPA_MAX; i++) {
                        if (tbp[i] == NULL)
                                continue;

                        switch (i) {
                        case NDTPA_QUEUE_LEN:
                                /* Deprecated packet-count form: convert to
                                 * the byte-based limit. */
                                p->queue_len_bytes = nla_get_u32(tbp[i]) *
                                                     SKB_TRUESIZE(ETH_FRAME_LEN);
                                break;
                        case NDTPA_QUEUE_LENBYTES:
                                p->queue_len_bytes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_PROXY_QLEN:
                                p->proxy_qlen = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_APP_PROBES:
                                p->app_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_UCAST_PROBES:
                                p->ucast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_MCAST_PROBES:
                                p->mcast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_BASE_REACHABLE_TIME:
                                p->base_reachable_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_GC_STALETIME:
                                p->gc_staletime = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_DELAY_PROBE_TIME:
                                p->delay_probe_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_RETRANS_TIME:
                                p->retrans_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_ANYCAST_DELAY:
                                p->anycast_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_PROXY_DELAY:
                                p->proxy_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_LOCKTIME:
                                p->locktime = nla_get_msecs(tbp[i]);
                                break;
                        }
                }
        }

        if (tb[NDTA_THRESH1])
                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

        if (tb[NDTA_THRESH2])
                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

        if (tb[NDTA_THRESH3])
                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

        if (tb[NDTA_GC_INTERVAL])
                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

        err = 0;

errout_tbl_lock:
        write_unlock_bh(&tbl->lock);
errout_locked:
        read_unlock(&neigh_tbl_lock);
errout:
        return err;
}
2070
/* RTM_GETNEIGHTBL dump handler: emit every table matching the requested
 * family, followed by that table's per-device parameter sets.  Resume
 * state across dump calls: cb->args[0] = table index, cb->args[1] =
 * parms index within the table.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int family, tidx, nidx = 0;
        int tbl_skip = cb->args[0];
        int neigh_skip = cb->args[1];
        struct neigh_table *tbl;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
                struct neigh_parms *p;

                if (tidx < tbl_skip || (family && tbl->family != family))
                        continue;

                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
                                       NLM_F_MULTI) <= 0)
                        break;

                /* tbl->parms (the default set) was dumped above; only the
                 * per-device sets chained off it are walked here. */
                for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
                        if (!net_eq(neigh_parms_net(p), net))
                                continue;

                        if (nidx < neigh_skip)
                                goto next;

                        if (neightbl_fill_param_info(skb, tbl, p,
                                                     NETLINK_CB(cb->skb).pid,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGHTBL,
                                                     NLM_F_MULTI) <= 0)
                                goto out;
                next:
                        nidx++;
                }

                /* Skip applies only to the first (resumed) table. */
                neigh_skip = 0;
        }
out:
        read_unlock(&neigh_tbl_lock);
        cb->args[0] = tidx;
        cb->args[1] = nidx;

        return skb->len;
}
2119
/* Fill an ndmsg/RTM_NEWNEIGH message describing @neigh.  The volatile
 * fields (state, hardware address, timestamps, refcount) are sampled
 * under neigh->lock so they form a consistent snapshot.
 * Returns the message length, or -EMSGSIZE if @skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                /* Explicit nla_put() here (not the NLA_PUT macro) so the
                 * lock can be dropped before jumping to the error path. */
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
        NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2169
2170 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2171                             u32 pid, u32 seq, int type, unsigned int flags,
2172                             struct neigh_table *tbl)
2173 {
2174         struct nlmsghdr *nlh;
2175         struct ndmsg *ndm;
2176
2177         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2178         if (nlh == NULL)
2179                 return -EMSGSIZE;
2180
2181         ndm = nlmsg_data(nlh);
2182         ndm->ndm_family  = tbl->family;
2183         ndm->ndm_pad1    = 0;
2184         ndm->ndm_pad2    = 0;
2185         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2186         ndm->ndm_type    = NDA_DST;
2187         ndm->ndm_ifindex = pn->dev->ifindex;
2188         ndm->ndm_state   = NUD_NONE;
2189
2190         NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key);
2191
2192         return nlmsg_end(skb, nlh);
2193
2194 nla_put_failure:
2195         nlmsg_cancel(skb, nlh);
2196         return -EMSGSIZE;
2197 }
2198
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH rtnetlink message. */
static void neigh_update_notify(struct neighbour *neigh)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2204
/* Dump all neighbour entries of @tbl that belong to the dumping
 * socket's netns.  The hash table is walked under RCU-BH; resume state
 * lives in cb->args[1] (hash bucket) and cb->args[2] (index in bucket).
 * Returns skb->len on completion or -1 when the skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = 0; h < (1 << nht->hash_shift); h++) {
                if (h < s_h)
                        continue;
                /* Index skipping only applies within the resumed bucket. */
                if (h > s_h)
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (!net_eq(dev_net(n->dev), net))
                                continue;
                        if (idx < s_idx)
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI) <= 0) {
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2247
2248 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2249                              struct netlink_callback *cb)
2250 {
2251         struct pneigh_entry *n;
2252         struct net *net = sock_net(skb->sk);
2253         int rc, h, s_h = cb->args[3];
2254         int idx, s_idx = idx = cb->args[4];
2255
2256         read_lock_bh(&tbl->lock);
2257
2258         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
2259                 if (h < s_h)
2260                         continue;
2261                 if (h > s_h)
2262                         s_idx = 0;
2263                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2264                         if (dev_net(n->dev) != net)
2265                                 continue;
2266                         if (idx < s_idx)
2267                                 goto next;
2268                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2269                                             cb->nlh->nlmsg_seq,
2270                                             RTM_NEWNEIGH,
2271                                             NLM_F_MULTI, tbl) <= 0) {
2272                                 read_unlock_bh(&tbl->lock);
2273                                 rc = -1;
2274                                 goto out;
2275                         }
2276                 next:
2277                         idx++;
2278                 }
2279         }
2280
2281         read_unlock_bh(&tbl->lock);
2282         rc = skb->len;
2283 out:
2284         cb->args[3] = h;
2285         cb->args[4] = idx;
2286         return rc;
2287
2288 }
2289
/* RTM_GETNEIGH dump handler: iterate all tables of the requested family
 * and dump either proxy entries (when the request carried a full ndmsg
 * with NTF_PROXY set) or regular neighbour entries.  cb->args[0] holds
 * the table index to resume from.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;
        int proxy = 0;
        int err = 0;

        read_lock(&neigh_tbl_lock);
        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        /* check for full ndmsg structure presence, family member is
         * the same for both structures
         */
        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
                proxy = 1;

        s_t = cb->args[0];

        for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
             tbl = tbl->next, t++) {
                if (t < s_t || (family && tbl->family != family))
                        continue;
                /* Entering a fresh table: clear the per-table resume state
                 * (args[1..]) used by the table dumpers. */
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (proxy)
                        err = pneigh_dump_table(tbl, skb, cb);
                else
                        err = neigh_dump_table(tbl, skb, cb);
        }
        read_unlock(&neigh_tbl_lock);

        cb->args[0] = t;
        return skb->len;
}
2326
2327 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2328 {
2329         int chain;
2330         struct neigh_hash_table *nht;
2331
2332         rcu_read_lock_bh();
2333         nht = rcu_dereference_bh(tbl->nht);
2334
2335         read_lock(&tbl->lock); /* avoid resizes */
2336         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2337                 struct neighbour *n;
2338
2339                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2340                      n != NULL;
2341                      n = rcu_dereference_bh(n->next))
2342                         cb(n, cookie);
2343         }
2344         read_unlock(&tbl->lock);
2345         rcu_read_unlock_bh();
2346 }
2347 EXPORT_SYMBOL(neigh_for_each);
2348
/* The tbl->lock must be held as a writer and BH disabled. */
/*
 * Walk every neighbour in @tbl and, wherever @cb returns non-zero,
 * unlink the entry from its hash chain, mark it dead and release the
 * table's reference to it.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* Unlink via rcu_assign_pointer so RCU
                                 * readers still see a consistent chain. */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* Drop the reference after releasing n->lock. */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2383
2384 #ifdef CONFIG_PROC_FS
2385
/* Seq-file helper: return the first neighbour entry (starting at hash
 * bucket 0) that belongs to this netns, passes the optional
 * neigh_sub_iter filter, and — when NEIGH_SEQ_SKIP_NOARP is set — is
 * not purely NUD_NOARP.  Uses rcu_dereference_bh(); assumes the caller
 * already holds the RCU-BH read lock.  Updates state->bucket.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        int bucket = state->bucket;     /* initializer is overwritten below */

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                loff_t fakep = 0;
                                void *v;

                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        /* Keep entries that have any non-NOARP state bit. */
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
2424
/* Seq-file helper: advance from @n to the next matching neighbour,
 * applying the same netns / sub-iterator / NOARP filters as
 * neigh_get_first() and moving on to later hash buckets as needed.
 * Decrements *pos when an entry is found (seq position accounting).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        /* Let the sub-iterator advance within the current entry first. */
        if (state->neigh_sub_iter) {
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                /* Current bucket exhausted: move to the next one. */
                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2472
2473 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2474 {
2475         struct neighbour *n = neigh_get_first(seq);
2476
2477         if (n) {
2478                 --(*pos);
2479                 while (*pos) {
2480                         n = neigh_get_next(seq, n, pos);
2481                         if (!n)
2482                                 break;
2483                 }
2484         }
2485         return *pos ? NULL : n;
2486 }
2487
2488 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2489 {
2490         struct neigh_seq_state *state = seq->private;
2491         struct net *net = seq_file_net(seq);
2492         struct neigh_table *tbl = state->tbl;
2493         struct pneigh_entry *pn = NULL;
2494         int bucket = state->bucket;
2495
2496         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2497         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2498                 pn = tbl->phash_buckets[bucket];
2499                 while (pn && !net_eq(pneigh_net(pn), net))
2500                         pn = pn->next;
2501                 if (pn)
2502                         break;
2503         }
2504         state->bucket = bucket;
2505
2506         return pn;
2507 }
2508
/* Advance the proxy-neighbour walk to the entry after @pn.
 *
 * Steps along the current phash_buckets chain, skipping entries from
 * other network namespaces, and falls through to later buckets when the
 * chain runs out.  @pos, when non-NULL, is decremented once per entry
 * returned (seek support, mirroring neigh_get_next()).
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;	/* all buckets exhausted */
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
2536
2537 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2538 {
2539         struct pneigh_entry *pn = pneigh_get_first(seq);
2540
2541         if (pn) {
2542                 --(*pos);
2543                 while (*pos) {
2544                         pn = pneigh_get_next(seq, pn, pos);
2545                         if (!pn)
2546                                 break;
2547                 }
2548         }
2549         return *pos ? NULL : pn;
2550 }
2551
2552 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2553 {
2554         struct neigh_seq_state *state = seq->private;
2555         void *rc;
2556         loff_t idxpos = *pos;
2557
2558         rc = neigh_get_idx(seq, &idxpos);
2559         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2560                 rc = pneigh_get_idx(seq, &idxpos);
2561
2562         return rc;
2563 }
2564
/* Common seq_file ->start() helper for per-protocol neighbour /proc
 * files (e.g. ARP).  Records @tbl and @neigh_seq_flags in the per-file
 * iterator state, enters an RCU-BH read-side section (left again in
 * neigh_seq_stop()) and snapshots the hash table.  Position 0 yields
 * SEQ_START_TOKEN so ->show() can print a header; otherwise the
 * iterator seeks to *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is internal bookkeeping; never trust it from callers. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2580
/* Common seq_file ->next() helper for neighbour /proc files.
 *
 * Traversal order: header token, then the neighbour hash table, then
 * (unless NEIGH_SEQ_NEIGH_ONLY) the proxy-neighbour table.  The phase
 * we are in is recorded via NEIGH_SEQ_IS_PNEIGH, which is set by
 * pneigh_get_first().
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Neighbour table exhausted: switch to proxy entries. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2607
/* Common seq_file ->stop() helper: leave the RCU-BH read-side section
 * entered by neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2614
2615 /* statistics via seq_file */
2616
/* seq_file ->start() for the per-table statistics file.  Position 0 is
 * the header row (SEQ_START_TOKEN); positions >= 1 map to possible CPU
 * ids, returning that CPU's neigh_statistics.  *pos is stored as
 * cpu + 1 so the header slot stays distinct from CPU 0.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
2633
2634 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2635 {
2636         struct neigh_table *tbl = seq->private;
2637         int cpu;
2638
2639         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2640                 if (!cpu_possible(cpu))
2641                         continue;
2642                 *pos = cpu+1;
2643                 return per_cpu_ptr(tbl->stats, cpu);
2644         }
2645         return NULL;
2646 }
2647
/* seq_file ->stop() for the statistics file: intentionally empty —
 * ->start() takes no lock or RCU section, so there is nothing to
 * release.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2652
/* seq_file ->show() for the statistics file: one header row, then one
 * row of hex counters per possible CPU.  The first column (entries) is
 * a table-wide total and therefore repeats on every CPU row; the rest
 * come from that CPU's neigh_statistics.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2686
/* seq_file operations backing /proc/net/stat/<table>. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2693
2694 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2695 {
2696         int ret = seq_open(file, &neigh_stat_seq_ops);
2697
2698         if (!ret) {
2699                 struct seq_file *sf = file->private_data;
2700                 sf->private = PDE(inode)->data;
2701         }
2702         return ret;
2703 };
2704
/* File operations for /proc/net/stat/<table>; all I/O is delegated to
 * the generic seq_file helpers.
 */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2712
2713 #endif /* CONFIG_PROC_FS */
2714
2715 static inline size_t neigh_nlmsg_size(void)
2716 {
2717         return NLMSG_ALIGN(sizeof(struct ndmsg))
2718                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2719                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2720                + nla_total_size(sizeof(struct nda_cacheinfo))
2721                + nla_total_size(4); /* NDA_PROBES */
2722 }
2723
/* Broadcast a netlink message of @type (with nlmsg @flags) describing
 * neighbour @n to the RTNLGRP_NEIGH multicast group of its namespace.
 * Uses GFP_ATOMIC since callers may hold locks / run in softirq
 * context.  On failure the error is recorded against the group via
 * rtnl_set_sk_err() so listeners observe the loss (ENOBUFS).
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2747
#ifdef CONFIG_ARPD
/* Ask a userspace ARP daemon to resolve @n by multicasting an
 * RTM_GETNEIGH request on the RTNLGRP_NEIGH group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2755
2756 #ifdef CONFIG_SYSCTL
2757
/* Handler for the legacy "unres_qlen" sysctl.  The kernel stores the
 * limit in bytes (queue_len_bytes — see neigh_sysctl_register(), which
 * points both NEIGH_VAR_QUEUE_LEN and NEIGH_VAR_QUEUE_LEN_BYTES at the
 * same field), but this file presents it as a packet count.  Convert
 * in both directions using the true size of an ETH_FRAME_LEN skb.
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	/* Point the generic handler at a local packet-count view. */
	tmp.data = &size;
	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2771
/* Indices into neigh_sysctl_template.neigh_vars[].  The first group
 * (through BASE_REACHABLE_TIME_MS) is per-neigh_parms; the GC_* group
 * only exists in the "default" (non-device) subtree and is zeroed out
 * for per-device registrations.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2794
/* Template for each sysctl registration: neigh_sysctl_register()
 * kmemdup()s this whole structure, then points each entry's .data at
 * the relevant field of the caller's neigh_parms (and, for the GC_*
 * entries, at the table-wide values following the parms).  .data is
 * therefore deliberately left unset here.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
	char *dev_name;
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2918
/* Create the sysctl subtree net/<p_name>/neigh/<dev|default>/ for the
 * parameters in @p.
 *
 * @dev:     device the parms belong to, or NULL for the table-wide
 *           "default" subtree (which additionally exposes the gc_*
 *           knobs).
 * @p_name:  protocol name used as the path component (e.g. "ipv4").
 * @handler: optional proc handler overriding the time-valued entries
 *           (retrans/reachable, plain and ms variants); @dev is passed
 *           to it via extra1.
 *
 * On success stores the new table in p->sysctl_table and returns 0;
 * returns -ENOBUFS on any allocation/registration failure, with all
 * partial allocations released.  Undone by neigh_sysctl_unregister().
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		{ .procname = "net",	 },
		{ .procname = "proto",	 },
		{ .procname = "neigh",	 },
		{ .procname = "default", },
		{ },
	};

	/* Private copy of the template; .data members are filled below. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	/* Both qlen files view the same byte-valued field; "unres_qlen"
	 * converts packets<->bytes in proc_unres_qlen().
	 */
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		/* Table-wide gc_* values live right after the parms
		 * (layout assumed by the callers of this function).
		 */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header =
		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free_procname;

	p->sysctl_table = t;
	return 0;

free_procname:
	kfree(t->dev_name);
free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3010
3011 void neigh_sysctl_unregister(struct neigh_parms *p)
3012 {
3013         if (p->sysctl_table) {
3014                 struct neigh_sysctl_table *t = p->sysctl_table;
3015                 p->sysctl_table = NULL;
3016                 unregister_sysctl_table(t->sysctl_header);
3017                 kfree(t->dev_name);
3018                 kfree(t);
3019         }
3020 }
3021 EXPORT_SYMBOL(neigh_sysctl_unregister);
3022
3023 #endif  /* CONFIG_SYSCTL */
3024
/* Register the protocol-independent rtnetlink message handlers for
 * neighbour entries (new/del/get) and neighbour tables (get/set).
 * Dump-only message types pass a NULL doit callback.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3039