tg3: Enable GRO by default.
[linux-block.git] / net / ipv6 / ip6_fib.c
CommitLineData
1da177e4 1/*
1ab1457c 2 * Linux INET6 implementation
1da177e4
LT
3 * Forwarding Information Database
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/*
15 * Changes:
16 * Yuji SEKIYA @USAGI: Support default route on router node;
17 * remove ip6_null_entry from the top of
18 * routing table.
c0bece9f 19 * Ville Nuorvala: Fixed routing subtrees.
1da177e4 20 */
1da177e4
LT
21#include <linux/errno.h>
22#include <linux/types.h>
23#include <linux/net.h>
24#include <linux/route.h>
25#include <linux/netdevice.h>
26#include <linux/in6.h>
27#include <linux/init.h>
c71099ac 28#include <linux/list.h>
5a0e3ad6 29#include <linux/slab.h>
1da177e4
LT
30
31#ifdef CONFIG_PROC_FS
32#include <linux/proc_fs.h>
33#endif
34
35#include <net/ipv6.h>
36#include <net/ndisc.h>
37#include <net/addrconf.h>
38
39#include <net/ip6_fib.h>
40#include <net/ip6_route.h>
41
42#define RT6_DEBUG 2
43
44#if RT6_DEBUG >= 3
45#define RT6_TRACE(x...) printk(KERN_DEBUG x)
46#else
47#define RT6_TRACE(x...) do { ; } while (0)
48#endif
49
e18b890b 50static struct kmem_cache * fib6_node_kmem __read_mostly;
1da177e4
LT
51
52enum fib_walk_state_t
53{
54#ifdef CONFIG_IPV6_SUBTREES
55 FWS_S,
56#endif
57 FWS_L,
58 FWS_R,
59 FWS_C,
60 FWS_U
61};
62
63struct fib6_cleaner_t
64{
65 struct fib6_walker_t w;
ec7d43c2 66 struct net *net;
1da177e4
LT
67 int (*func)(struct rt6_info *, void *arg);
68 void *arg;
69};
70
90d41122 71static DEFINE_RWLOCK(fib6_walker_lock);
1da177e4
LT
72
73#ifdef CONFIG_IPV6_SUBTREES
74#define FWS_INIT FWS_S
1da177e4
LT
75#else
76#define FWS_INIT FWS_L
1da177e4
LT
77#endif
78
ec7d43c2
BT
79static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
80 struct rt6_info *rt);
8ed67789
DL
81static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
82static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
90d41122
AB
83static int fib6_walk(struct fib6_walker_t *w);
84static int fib6_walk_continue(struct fib6_walker_t *w);
1da177e4
LT
85
86/*
87 * A routing update causes an increase of the serial number on the
88 * affected subtree. This allows for cached routes to be asynchronously
89 * tested when modifications are made to the destination cache as a
90 * result of redirects, path MTU changes, etc.
91 */
92
93static __u32 rt_sernum;
94
5b7c931d
DL
95static void fib6_gc_timer_cb(unsigned long arg);
96
bbef49da
AD
97static LIST_HEAD(fib6_walkers);
98#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
1da177e4 99
90d41122
AB
100static inline void fib6_walker_link(struct fib6_walker_t *w)
101{
102 write_lock_bh(&fib6_walker_lock);
bbef49da 103 list_add(&w->lh, &fib6_walkers);
90d41122
AB
104 write_unlock_bh(&fib6_walker_lock);
105}
106
107static inline void fib6_walker_unlink(struct fib6_walker_t *w)
108{
109 write_lock_bh(&fib6_walker_lock);
bbef49da 110 list_del(&w->lh);
90d41122
AB
111 write_unlock_bh(&fib6_walker_lock);
112}
1da177e4
LT
113static __inline__ u32 fib6_new_sernum(void)
114{
115 u32 n = ++rt_sernum;
116 if ((__s32)n <= 0)
117 rt_sernum = n = 1;
118 return n;
119}
120
121/*
122 * Auxiliary address test functions for the radix tree.
123 *
1ab1457c 124 * These assume a 32bit processor (although it will work on
1da177e4
LT
125 * 64bit processors)
126 */
127
128/*
129 * test bit
130 */
02cdce53
YH
131#if defined(__LITTLE_ENDIAN)
132# define BITOP_BE32_SWIZZLE (0x1F & ~7)
133#else
134# define BITOP_BE32_SWIZZLE 0
135#endif
1da177e4 136
e69a4adc 137static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
1da177e4 138{
e69a4adc 139 __be32 *addr = token;
02cdce53
YH
140 /*
141 * Here,
142 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
143 * is optimized version of
144 * htonl(1 << ((~fn_bit)&0x1F))
145 * See include/asm-generic/bitops/le.h.
146 */
147 return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5];
1da177e4
LT
148}
149
1da177e4
LT
150static __inline__ struct fib6_node * node_alloc(void)
151{
152 struct fib6_node *fn;
153
c3762229 154 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
1da177e4
LT
155
156 return fn;
157}
158
159static __inline__ void node_free(struct fib6_node * fn)
160{
161 kmem_cache_free(fib6_node_kmem, fn);
162}
163
164static __inline__ void rt6_release(struct rt6_info *rt)
165{
166 if (atomic_dec_and_test(&rt->rt6i_ref))
167 dst_free(&rt->u.dst);
168}
169
58f09b78 170static void fib6_link_table(struct net *net, struct fib6_table *tb)
1b43af54
PM
171{
172 unsigned int h;
173
375216ad
TG
174 /*
175 * Initialize table lock at a single place to give lockdep a key,
176 * tables aren't visible prior to being linked to the list.
177 */
178 rwlock_init(&tb->tb6_lock);
179
a33bc5c1 180 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
1b43af54
PM
181
182 /*
183 * No protection necessary, this is the only list mutatation
184 * operation, tables never disappear once they exist.
185 */
58f09b78 186 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
1b43af54 187}
c71099ac 188
1b43af54 189#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 190
8ed67789 191static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
c71099ac
TG
192{
193 struct fib6_table *table;
194
195 table = kzalloc(sizeof(*table), GFP_ATOMIC);
196 if (table != NULL) {
197 table->tb6_id = id;
8ed67789 198 table->tb6_root.leaf = net->ipv6.ip6_null_entry;
c71099ac
TG
199 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
200 }
201
202 return table;
203}
204
58f09b78 205struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac
TG
206{
207 struct fib6_table *tb;
208
209 if (id == 0)
210 id = RT6_TABLE_MAIN;
58f09b78 211 tb = fib6_get_table(net, id);
c71099ac
TG
212 if (tb)
213 return tb;
214
8ed67789 215 tb = fib6_alloc_table(net, id);
c71099ac 216 if (tb != NULL)
58f09b78 217 fib6_link_table(net, tb);
c71099ac
TG
218
219 return tb;
220}
221
58f09b78 222struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac
TG
223{
224 struct fib6_table *tb;
58f09b78 225 struct hlist_head *head;
c71099ac
TG
226 struct hlist_node *node;
227 unsigned int h;
228
229 if (id == 0)
230 id = RT6_TABLE_MAIN;
a33bc5c1 231 h = id & (FIB6_TABLE_HASHSZ - 1);
c71099ac 232 rcu_read_lock();
58f09b78
DL
233 head = &net->ipv6.fib_table_hash[h];
234 hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
c71099ac
TG
235 if (tb->tb6_id == id) {
236 rcu_read_unlock();
237 return tb;
238 }
239 }
240 rcu_read_unlock();
241
242 return NULL;
243}
244
2c8c1e72 245static void __net_init fib6_tables_init(struct net *net)
c71099ac 246{
58f09b78
DL
247 fib6_link_table(net, net->ipv6.fib6_main_tbl);
248 fib6_link_table(net, net->ipv6.fib6_local_tbl);
c71099ac 249}
c71099ac
TG
250#else
251
58f09b78 252struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac 253{
58f09b78 254 return fib6_get_table(net, id);
c71099ac
TG
255}
256
58f09b78 257struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac 258{
58f09b78 259 return net->ipv6.fib6_main_tbl;
c71099ac
TG
260}
261
58f09b78
DL
262struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
263 int flags, pol_lookup_t lookup)
c71099ac 264{
8ed67789 265 return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags);
c71099ac
TG
266}
267
2c8c1e72 268static void __net_init fib6_tables_init(struct net *net)
c71099ac 269{
58f09b78 270 fib6_link_table(net, net->ipv6.fib6_main_tbl);
c71099ac
TG
271}
272
273#endif
274
1b43af54
PM
275static int fib6_dump_node(struct fib6_walker_t *w)
276{
277 int res;
278 struct rt6_info *rt;
279
7cc48263 280 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
1b43af54
PM
281 res = rt6_dump_route(rt, w->args);
282 if (res < 0) {
283 /* Frame is full, suspend walking */
284 w->leaf = rt;
285 return 1;
286 }
547b792c 287 WARN_ON(res == 0);
1b43af54
PM
288 }
289 w->leaf = NULL;
290 return 0;
291}
292
293static void fib6_dump_end(struct netlink_callback *cb)
294{
295 struct fib6_walker_t *w = (void*)cb->args[2];
296
297 if (w) {
7891cc81
HX
298 if (cb->args[4]) {
299 cb->args[4] = 0;
300 fib6_walker_unlink(w);
301 }
1b43af54
PM
302 cb->args[2] = 0;
303 kfree(w);
304 }
305 cb->done = (void*)cb->args[3];
306 cb->args[1] = 3;
307}
308
309static int fib6_dump_done(struct netlink_callback *cb)
310{
311 fib6_dump_end(cb);
312 return cb->done ? cb->done(cb) : 0;
313}
314
315static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
316 struct netlink_callback *cb)
317{
318 struct fib6_walker_t *w;
319 int res;
320
321 w = (void *)cb->args[2];
322 w->root = &table->tb6_root;
323
324 if (cb->args[4] == 0) {
2bec5a36
PM
325 w->count = 0;
326 w->skip = 0;
327
1b43af54
PM
328 read_lock_bh(&table->tb6_lock);
329 res = fib6_walk(w);
330 read_unlock_bh(&table->tb6_lock);
2bec5a36 331 if (res > 0) {
1b43af54 332 cb->args[4] = 1;
2bec5a36
PM
333 cb->args[5] = w->root->fn_sernum;
334 }
1b43af54 335 } else {
2bec5a36
PM
336 if (cb->args[5] != w->root->fn_sernum) {
337 /* Begin at the root if the tree changed */
338 cb->args[5] = w->root->fn_sernum;
339 w->state = FWS_INIT;
340 w->node = w->root;
341 w->skip = w->count;
342 } else
343 w->skip = 0;
344
1b43af54
PM
345 read_lock_bh(&table->tb6_lock);
346 res = fib6_walk_continue(w);
347 read_unlock_bh(&table->tb6_lock);
7891cc81
HX
348 if (res <= 0) {
349 fib6_walker_unlink(w);
350 cb->args[4] = 0;
1b43af54 351 }
1b43af54 352 }
7891cc81 353
1b43af54
PM
354 return res;
355}
356
c127ea2c 357static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1b43af54 358{
3b1e0a65 359 struct net *net = sock_net(skb->sk);
1b43af54
PM
360 unsigned int h, s_h;
361 unsigned int e = 0, s_e;
362 struct rt6_rtnl_dump_arg arg;
363 struct fib6_walker_t *w;
364 struct fib6_table *tb;
365 struct hlist_node *node;
58f09b78 366 struct hlist_head *head;
1b43af54
PM
367 int res = 0;
368
369 s_h = cb->args[0];
370 s_e = cb->args[1];
371
372 w = (void *)cb->args[2];
373 if (w == NULL) {
374 /* New dump:
375 *
376 * 1. hook callback destructor.
377 */
378 cb->args[3] = (long)cb->done;
379 cb->done = fib6_dump_done;
380
381 /*
382 * 2. allocate and initialize walker.
383 */
384 w = kzalloc(sizeof(*w), GFP_ATOMIC);
385 if (w == NULL)
386 return -ENOMEM;
387 w->func = fib6_dump_node;
388 cb->args[2] = (long)w;
389 }
390
391 arg.skb = skb;
392 arg.cb = cb;
191cd582 393 arg.net = net;
1b43af54
PM
394 w->args = &arg;
395
a33bc5c1 396 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
1b43af54 397 e = 0;
58f09b78
DL
398 head = &net->ipv6.fib_table_hash[h];
399 hlist_for_each_entry(tb, node, head, tb6_hlist) {
1b43af54
PM
400 if (e < s_e)
401 goto next;
402 res = fib6_dump_table(tb, skb, cb);
403 if (res != 0)
404 goto out;
405next:
406 e++;
407 }
408 }
409out:
410 cb->args[1] = e;
411 cb->args[0] = h;
412
413 res = res < 0 ? res : skb->len;
414 if (res <= 0)
415 fib6_dump_end(cb);
416 return res;
417}
1da177e4
LT
418
419/*
420 * Routing Table
421 *
422 * return the appropriate node for a routing tree "add" operation
423 * by either creating and inserting or by returning an existing
424 * node.
425 */
426
427static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
428 int addrlen, int plen,
429 int offset)
430{
431 struct fib6_node *fn, *in, *ln;
432 struct fib6_node *pn = NULL;
433 struct rt6key *key;
434 int bit;
1ab1457c 435 __be32 dir = 0;
1da177e4
LT
436 __u32 sernum = fib6_new_sernum();
437
438 RT6_TRACE("fib6_add_1\n");
439
440 /* insert node in tree */
441
442 fn = root;
443
444 do {
445 key = (struct rt6key *)((u8 *)fn->leaf + offset);
446
447 /*
448 * Prefix match
449 */
450 if (plen < fn->fn_bit ||
451 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
452 goto insert_above;
1ab1457c 453
1da177e4
LT
454 /*
455 * Exact match ?
456 */
1ab1457c 457
1da177e4
LT
458 if (plen == fn->fn_bit) {
459 /* clean up an intermediate node */
460 if ((fn->fn_flags & RTN_RTINFO) == 0) {
461 rt6_release(fn->leaf);
462 fn->leaf = NULL;
463 }
1ab1457c 464
1da177e4 465 fn->fn_sernum = sernum;
1ab1457c 466
1da177e4
LT
467 return fn;
468 }
469
470 /*
471 * We have more bits to go
472 */
1ab1457c 473
1da177e4
LT
474 /* Try to walk down on tree. */
475 fn->fn_sernum = sernum;
476 dir = addr_bit_set(addr, fn->fn_bit);
477 pn = fn;
478 fn = dir ? fn->right: fn->left;
479 } while (fn);
480
481 /*
482 * We walked to the bottom of tree.
483 * Create new leaf node without children.
484 */
485
486 ln = node_alloc();
487
488 if (ln == NULL)
489 return NULL;
490 ln->fn_bit = plen;
1ab1457c 491
1da177e4
LT
492 ln->parent = pn;
493 ln->fn_sernum = sernum;
494
495 if (dir)
496 pn->right = ln;
497 else
498 pn->left = ln;
499
500 return ln;
501
502
503insert_above:
504 /*
1ab1457c 505 * split since we don't have a common prefix anymore or
1da177e4
LT
506 * we have a less significant route.
507 * we've to insert an intermediate node on the list
508 * this new node will point to the one we need to create
509 * and the current
510 */
511
512 pn = fn->parent;
513
514 /* find 1st bit in difference between the 2 addrs.
515
971f359d 516 See comment in __ipv6_addr_diff: bit may be an invalid value,
1da177e4
LT
517 but if it is >= plen, the value is ignored in any case.
518 */
1ab1457c 519
971f359d 520 bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
1da177e4 521
1ab1457c
YH
522 /*
523 * (intermediate)[in]
1da177e4
LT
524 * / \
525 * (new leaf node)[ln] (old node)[fn]
526 */
527 if (plen > bit) {
528 in = node_alloc();
529 ln = node_alloc();
1ab1457c 530
1da177e4
LT
531 if (in == NULL || ln == NULL) {
532 if (in)
533 node_free(in);
534 if (ln)
535 node_free(ln);
536 return NULL;
537 }
538
1ab1457c
YH
539 /*
540 * new intermediate node.
1da177e4
LT
541 * RTN_RTINFO will
542 * be off since that an address that chooses one of
543 * the branches would not match less specific routes
544 * in the other branch
545 */
546
547 in->fn_bit = bit;
548
549 in->parent = pn;
550 in->leaf = fn->leaf;
551 atomic_inc(&in->leaf->rt6i_ref);
552
553 in->fn_sernum = sernum;
554
555 /* update parent pointer */
556 if (dir)
557 pn->right = in;
558 else
559 pn->left = in;
560
561 ln->fn_bit = plen;
562
563 ln->parent = in;
564 fn->parent = in;
565
566 ln->fn_sernum = sernum;
567
568 if (addr_bit_set(addr, bit)) {
569 in->right = ln;
570 in->left = fn;
571 } else {
572 in->left = ln;
573 in->right = fn;
574 }
575 } else { /* plen <= bit */
576
1ab1457c 577 /*
1da177e4
LT
578 * (new leaf node)[ln]
579 * / \
580 * (old node)[fn] NULL
581 */
582
583 ln = node_alloc();
584
585 if (ln == NULL)
586 return NULL;
587
588 ln->fn_bit = plen;
589
590 ln->parent = pn;
591
592 ln->fn_sernum = sernum;
1ab1457c 593
1da177e4
LT
594 if (dir)
595 pn->right = ln;
596 else
597 pn->left = ln;
598
599 if (addr_bit_set(&key->addr, plen))
600 ln->right = fn;
601 else
602 ln->left = fn;
603
604 fn->parent = ln;
605 }
606 return ln;
607}
608
609/*
610 * Insert routing information in a node.
611 */
612
613static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
86872cb5 614 struct nl_info *info)
1da177e4
LT
615{
616 struct rt6_info *iter = NULL;
617 struct rt6_info **ins;
618
619 ins = &fn->leaf;
620
7cc48263 621 for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) {
1da177e4
LT
622 /*
623 * Search for duplicates
624 */
625
626 if (iter->rt6i_metric == rt->rt6i_metric) {
627 /*
628 * Same priority level
629 */
630
631 if (iter->rt6i_dev == rt->rt6i_dev &&
632 iter->rt6i_idev == rt->rt6i_idev &&
633 ipv6_addr_equal(&iter->rt6i_gateway,
634 &rt->rt6i_gateway)) {
635 if (!(iter->rt6i_flags&RTF_EXPIRES))
636 return -EEXIST;
637 iter->rt6i_expires = rt->rt6i_expires;
638 if (!(rt->rt6i_flags&RTF_EXPIRES)) {
639 iter->rt6i_flags &= ~RTF_EXPIRES;
640 iter->rt6i_expires = 0;
641 }
642 return -EEXIST;
643 }
644 }
645
646 if (iter->rt6i_metric > rt->rt6i_metric)
647 break;
648
7cc48263 649 ins = &iter->u.dst.rt6_next;
1da177e4
LT
650 }
651
f11e6659
DM
652 /* Reset round-robin state, if necessary */
653 if (ins == &fn->leaf)
654 fn->rr_ptr = NULL;
655
1da177e4
LT
656 /*
657 * insert node
658 */
659
7cc48263 660 rt->u.dst.rt6_next = iter;
1da177e4
LT
661 *ins = rt;
662 rt->rt6i_node = fn;
663 atomic_inc(&rt->rt6i_ref);
86872cb5 664 inet6_rt_notify(RTM_NEWROUTE, rt, info);
c572872f 665 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1da177e4
LT
666
667 if ((fn->fn_flags & RTN_RTINFO) == 0) {
c572872f 668 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1da177e4
LT
669 fn->fn_flags |= RTN_RTINFO;
670 }
671
672 return 0;
673}
674
63152fc0 675static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
1da177e4 676{
417f28bb 677 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
1da177e4 678 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
417f28bb 679 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 680 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
681}
682
63152fc0 683void fib6_force_start_gc(struct net *net)
1da177e4 684{
417f28bb
SH
685 if (!timer_pending(&net->ipv6.ip6_fib_timer))
686 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 687 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
688}
689
690/*
691 * Add routing information to the routing tree.
692 * <destination addr>/<source addr>
693 * with source addr info in sub-trees
694 */
695
86872cb5 696int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
1da177e4 697{
66729e18 698 struct fib6_node *fn, *pn = NULL;
1da177e4
LT
699 int err = -ENOMEM;
700
701 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
702 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
703
704 if (fn == NULL)
705 goto out;
706
66729e18
YH
707 pn = fn;
708
1da177e4
LT
709#ifdef CONFIG_IPV6_SUBTREES
710 if (rt->rt6i_src.plen) {
711 struct fib6_node *sn;
712
713 if (fn->subtree == NULL) {
714 struct fib6_node *sfn;
715
716 /*
717 * Create subtree.
718 *
719 * fn[main tree]
720 * |
721 * sfn[subtree root]
722 * \
723 * sn[new leaf node]
724 */
725
726 /* Create subtree root node */
727 sfn = node_alloc();
728 if (sfn == NULL)
729 goto st_failure;
730
8ed67789
DL
731 sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
732 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
1da177e4
LT
733 sfn->fn_flags = RTN_ROOT;
734 sfn->fn_sernum = fib6_new_sernum();
735
736 /* Now add the first leaf node to new subtree */
737
738 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
739 sizeof(struct in6_addr), rt->rt6i_src.plen,
740 offsetof(struct rt6_info, rt6i_src));
741
742 if (sn == NULL) {
743 /* If it is failed, discard just allocated
744 root, and then (in st_failure) stale node
745 in main tree.
746 */
747 node_free(sfn);
748 goto st_failure;
749 }
750
751 /* Now link new subtree to main tree */
752 sfn->parent = fn;
753 fn->subtree = sfn;
1da177e4
LT
754 } else {
755 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
756 sizeof(struct in6_addr), rt->rt6i_src.plen,
757 offsetof(struct rt6_info, rt6i_src));
758
759 if (sn == NULL)
760 goto st_failure;
761 }
762
66729e18
YH
763 if (fn->leaf == NULL) {
764 fn->leaf = rt;
765 atomic_inc(&rt->rt6i_ref);
766 }
1da177e4
LT
767 fn = sn;
768 }
769#endif
770
86872cb5 771 err = fib6_add_rt2node(fn, rt, info);
1da177e4
LT
772
773 if (err == 0) {
63152fc0 774 fib6_start_gc(info->nl_net, rt);
1da177e4 775 if (!(rt->rt6i_flags&RTF_CACHE))
ec7d43c2 776 fib6_prune_clones(info->nl_net, pn, rt);
1da177e4
LT
777 }
778
779out:
66729e18
YH
780 if (err) {
781#ifdef CONFIG_IPV6_SUBTREES
782 /*
783 * If fib6_add_1 has cleared the old leaf pointer in the
784 * super-tree leaf node we have to find a new one for it.
785 */
3c051235
DM
786 if (pn != fn && pn->leaf == rt) {
787 pn->leaf = NULL;
788 atomic_dec(&rt->rt6i_ref);
789 }
66729e18 790 if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
8ed67789 791 pn->leaf = fib6_find_prefix(info->nl_net, pn);
66729e18
YH
792#if RT6_DEBUG >= 2
793 if (!pn->leaf) {
547b792c 794 WARN_ON(pn->leaf == NULL);
8ed67789 795 pn->leaf = info->nl_net->ipv6.ip6_null_entry;
66729e18
YH
796 }
797#endif
798 atomic_inc(&pn->leaf->rt6i_ref);
799 }
800#endif
1da177e4 801 dst_free(&rt->u.dst);
66729e18 802 }
1da177e4
LT
803 return err;
804
805#ifdef CONFIG_IPV6_SUBTREES
806 /* Subtree creation failed, probably main tree node
807 is orphan. If it is, shoot it.
808 */
809st_failure:
810 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
8ed67789 811 fib6_repair_tree(info->nl_net, fn);
1da177e4
LT
812 dst_free(&rt->u.dst);
813 return err;
814#endif
815}
816
817/*
818 * Routing tree lookup
819 *
820 */
821
822struct lookup_args {
823 int offset; /* key offset on rt6_info */
824 struct in6_addr *addr; /* search key */
825};
826
827static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
828 struct lookup_args *args)
829{
830 struct fib6_node *fn;
e69a4adc 831 __be32 dir;
1da177e4 832
825e288e
YH
833 if (unlikely(args->offset == 0))
834 return NULL;
835
1da177e4
LT
836 /*
837 * Descend on a tree
838 */
839
840 fn = root;
841
842 for (;;) {
843 struct fib6_node *next;
844
845 dir = addr_bit_set(args->addr, fn->fn_bit);
846
847 next = dir ? fn->right : fn->left;
848
849 if (next) {
850 fn = next;
851 continue;
852 }
853
854 break;
855 }
856
3fc5e044 857 while(fn) {
7fc33165 858 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
1da177e4
LT
859 struct rt6key *key;
860
861 key = (struct rt6key *) ((u8 *) fn->leaf +
862 args->offset);
863
3fc5e044
YH
864 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
865#ifdef CONFIG_IPV6_SUBTREES
866 if (fn->subtree)
867 fn = fib6_lookup_1(fn->subtree, args + 1);
868#endif
869 if (!fn || fn->fn_flags & RTN_RTINFO)
870 return fn;
871 }
1da177e4
LT
872 }
873
3fc5e044
YH
874 if (fn->fn_flags & RTN_ROOT)
875 break;
876
1da177e4
LT
877 fn = fn->parent;
878 }
879
880 return NULL;
881}
882
883struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
884 struct in6_addr *saddr)
885{
1da177e4 886 struct fib6_node *fn;
825e288e
YH
887 struct lookup_args args[] = {
888 {
889 .offset = offsetof(struct rt6_info, rt6i_dst),
890 .addr = daddr,
891 },
1da177e4 892#ifdef CONFIG_IPV6_SUBTREES
825e288e
YH
893 {
894 .offset = offsetof(struct rt6_info, rt6i_src),
895 .addr = saddr,
896 },
1da177e4 897#endif
825e288e
YH
898 {
899 .offset = 0, /* sentinel */
900 }
901 };
1da177e4 902
fefc2a6c 903 fn = fib6_lookup_1(root, daddr ? args : args + 1);
1da177e4
LT
904
905 if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
906 fn = root;
907
908 return fn;
909}
910
911/*
912 * Get node with specified destination prefix (and source prefix,
913 * if subtrees are used)
914 */
915
916
917static struct fib6_node * fib6_locate_1(struct fib6_node *root,
918 struct in6_addr *addr,
919 int plen, int offset)
920{
921 struct fib6_node *fn;
922
923 for (fn = root; fn ; ) {
924 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
925
926 /*
927 * Prefix match
928 */
929 if (plen < fn->fn_bit ||
930 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
931 return NULL;
932
933 if (plen == fn->fn_bit)
934 return fn;
935
936 /*
937 * We have more bits to go
938 */
939 if (addr_bit_set(addr, fn->fn_bit))
940 fn = fn->right;
941 else
942 fn = fn->left;
943 }
944 return NULL;
945}
946
947struct fib6_node * fib6_locate(struct fib6_node *root,
948 struct in6_addr *daddr, int dst_len,
949 struct in6_addr *saddr, int src_len)
950{
951 struct fib6_node *fn;
952
953 fn = fib6_locate_1(root, daddr, dst_len,
954 offsetof(struct rt6_info, rt6i_dst));
955
956#ifdef CONFIG_IPV6_SUBTREES
957 if (src_len) {
547b792c 958 WARN_ON(saddr == NULL);
3fc5e044
YH
959 if (fn && fn->subtree)
960 fn = fib6_locate_1(fn->subtree, saddr, src_len,
1da177e4
LT
961 offsetof(struct rt6_info, rt6i_src));
962 }
963#endif
964
965 if (fn && fn->fn_flags&RTN_RTINFO)
966 return fn;
967
968 return NULL;
969}
970
971
972/*
973 * Deletion
974 *
975 */
976
8ed67789 977static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
1da177e4
LT
978{
979 if (fn->fn_flags&RTN_ROOT)
8ed67789 980 return net->ipv6.ip6_null_entry;
1da177e4
LT
981
982 while(fn) {
983 if(fn->left)
984 return fn->left->leaf;
985
986 if(fn->right)
987 return fn->right->leaf;
988
7fc33165 989 fn = FIB6_SUBTREE(fn);
1da177e4
LT
990 }
991 return NULL;
992}
993
994/*
995 * Called to trim the tree of intermediate nodes when possible. "fn"
996 * is the node we want to try and remove.
997 */
998
8ed67789
DL
999static struct fib6_node *fib6_repair_tree(struct net *net,
1000 struct fib6_node *fn)
1da177e4
LT
1001{
1002 int children;
1003 int nstate;
1004 struct fib6_node *child, *pn;
1005 struct fib6_walker_t *w;
1006 int iter = 0;
1007
1008 for (;;) {
1009 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1010 iter++;
1011
547b792c
IJ
1012 WARN_ON(fn->fn_flags & RTN_RTINFO);
1013 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1014 WARN_ON(fn->leaf != NULL);
1da177e4
LT
1015
1016 children = 0;
1017 child = NULL;
1018 if (fn->right) child = fn->right, children |= 1;
1019 if (fn->left) child = fn->left, children |= 2;
1020
7fc33165 1021 if (children == 3 || FIB6_SUBTREE(fn)
1da177e4
LT
1022#ifdef CONFIG_IPV6_SUBTREES
1023 /* Subtree root (i.e. fn) may have one child */
1024 || (children && fn->fn_flags&RTN_ROOT)
1025#endif
1026 ) {
8ed67789 1027 fn->leaf = fib6_find_prefix(net, fn);
1da177e4
LT
1028#if RT6_DEBUG >= 2
1029 if (fn->leaf==NULL) {
547b792c 1030 WARN_ON(!fn->leaf);
8ed67789 1031 fn->leaf = net->ipv6.ip6_null_entry;
1da177e4
LT
1032 }
1033#endif
1034 atomic_inc(&fn->leaf->rt6i_ref);
1035 return fn->parent;
1036 }
1037
1038 pn = fn->parent;
1039#ifdef CONFIG_IPV6_SUBTREES
7fc33165 1040 if (FIB6_SUBTREE(pn) == fn) {
547b792c 1041 WARN_ON(!(fn->fn_flags & RTN_ROOT));
7fc33165 1042 FIB6_SUBTREE(pn) = NULL;
1da177e4
LT
1043 nstate = FWS_L;
1044 } else {
547b792c 1045 WARN_ON(fn->fn_flags & RTN_ROOT);
1da177e4
LT
1046#endif
1047 if (pn->right == fn) pn->right = child;
1048 else if (pn->left == fn) pn->left = child;
1049#if RT6_DEBUG >= 2
547b792c
IJ
1050 else
1051 WARN_ON(1);
1da177e4
LT
1052#endif
1053 if (child)
1054 child->parent = pn;
1055 nstate = FWS_R;
1056#ifdef CONFIG_IPV6_SUBTREES
1057 }
1058#endif
1059
1060 read_lock(&fib6_walker_lock);
1061 FOR_WALKERS(w) {
1062 if (child == NULL) {
1063 if (w->root == fn) {
1064 w->root = w->node = NULL;
1065 RT6_TRACE("W %p adjusted by delroot 1\n", w);
1066 } else if (w->node == fn) {
1067 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1068 w->node = pn;
1069 w->state = nstate;
1070 }
1071 } else {
1072 if (w->root == fn) {
1073 w->root = child;
1074 RT6_TRACE("W %p adjusted by delroot 2\n", w);
1075 }
1076 if (w->node == fn) {
1077 w->node = child;
1078 if (children&2) {
1079 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1080 w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
1081 } else {
1082 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1083 w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
1084 }
1085 }
1086 }
1087 }
1088 read_unlock(&fib6_walker_lock);
1089
1090 node_free(fn);
7fc33165 1091 if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
1da177e4
LT
1092 return pn;
1093
1094 rt6_release(pn->leaf);
1095 pn->leaf = NULL;
1096 fn = pn;
1097 }
1098}
1099
1100static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
86872cb5 1101 struct nl_info *info)
1da177e4
LT
1102{
1103 struct fib6_walker_t *w;
1104 struct rt6_info *rt = *rtp;
c572872f 1105 struct net *net = info->nl_net;
1da177e4
LT
1106
1107 RT6_TRACE("fib6_del_route\n");
1108
1109 /* Unlink it */
7cc48263 1110 *rtp = rt->u.dst.rt6_next;
1da177e4 1111 rt->rt6i_node = NULL;
c572872f
BT
1112 net->ipv6.rt6_stats->fib_rt_entries--;
1113 net->ipv6.rt6_stats->fib_discarded_routes++;
1da177e4 1114
f11e6659
DM
1115 /* Reset round-robin state, if necessary */
1116 if (fn->rr_ptr == rt)
1117 fn->rr_ptr = NULL;
1118
1da177e4
LT
1119 /* Adjust walkers */
1120 read_lock(&fib6_walker_lock);
1121 FOR_WALKERS(w) {
1122 if (w->state == FWS_C && w->leaf == rt) {
1123 RT6_TRACE("walker %p adjusted by delroute\n", w);
7cc48263 1124 w->leaf = rt->u.dst.rt6_next;
1da177e4
LT
1125 if (w->leaf == NULL)
1126 w->state = FWS_U;
1127 }
1128 }
1129 read_unlock(&fib6_walker_lock);
1130
7cc48263 1131 rt->u.dst.rt6_next = NULL;
1da177e4 1132
1da177e4
LT
1133 /* If it was last route, expunge its radix tree node */
1134 if (fn->leaf == NULL) {
1135 fn->fn_flags &= ~RTN_RTINFO;
c572872f 1136 net->ipv6.rt6_stats->fib_route_nodes--;
8ed67789 1137 fn = fib6_repair_tree(net, fn);
1da177e4
LT
1138 }
1139
1140 if (atomic_read(&rt->rt6i_ref) != 1) {
1141 /* This route is used as dummy address holder in some split
1142 * nodes. It is not leaked, but it still holds other resources,
1143 * which must be released in time. So, scan ascendant nodes
1144 * and replace dummy references to this route with references
1145 * to still alive ones.
1146 */
1147 while (fn) {
1148 if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
8ed67789 1149 fn->leaf = fib6_find_prefix(net, fn);
1da177e4
LT
1150 atomic_inc(&fn->leaf->rt6i_ref);
1151 rt6_release(rt);
1152 }
1153 fn = fn->parent;
1154 }
1155 /* No more references are possible at this point. */
2df96af0 1156 BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
1da177e4
LT
1157 }
1158
86872cb5 1159 inet6_rt_notify(RTM_DELROUTE, rt, info);
1da177e4
LT
1160 rt6_release(rt);
1161}
1162
86872cb5 1163int fib6_del(struct rt6_info *rt, struct nl_info *info)
1da177e4 1164{
8ed67789 1165 struct net *net = info->nl_net;
1da177e4
LT
1166 struct fib6_node *fn = rt->rt6i_node;
1167 struct rt6_info **rtp;
1168
1169#if RT6_DEBUG >= 2
1170 if (rt->u.dst.obsolete>0) {
547b792c 1171 WARN_ON(fn != NULL);
1da177e4
LT
1172 return -ENOENT;
1173 }
1174#endif
8ed67789 1175 if (fn == NULL || rt == net->ipv6.ip6_null_entry)
1da177e4
LT
1176 return -ENOENT;
1177
547b792c 1178 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1da177e4 1179
150730d5
YH
1180 if (!(rt->rt6i_flags&RTF_CACHE)) {
1181 struct fib6_node *pn = fn;
1182#ifdef CONFIG_IPV6_SUBTREES
1183 /* clones of this route might be in another subtree */
1184 if (rt->rt6i_src.plen) {
1185 while (!(pn->fn_flags&RTN_ROOT))
1186 pn = pn->parent;
1187 pn = pn->parent;
1188 }
1189#endif
ec7d43c2 1190 fib6_prune_clones(info->nl_net, pn, rt);
150730d5 1191 }
1da177e4
LT
1192
1193 /*
1194 * Walk the leaf entries looking for ourself
1195 */
1196
7cc48263 1197 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) {
1da177e4 1198 if (*rtp == rt) {
86872cb5 1199 fib6_del_route(fn, rtp, info);
1da177e4
LT
1200 return 0;
1201 }
1202 }
1203 return -ENOENT;
1204}
1205
1206/*
1207 * Tree traversal function.
1208 *
1209 * Certainly, it is not interrupt safe.
1210 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1211 * It means, that we can modify tree during walking
1212 * and use this function for garbage collection, clone pruning,
1ab1457c 1213 * cleaning tree when a device goes down etc. etc.
1da177e4
LT
1214 *
1215 * It guarantees that every node will be traversed,
1216 * and that it will be traversed only once.
1217 *
1218 * Callback function w->func may return:
1219 * 0 -> continue walking.
1220 * positive value -> walking is suspended (used by tree dumps,
1221 * and probably by gc, if it will be split to several slices)
1222 * negative value -> terminate walking.
1223 *
1224 * The function itself returns:
1225 * 0 -> walk is complete.
1226 * >0 -> walk is incomplete (i.e. suspended)
1227 * <0 -> walk is terminated by an error.
1228 */
1229
90d41122 1230static int fib6_walk_continue(struct fib6_walker_t *w)
1da177e4
LT
1231{
1232 struct fib6_node *fn, *pn;
1233
1234 for (;;) {
1235 fn = w->node;
1236 if (fn == NULL)
1237 return 0;
1238
1239 if (w->prune && fn != w->root &&
1240 fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
1241 w->state = FWS_C;
1242 w->leaf = fn->leaf;
1243 }
1244 switch (w->state) {
1245#ifdef CONFIG_IPV6_SUBTREES
1246 case FWS_S:
7fc33165
YH
1247 if (FIB6_SUBTREE(fn)) {
1248 w->node = FIB6_SUBTREE(fn);
1da177e4
LT
1249 continue;
1250 }
1251 w->state = FWS_L;
1ab1457c 1252#endif
1da177e4
LT
1253 case FWS_L:
1254 if (fn->left) {
1255 w->node = fn->left;
1256 w->state = FWS_INIT;
1257 continue;
1258 }
1259 w->state = FWS_R;
1260 case FWS_R:
1261 if (fn->right) {
1262 w->node = fn->right;
1263 w->state = FWS_INIT;
1264 continue;
1265 }
1266 w->state = FWS_C;
1267 w->leaf = fn->leaf;
1268 case FWS_C:
1269 if (w->leaf && fn->fn_flags&RTN_RTINFO) {
2bec5a36
PM
1270 int err;
1271
1272 if (w->count < w->skip) {
1273 w->count++;
1274 continue;
1275 }
1276
1277 err = w->func(w);
1da177e4
LT
1278 if (err)
1279 return err;
2bec5a36
PM
1280
1281 w->count++;
1da177e4
LT
1282 continue;
1283 }
1284 w->state = FWS_U;
1285 case FWS_U:
1286 if (fn == w->root)
1287 return 0;
1288 pn = fn->parent;
1289 w->node = pn;
1290#ifdef CONFIG_IPV6_SUBTREES
7fc33165 1291 if (FIB6_SUBTREE(pn) == fn) {
547b792c 1292 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1da177e4
LT
1293 w->state = FWS_L;
1294 continue;
1295 }
1296#endif
1297 if (pn->left == fn) {
1298 w->state = FWS_R;
1299 continue;
1300 }
1301 if (pn->right == fn) {
1302 w->state = FWS_C;
1303 w->leaf = w->node->leaf;
1304 continue;
1305 }
1306#if RT6_DEBUG >= 2
547b792c 1307 WARN_ON(1);
1da177e4
LT
1308#endif
1309 }
1310 }
1311}
1312
90d41122 1313static int fib6_walk(struct fib6_walker_t *w)
1da177e4
LT
1314{
1315 int res;
1316
1317 w->state = FWS_INIT;
1318 w->node = w->root;
1319
1320 fib6_walker_link(w);
1321 res = fib6_walk_continue(w);
1322 if (res <= 0)
1323 fib6_walker_unlink(w);
1324 return res;
1325}
1326
1327static int fib6_clean_node(struct fib6_walker_t *w)
1328{
1329 int res;
1330 struct rt6_info *rt;
0a8891a0 1331 struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
ec7d43c2
BT
1332 struct nl_info info = {
1333 .nl_net = c->net,
1334 };
1da177e4 1335
7cc48263 1336 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1337 res = c->func(rt, c->arg);
1338 if (res < 0) {
1339 w->leaf = rt;
528c4ceb 1340 res = fib6_del(rt, &info);
1da177e4
LT
1341 if (res) {
1342#if RT6_DEBUG >= 2
1343 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1344#endif
1345 continue;
1346 }
1347 return 0;
1348 }
547b792c 1349 WARN_ON(res != 0);
1da177e4
LT
1350 }
1351 w->leaf = rt;
1352 return 0;
1353}
1354
1355/*
1356 * Convenient frontend to tree walker.
1ab1457c 1357 *
1da177e4
LT
1358 * func is called on each route.
1359 * It may return -1 -> delete this route.
1360 * 0 -> continue walking
1361 *
1362 * prune==1 -> only immediate children of node (certainly,
1363 * ignoring pure split nodes) will be scanned.
1364 */
1365
ec7d43c2 1366static void fib6_clean_tree(struct net *net, struct fib6_node *root,
8ce11e6a
AB
1367 int (*func)(struct rt6_info *, void *arg),
1368 int prune, void *arg)
1da177e4
LT
1369{
1370 struct fib6_cleaner_t c;
1371
1372 c.w.root = root;
1373 c.w.func = fib6_clean_node;
1374 c.w.prune = prune;
2bec5a36
PM
1375 c.w.count = 0;
1376 c.w.skip = 0;
1da177e4
LT
1377 c.func = func;
1378 c.arg = arg;
ec7d43c2 1379 c.net = net;
1da177e4
LT
1380
1381 fib6_walk(&c.w);
1382}
1383
f3db4851 1384void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
c71099ac
TG
1385 int prune, void *arg)
1386{
c71099ac 1387 struct fib6_table *table;
1b43af54 1388 struct hlist_node *node;
58f09b78 1389 struct hlist_head *head;
1b43af54 1390 unsigned int h;
c71099ac 1391
1b43af54 1392 rcu_read_lock();
a33bc5c1 1393 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
f3db4851 1394 head = &net->ipv6.fib_table_hash[h];
58f09b78 1395 hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
c71099ac 1396 write_lock_bh(&table->tb6_lock);
ec7d43c2
BT
1397 fib6_clean_tree(net, &table->tb6_root,
1398 func, prune, arg);
c71099ac
TG
1399 write_unlock_bh(&table->tb6_lock);
1400 }
1401 }
1b43af54 1402 rcu_read_unlock();
c71099ac
TG
1403}
1404
1da177e4
LT
1405static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1406{
1407 if (rt->rt6i_flags & RTF_CACHE) {
1408 RT6_TRACE("pruning clone %p\n", rt);
1409 return -1;
1410 }
1411
1412 return 0;
1413}
1414
ec7d43c2
BT
1415static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
1416 struct rt6_info *rt)
1da177e4 1417{
ec7d43c2 1418 fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
1da177e4
LT
1419}
1420
1421/*
1422 * Garbage collection
1423 */
1424
1425static struct fib6_gc_args
1426{
1427 int timeout;
1428 int more;
1429} gc_args;
1430
1431static int fib6_age(struct rt6_info *rt, void *arg)
1432{
1433 unsigned long now = jiffies;
1434
1435 /*
1436 * check addrconf expiration here.
1437 * Routes are expired even if they are in use.
1438 *
1439 * Also age clones. Note, that clones are aged out
1440 * only if they are not in use now.
1441 */
1442
1443 if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
1444 if (time_after(now, rt->rt6i_expires)) {
1445 RT6_TRACE("expiring %p\n", rt);
1da177e4
LT
1446 return -1;
1447 }
1448 gc_args.more++;
1449 } else if (rt->rt6i_flags & RTF_CACHE) {
1450 if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
1451 time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
1452 RT6_TRACE("aging clone %p\n", rt);
1453 return -1;
1454 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1455 (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
1456 RT6_TRACE("purging route %p via non-router but gateway\n",
1457 rt);
1458 return -1;
1459 }
1460 gc_args.more++;
1461 }
1462
1463 return 0;
1464}
1465
1466static DEFINE_SPINLOCK(fib6_gc_lock);
1467
5b7c931d 1468void fib6_run_gc(unsigned long expires, struct net *net)
1da177e4 1469{
5b7c931d 1470 if (expires != ~0UL) {
1da177e4 1471 spin_lock_bh(&fib6_gc_lock);
5b7c931d
DL
1472 gc_args.timeout = expires ? (int)expires :
1473 net->ipv6.sysctl.ip6_rt_gc_interval;
1da177e4 1474 } else {
a76d7345 1475 if (!spin_trylock_bh(&fib6_gc_lock)) {
417f28bb 1476 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1da177e4
LT
1477 return;
1478 }
5b7c931d 1479 gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
1da177e4 1480 }
1da177e4 1481
3d0f24a7 1482 gc_args.more = icmp6_dst_gc();
f3db4851 1483
5b7c931d 1484 fib6_clean_all(net, fib6_age, 0, NULL);
1da177e4
LT
1485
1486 if (gc_args.more)
c8a45222
SH
1487 mod_timer(&net->ipv6.ip6_fib_timer,
1488 round_jiffies(jiffies
1489 + net->ipv6.sysctl.ip6_rt_gc_interval));
417f28bb
SH
1490 else
1491 del_timer(&net->ipv6.ip6_fib_timer);
1da177e4
LT
1492 spin_unlock_bh(&fib6_gc_lock);
1493}
1494
5b7c931d
DL
1495static void fib6_gc_timer_cb(unsigned long arg)
1496{
1497 fib6_run_gc(0, (struct net *)arg);
1498}
1499
2c8c1e72 1500static int __net_init fib6_net_init(struct net *net)
1da177e4 1501{
417f28bb 1502 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
63152fc0 1503
c572872f
BT
1504 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
1505 if (!net->ipv6.rt6_stats)
1506 goto out_timer;
1507
a33bc5c1 1508 net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ,
75307c0f
SH
1509 sizeof(*net->ipv6.fib_table_hash),
1510 GFP_KERNEL);
58f09b78 1511 if (!net->ipv6.fib_table_hash)
c572872f 1512 goto out_rt6_stats;
e0b85590 1513
58f09b78
DL
1514 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
1515 GFP_KERNEL);
1516 if (!net->ipv6.fib6_main_tbl)
e0b85590
DL
1517 goto out_fib_table_hash;
1518
58f09b78 1519 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
8ed67789 1520 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
58f09b78
DL
1521 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
1522 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
e0b85590
DL
1523
1524#ifdef CONFIG_IPV6_MULTIPLE_TABLES
58f09b78
DL
1525 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
1526 GFP_KERNEL);
1527 if (!net->ipv6.fib6_local_tbl)
e0b85590 1528 goto out_fib6_main_tbl;
58f09b78 1529 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
8ed67789 1530 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
58f09b78
DL
1531 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
1532 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
e0b85590 1533#endif
58f09b78 1534 fib6_tables_init(net);
f845ab6b 1535
417f28bb 1536 return 0;
d63bddbe 1537
e0b85590 1538#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 1539out_fib6_main_tbl:
58f09b78 1540 kfree(net->ipv6.fib6_main_tbl);
e0b85590 1541#endif
e0b85590 1542out_fib_table_hash:
58f09b78 1543 kfree(net->ipv6.fib_table_hash);
c572872f
BT
1544out_rt6_stats:
1545 kfree(net->ipv6.rt6_stats);
63152fc0 1546out_timer:
417f28bb 1547 return -ENOMEM;
58f09b78
DL
1548 }
1549
1550static void fib6_net_exit(struct net *net)
1551{
8ed67789 1552 rt6_ifdown(net, NULL);
417f28bb
SH
1553 del_timer_sync(&net->ipv6.ip6_fib_timer);
1554
58f09b78
DL
1555#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1556 kfree(net->ipv6.fib6_local_tbl);
1557#endif
1558 kfree(net->ipv6.fib6_main_tbl);
1559 kfree(net->ipv6.fib_table_hash);
c572872f 1560 kfree(net->ipv6.rt6_stats);
58f09b78
DL
1561}
1562
1563static struct pernet_operations fib6_net_ops = {
1564 .init = fib6_net_init,
1565 .exit = fib6_net_exit,
1566};
1567
1568int __init fib6_init(void)
1569{
1570 int ret = -ENOMEM;
63152fc0 1571
58f09b78
DL
1572 fib6_node_kmem = kmem_cache_create("fib6_nodes",
1573 sizeof(struct fib6_node),
1574 0, SLAB_HWCACHE_ALIGN,
1575 NULL);
1576 if (!fib6_node_kmem)
1577 goto out;
1578
1579 ret = register_pernet_subsys(&fib6_net_ops);
1580 if (ret)
c572872f 1581 goto out_kmem_cache_create;
58f09b78
DL
1582
1583 ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
1584 if (ret)
1585 goto out_unregister_subsys;
1586out:
1587 return ret;
1588
1589out_unregister_subsys:
1590 unregister_pernet_subsys(&fib6_net_ops);
d63bddbe
DL
1591out_kmem_cache_create:
1592 kmem_cache_destroy(fib6_node_kmem);
1593 goto out;
1da177e4
LT
1594}
1595
1596void fib6_gc_cleanup(void)
1597{
58f09b78 1598 unregister_pernet_subsys(&fib6_net_ops);
1da177e4
LT
1599 kmem_cache_destroy(fib6_node_kmem);
1600}