[TG3]: Fix tg3_get_ringparam()
[linux-2.6-block.git] / net / ipv4 / fib_trie.c
CommitLineData
19baf839
RO
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
fd966255
RO
44 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
19baf839
RO
51 */
52
772cb712 53#define VERSION "0.404"
19baf839
RO
54
55#include <linux/config.h>
56#include <asm/uaccess.h>
57#include <asm/system.h>
58#include <asm/bitops.h>
59#include <linux/types.h>
60#include <linux/kernel.h>
61#include <linux/sched.h>
62#include <linux/mm.h>
63#include <linux/string.h>
64#include <linux/socket.h>
65#include <linux/sockios.h>
66#include <linux/errno.h>
67#include <linux/in.h>
68#include <linux/inet.h>
cd8787ab 69#include <linux/inetdevice.h>
19baf839
RO
70#include <linux/netdevice.h>
71#include <linux/if_arp.h>
72#include <linux/proc_fs.h>
2373ce1c 73#include <linux/rcupdate.h>
19baf839
RO
74#include <linux/skbuff.h>
75#include <linux/netlink.h>
76#include <linux/init.h>
77#include <linux/list.h>
78#include <net/ip.h>
79#include <net/protocol.h>
80#include <net/route.h>
81#include <net/tcp.h>
82#include <net/sock.h>
83#include <net/ip_fib.h>
84#include "fib_lookup.h"
85
86#undef CONFIG_IP_FIB_TRIE_STATS
87#define MAX_CHILDS 16384
88
19baf839
RO
89#define KEYLENGTH (8*sizeof(t_key))
90#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
91#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
92
19baf839
RO
93typedef unsigned int t_key;
94
95#define T_TNODE 0
96#define T_LEAF 1
97#define NODE_TYPE_MASK 0x1UL
91b9a277 98#define NODE_PARENT(node) \
2373ce1c
RO
99 ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
100
101#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
102
103#define NODE_SET_PARENT(node, ptr) \
104 rcu_assign_pointer((node)->parent, \
105 ((unsigned long)(ptr)) | NODE_TYPE(node))
91b9a277
OJ
106
107#define IS_TNODE(n) (!(n->parent & T_LEAF))
108#define IS_LEAF(n) (n->parent & T_LEAF)
19baf839
RO
109
110struct node {
91b9a277
OJ
111 t_key key;
112 unsigned long parent;
19baf839
RO
113};
114
115struct leaf {
91b9a277
OJ
116 t_key key;
117 unsigned long parent;
19baf839 118 struct hlist_head list;
2373ce1c 119 struct rcu_head rcu;
19baf839
RO
120};
121
122struct leaf_info {
123 struct hlist_node hlist;
2373ce1c 124 struct rcu_head rcu;
19baf839
RO
125 int plen;
126 struct list_head falh;
127};
128
129struct tnode {
91b9a277
OJ
130 t_key key;
131 unsigned long parent;
132 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
133 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
134 unsigned short full_children; /* KEYLENGTH bits needed */
135 unsigned short empty_children; /* KEYLENGTH bits needed */
2373ce1c 136 struct rcu_head rcu;
91b9a277 137 struct node *child[0];
19baf839
RO
138};
139
140#ifdef CONFIG_IP_FIB_TRIE_STATS
141struct trie_use_stats {
142 unsigned int gets;
143 unsigned int backtrack;
144 unsigned int semantic_match_passed;
145 unsigned int semantic_match_miss;
146 unsigned int null_node_hit;
2f36895a 147 unsigned int resize_node_skipped;
19baf839
RO
148};
149#endif
150
151struct trie_stat {
152 unsigned int totdepth;
153 unsigned int maxdepth;
154 unsigned int tnodes;
155 unsigned int leaves;
156 unsigned int nullpointers;
157 unsigned int nodesizes[MAX_CHILDS];
c877efb2 158};
19baf839
RO
159
160struct trie {
91b9a277 161 struct node *trie;
19baf839
RO
162#ifdef CONFIG_IP_FIB_TRIE_STATS
163 struct trie_use_stats stats;
164#endif
91b9a277 165 int size;
19baf839
RO
166 unsigned int revision;
167};
168
19baf839
RO
169static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
170static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
19baf839 171static struct node *resize(struct trie *t, struct tnode *tn);
2f80b3c8
RO
172static struct tnode *inflate(struct trie *t, struct tnode *tn);
173static struct tnode *halve(struct trie *t, struct tnode *tn);
19baf839 174static void tnode_free(struct tnode *tn);
19baf839 175
ba89966c 176static kmem_cache_t *fn_alias_kmem __read_mostly;
19baf839
RO
177static struct trie *trie_local = NULL, *trie_main = NULL;
178
2373ce1c
RO
179
180/* rcu_read_lock needs to be hold by caller from readside */
181
c877efb2 182static inline struct node *tnode_get_child(struct tnode *tn, int i)
19baf839 183{
91b9a277 184 BUG_ON(i >= 1 << tn->bits);
19baf839 185
2373ce1c 186 return rcu_dereference(tn->child[i]);
19baf839
RO
187}
188
bb435b8d 189static inline int tnode_child_length(const struct tnode *tn)
19baf839 190{
91b9a277 191 return 1 << tn->bits;
19baf839
RO
192}
193
19baf839
RO
194static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
195{
91b9a277 196 if (offset < KEYLENGTH)
19baf839 197 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
91b9a277 198 else
19baf839
RO
199 return 0;
200}
201
202static inline int tkey_equals(t_key a, t_key b)
203{
c877efb2 204 return a == b;
19baf839
RO
205}
206
207static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
208{
c877efb2
SH
209 if (bits == 0 || offset >= KEYLENGTH)
210 return 1;
91b9a277
OJ
211 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
212 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
c877efb2 213}
19baf839
RO
214
215static inline int tkey_mismatch(t_key a, int offset, t_key b)
216{
217 t_key diff = a ^ b;
218 int i = offset;
219
c877efb2
SH
220 if (!diff)
221 return 0;
222 while ((diff << i) >> (KEYLENGTH-1) == 0)
19baf839
RO
223 i++;
224 return i;
225}
226
19baf839
RO
227/*
228 To understand this stuff, an understanding of keys and all their bits is
229 necessary. Every node in the trie has a key associated with it, but not
230 all of the bits in that key are significant.
231
232 Consider a node 'n' and its parent 'tp'.
233
234 If n is a leaf, every bit in its key is significant. Its presence is
772cb712 235 necessitated by path compression, since during a tree traversal (when
19baf839
RO
236 searching for a leaf - unless we are doing an insertion) we will completely
237 ignore all skipped bits we encounter. Thus we need to verify, at the end of
238 a potentially successful search, that we have indeed been walking the
239 correct key path.
240
241 Note that we can never "miss" the correct key in the tree if present by
242 following the wrong path. Path compression ensures that segments of the key
243 that are the same for all keys with a given prefix are skipped, but the
244 skipped part *is* identical for each node in the subtrie below the skipped
245 bit! trie_insert() in this implementation takes care of that - note the
246 call to tkey_sub_equals() in trie_insert().
247
248 if n is an internal node - a 'tnode' here, the various parts of its key
249 have many different meanings.
250
251 Example:
252 _________________________________________________________________
253 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
254 -----------------------------------------------------------------
255 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
256
257 _________________________________________________________________
258 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
259 -----------------------------------------------------------------
260 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
261
262 tp->pos = 7
263 tp->bits = 3
264 n->pos = 15
91b9a277 265 n->bits = 4
19baf839
RO
266
267 First, let's just ignore the bits that come before the parent tp, that is
268 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
269 not use them for anything.
270
271 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
272 index into the parent's child array. That is, they will be used to find
273 'n' among tp's children.
274
275 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
276 for the node n.
277
278 All the bits we have seen so far are significant to the node n. The rest
279 of the bits are really not needed or indeed known in n->key.
280
281 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
282 n's child array, and will of course be different for each child.
283
c877efb2 284
19baf839
RO
285 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
286 at this point.
287
288*/
289
0c7770c7 290static inline void check_tnode(const struct tnode *tn)
19baf839 291{
0c7770c7 292 WARN_ON(tn && tn->pos+tn->bits > 32);
19baf839
RO
293}
294
295static int halve_threshold = 25;
296static int inflate_threshold = 50;
e6308be8
RO
297static int halve_threshold_root = 15;
298static int inflate_threshold_root = 25;
19baf839 299
2373ce1c
RO
300
301static void __alias_free_mem(struct rcu_head *head)
19baf839 302{
2373ce1c
RO
303 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
304 kmem_cache_free(fn_alias_kmem, fa);
19baf839
RO
305}
306
2373ce1c 307static inline void alias_free_mem_rcu(struct fib_alias *fa)
19baf839 308{
2373ce1c
RO
309 call_rcu(&fa->rcu, __alias_free_mem);
310}
91b9a277 311
2373ce1c
RO
312static void __leaf_free_rcu(struct rcu_head *head)
313{
314 kfree(container_of(head, struct leaf, rcu));
315}
91b9a277 316
2373ce1c
RO
317static inline void free_leaf(struct leaf *leaf)
318{
319 call_rcu(&leaf->rcu, __leaf_free_rcu);
19baf839
RO
320}
321
2373ce1c 322static void __leaf_info_free_rcu(struct rcu_head *head)
19baf839 323{
2373ce1c 324 kfree(container_of(head, struct leaf_info, rcu));
19baf839
RO
325}
326
2373ce1c 327static inline void free_leaf_info(struct leaf_info *leaf)
19baf839 328{
2373ce1c 329 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
19baf839
RO
330}
331
f0e36f8c
PM
332static struct tnode *tnode_alloc(unsigned int size)
333{
2373ce1c
RO
334 struct page *pages;
335
336 if (size <= PAGE_SIZE)
337 return kcalloc(size, 1, GFP_KERNEL);
338
339 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
340 if (!pages)
341 return NULL;
342
343 return page_address(pages);
f0e36f8c
PM
344}
345
2373ce1c 346static void __tnode_free_rcu(struct rcu_head *head)
f0e36f8c 347{
2373ce1c 348 struct tnode *tn = container_of(head, struct tnode, rcu);
f0e36f8c 349 unsigned int size = sizeof(struct tnode) +
2373ce1c 350 (1 << tn->bits) * sizeof(struct node *);
f0e36f8c
PM
351
352 if (size <= PAGE_SIZE)
353 kfree(tn);
354 else
355 free_pages((unsigned long)tn, get_order(size));
356}
357
2373ce1c
RO
358static inline void tnode_free(struct tnode *tn)
359{
360 call_rcu(&tn->rcu, __tnode_free_rcu);
361}
362
363static struct leaf *leaf_new(void)
364{
365 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
366 if (l) {
367 l->parent = T_LEAF;
368 INIT_HLIST_HEAD(&l->list);
369 }
370 return l;
371}
372
373static struct leaf_info *leaf_info_new(int plen)
374{
375 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
376 if (li) {
377 li->plen = plen;
378 INIT_LIST_HEAD(&li->falh);
379 }
380 return li;
381}
382
19baf839
RO
383static struct tnode* tnode_new(t_key key, int pos, int bits)
384{
385 int nchildren = 1<<bits;
386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
f0e36f8c 387 struct tnode *tn = tnode_alloc(sz);
19baf839 388
91b9a277 389 if (tn) {
19baf839 390 memset(tn, 0, sz);
2373ce1c 391 tn->parent = T_TNODE;
19baf839
RO
392 tn->pos = pos;
393 tn->bits = bits;
394 tn->key = key;
395 tn->full_children = 0;
396 tn->empty_children = 1<<bits;
397 }
c877efb2 398
0c7770c7
SH
399 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
400 (unsigned int) (sizeof(struct node) * 1<<bits));
19baf839
RO
401 return tn;
402}
403
19baf839
RO
404/*
405 * Check whether a tnode 'n' is "full", i.e. it is an internal node
406 * and no bits are skipped. See discussion in dyntree paper p. 6
407 */
408
bb435b8d 409static inline int tnode_full(const struct tnode *tn, const struct node *n)
19baf839 410{
c877efb2 411 if (n == NULL || IS_LEAF(n))
19baf839
RO
412 return 0;
413
414 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
415}
416
c877efb2 417static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
19baf839
RO
418{
419 tnode_put_child_reorg(tn, i, n, -1);
420}
421
c877efb2 422 /*
19baf839
RO
423 * Add a child at position i overwriting the old value.
424 * Update the value of full_children and empty_children.
425 */
426
c877efb2 427static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
19baf839 428{
2373ce1c 429 struct node *chi = tn->child[i];
19baf839
RO
430 int isfull;
431
0c7770c7
SH
432 BUG_ON(i >= 1<<tn->bits);
433
19baf839
RO
434
435 /* update emptyChildren */
436 if (n == NULL && chi != NULL)
437 tn->empty_children++;
438 else if (n != NULL && chi == NULL)
439 tn->empty_children--;
c877efb2 440
19baf839 441 /* update fullChildren */
91b9a277 442 if (wasfull == -1)
19baf839
RO
443 wasfull = tnode_full(tn, chi);
444
445 isfull = tnode_full(tn, n);
c877efb2 446 if (wasfull && !isfull)
19baf839 447 tn->full_children--;
c877efb2 448 else if (!wasfull && isfull)
19baf839 449 tn->full_children++;
91b9a277 450
c877efb2
SH
451 if (n)
452 NODE_SET_PARENT(n, tn);
19baf839 453
2373ce1c 454 rcu_assign_pointer(tn->child[i], n);
19baf839
RO
455}
456
c877efb2 457static struct node *resize(struct trie *t, struct tnode *tn)
19baf839
RO
458{
459 int i;
2f36895a 460 int err = 0;
2f80b3c8 461 struct tnode *old_tn;
e6308be8
RO
462 int inflate_threshold_use;
463 int halve_threshold_use;
19baf839
RO
464
465 if (!tn)
466 return NULL;
467
0c7770c7
SH
468 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
469 tn, inflate_threshold, halve_threshold);
19baf839
RO
470
471 /* No children */
472 if (tn->empty_children == tnode_child_length(tn)) {
473 tnode_free(tn);
474 return NULL;
475 }
476 /* One child */
477 if (tn->empty_children == tnode_child_length(tn) - 1)
478 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 479 struct node *n;
19baf839 480
91b9a277 481 n = tn->child[i];
2373ce1c 482 if (!n)
91b9a277 483 continue;
91b9a277
OJ
484
485 /* compress one level */
2373ce1c 486 NODE_SET_PARENT(n, NULL);
91b9a277
OJ
487 tnode_free(tn);
488 return n;
19baf839 489 }
c877efb2 490 /*
19baf839
RO
491 * Double as long as the resulting node has a number of
492 * nonempty nodes that are above the threshold.
493 */
494
495 /*
c877efb2
SH
496 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
497 * the Helsinki University of Technology and Matti Tikkanen of Nokia
19baf839 498 * Telecommunications, page 6:
c877efb2 499 * "A node is doubled if the ratio of non-empty children to all
19baf839
RO
500 * children in the *doubled* node is at least 'high'."
501 *
c877efb2
SH
502 * 'high' in this instance is the variable 'inflate_threshold'. It
503 * is expressed as a percentage, so we multiply it with
504 * tnode_child_length() and instead of multiplying by 2 (since the
505 * child array will be doubled by inflate()) and multiplying
506 * the left-hand side by 100 (to handle the percentage thing) we
19baf839 507 * multiply the left-hand side by 50.
c877efb2
SH
508 *
509 * The left-hand side may look a bit weird: tnode_child_length(tn)
510 * - tn->empty_children is of course the number of non-null children
511 * in the current node. tn->full_children is the number of "full"
19baf839 512 * children, that is non-null tnodes with a skip value of 0.
c877efb2 513 * All of those will be doubled in the resulting inflated tnode, so
19baf839 514 * we just count them one extra time here.
c877efb2 515 *
19baf839 516 * A clearer way to write this would be:
c877efb2 517 *
19baf839 518 * to_be_doubled = tn->full_children;
c877efb2 519 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
19baf839
RO
520 * tn->full_children;
521 *
522 * new_child_length = tnode_child_length(tn) * 2;
523 *
c877efb2 524 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
19baf839
RO
525 * new_child_length;
526 * if (new_fill_factor >= inflate_threshold)
c877efb2
SH
527 *
528 * ...and so on, tho it would mess up the while () loop.
529 *
19baf839
RO
530 * anyway,
531 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
532 * inflate_threshold
c877efb2 533 *
19baf839
RO
534 * avoid a division:
535 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
536 * inflate_threshold * new_child_length
c877efb2 537 *
19baf839 538 * expand not_to_be_doubled and to_be_doubled, and shorten:
c877efb2 539 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 540 * tn->full_children) >= inflate_threshold * new_child_length
c877efb2 541 *
19baf839 542 * expand new_child_length:
c877efb2 543 * 100 * (tnode_child_length(tn) - tn->empty_children +
91b9a277 544 * tn->full_children) >=
19baf839 545 * inflate_threshold * tnode_child_length(tn) * 2
c877efb2 546 *
19baf839 547 * shorten again:
c877efb2 548 * 50 * (tn->full_children + tnode_child_length(tn) -
91b9a277 549 * tn->empty_children) >= inflate_threshold *
19baf839 550 * tnode_child_length(tn)
c877efb2 551 *
19baf839
RO
552 */
553
554 check_tnode(tn);
c877efb2 555
e6308be8
RO
556 /* Keep root node larger */
557
558 if(!tn->parent)
559 inflate_threshold_use = inflate_threshold_root;
560 else
561 inflate_threshold_use = inflate_threshold;
562
2f36895a 563 err = 0;
19baf839
RO
564 while ((tn->full_children > 0 &&
565 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
e6308be8 566 inflate_threshold_use * tnode_child_length(tn))) {
19baf839 567
2f80b3c8
RO
568 old_tn = tn;
569 tn = inflate(t, tn);
570 if (IS_ERR(tn)) {
571 tn = old_tn;
2f36895a
RO
572#ifdef CONFIG_IP_FIB_TRIE_STATS
573 t->stats.resize_node_skipped++;
574#endif
575 break;
576 }
19baf839
RO
577 }
578
579 check_tnode(tn);
580
581 /*
582 * Halve as long as the number of empty children in this
583 * node is above threshold.
584 */
2f36895a 585
e6308be8
RO
586
587 /* Keep root node larger */
588
589 if(!tn->parent)
590 halve_threshold_use = halve_threshold_root;
591 else
592 halve_threshold_use = halve_threshold;
593
2f36895a 594 err = 0;
19baf839
RO
595 while (tn->bits > 1 &&
596 100 * (tnode_child_length(tn) - tn->empty_children) <
e6308be8 597 halve_threshold_use * tnode_child_length(tn)) {
2f36895a 598
2f80b3c8
RO
599 old_tn = tn;
600 tn = halve(t, tn);
601 if (IS_ERR(tn)) {
602 tn = old_tn;
2f36895a
RO
603#ifdef CONFIG_IP_FIB_TRIE_STATS
604 t->stats.resize_node_skipped++;
605#endif
606 break;
607 }
608 }
19baf839 609
c877efb2 610
19baf839 611 /* Only one child remains */
19baf839
RO
612 if (tn->empty_children == tnode_child_length(tn) - 1)
613 for (i = 0; i < tnode_child_length(tn); i++) {
91b9a277 614 struct node *n;
19baf839 615
91b9a277 616 n = tn->child[i];
2373ce1c 617 if (!n)
91b9a277 618 continue;
91b9a277
OJ
619
620 /* compress one level */
621
2373ce1c 622 NODE_SET_PARENT(n, NULL);
91b9a277
OJ
623 tnode_free(tn);
624 return n;
19baf839
RO
625 }
626
627 return (struct node *) tn;
628}
629
2f80b3c8 630static struct tnode *inflate(struct trie *t, struct tnode *tn)
19baf839
RO
631{
632 struct tnode *inode;
633 struct tnode *oldtnode = tn;
634 int olen = tnode_child_length(tn);
635 int i;
636
0c7770c7 637 pr_debug("In inflate\n");
19baf839
RO
638
639 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
640
0c7770c7 641 if (!tn)
2f80b3c8 642 return ERR_PTR(-ENOMEM);
2f36895a
RO
643
644 /*
c877efb2
SH
645 * Preallocate and store tnodes before the actual work so we
646 * don't get into an inconsistent state if memory allocation
647 * fails. In case of failure we return the oldnode and inflate
2f36895a
RO
648 * of tnode is ignored.
649 */
91b9a277
OJ
650
651 for (i = 0; i < olen; i++) {
2f36895a
RO
652 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
653
654 if (inode &&
655 IS_TNODE(inode) &&
656 inode->pos == oldtnode->pos + oldtnode->bits &&
657 inode->bits > 1) {
658 struct tnode *left, *right;
2f36895a 659 t_key m = TKEY_GET_MASK(inode->pos, 1);
c877efb2 660
2f36895a
RO
661 left = tnode_new(inode->key&(~m), inode->pos + 1,
662 inode->bits - 1);
2f80b3c8
RO
663 if (!left)
664 goto nomem;
91b9a277 665
2f36895a
RO
666 right = tnode_new(inode->key|m, inode->pos + 1,
667 inode->bits - 1);
668
2f80b3c8
RO
669 if (!right) {
670 tnode_free(left);
671 goto nomem;
672 }
2f36895a
RO
673
674 put_child(t, tn, 2*i, (struct node *) left);
675 put_child(t, tn, 2*i+1, (struct node *) right);
676 }
677 }
678
91b9a277 679 for (i = 0; i < olen; i++) {
19baf839 680 struct node *node = tnode_get_child(oldtnode, i);
91b9a277
OJ
681 struct tnode *left, *right;
682 int size, j;
c877efb2 683
19baf839
RO
684 /* An empty child */
685 if (node == NULL)
686 continue;
687
688 /* A leaf or an internal node with skipped bits */
689
c877efb2 690 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
19baf839 691 tn->pos + tn->bits - 1) {
c877efb2 692 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
19baf839
RO
693 1) == 0)
694 put_child(t, tn, 2*i, node);
695 else
696 put_child(t, tn, 2*i+1, node);
697 continue;
698 }
699
700 /* An internal node with two children */
701 inode = (struct tnode *) node;
702
703 if (inode->bits == 1) {
704 put_child(t, tn, 2*i, inode->child[0]);
705 put_child(t, tn, 2*i+1, inode->child[1]);
706
707 tnode_free(inode);
91b9a277 708 continue;
19baf839
RO
709 }
710
91b9a277
OJ
711 /* An internal node with more than two children */
712
713 /* We will replace this node 'inode' with two new
714 * ones, 'left' and 'right', each with half of the
715 * original children. The two new nodes will have
716 * a position one bit further down the key and this
717 * means that the "significant" part of their keys
718 * (see the discussion near the top of this file)
719 * will differ by one bit, which will be "0" in
720 * left's key and "1" in right's key. Since we are
721 * moving the key position by one step, the bit that
722 * we are moving away from - the bit at position
723 * (inode->pos) - is the one that will differ between
724 * left and right. So... we synthesize that bit in the
725 * two new keys.
726 * The mask 'm' below will be a single "one" bit at
727 * the position (inode->pos)
728 */
19baf839 729
91b9a277
OJ
730 /* Use the old key, but set the new significant
731 * bit to zero.
732 */
2f36895a 733
91b9a277
OJ
734 left = (struct tnode *) tnode_get_child(tn, 2*i);
735 put_child(t, tn, 2*i, NULL);
2f36895a 736
91b9a277 737 BUG_ON(!left);
2f36895a 738
91b9a277
OJ
739 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
740 put_child(t, tn, 2*i+1, NULL);
19baf839 741
91b9a277 742 BUG_ON(!right);
19baf839 743
91b9a277
OJ
744 size = tnode_child_length(left);
745 for (j = 0; j < size; j++) {
746 put_child(t, left, j, inode->child[j]);
747 put_child(t, right, j, inode->child[j + size]);
19baf839 748 }
91b9a277
OJ
749 put_child(t, tn, 2*i, resize(t, left));
750 put_child(t, tn, 2*i+1, resize(t, right));
751
752 tnode_free(inode);
19baf839
RO
753 }
754 tnode_free(oldtnode);
755 return tn;
2f80b3c8
RO
756nomem:
757 {
758 int size = tnode_child_length(tn);
759 int j;
760
0c7770c7 761 for (j = 0; j < size; j++)
2f80b3c8
RO
762 if (tn->child[j])
763 tnode_free((struct tnode *)tn->child[j]);
764
765 tnode_free(tn);
0c7770c7 766
2f80b3c8
RO
767 return ERR_PTR(-ENOMEM);
768 }
19baf839
RO
769}
770
2f80b3c8 771static struct tnode *halve(struct trie *t, struct tnode *tn)
19baf839
RO
772{
773 struct tnode *oldtnode = tn;
774 struct node *left, *right;
775 int i;
776 int olen = tnode_child_length(tn);
777
0c7770c7 778 pr_debug("In halve\n");
c877efb2
SH
779
780 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
19baf839 781
2f80b3c8
RO
782 if (!tn)
783 return ERR_PTR(-ENOMEM);
2f36895a
RO
784
785 /*
c877efb2
SH
786 * Preallocate and store tnodes before the actual work so we
787 * don't get into an inconsistent state if memory allocation
788 * fails. In case of failure we return the oldnode and halve
2f36895a
RO
789 * of tnode is ignored.
790 */
791
91b9a277 792 for (i = 0; i < olen; i += 2) {
2f36895a
RO
793 left = tnode_get_child(oldtnode, i);
794 right = tnode_get_child(oldtnode, i+1);
c877efb2 795
2f36895a 796 /* Two nonempty children */
0c7770c7 797 if (left && right) {
2f80b3c8 798 struct tnode *newn;
0c7770c7 799
2f80b3c8 800 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
0c7770c7
SH
801
802 if (!newn)
2f80b3c8 803 goto nomem;
0c7770c7 804
2f80b3c8 805 put_child(t, tn, i/2, (struct node *)newn);
2f36895a 806 }
2f36895a 807
2f36895a 808 }
19baf839 809
91b9a277
OJ
810 for (i = 0; i < olen; i += 2) {
811 struct tnode *newBinNode;
812
19baf839
RO
813 left = tnode_get_child(oldtnode, i);
814 right = tnode_get_child(oldtnode, i+1);
c877efb2 815
19baf839
RO
816 /* At least one of the children is empty */
817 if (left == NULL) {
818 if (right == NULL) /* Both are empty */
819 continue;
820 put_child(t, tn, i/2, right);
91b9a277 821 continue;
0c7770c7 822 }
91b9a277
OJ
823
824 if (right == NULL) {
19baf839 825 put_child(t, tn, i/2, left);
91b9a277
OJ
826 continue;
827 }
c877efb2 828
19baf839 829 /* Two nonempty children */
91b9a277
OJ
830 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
831 put_child(t, tn, i/2, NULL);
91b9a277
OJ
832 put_child(t, newBinNode, 0, left);
833 put_child(t, newBinNode, 1, right);
834 put_child(t, tn, i/2, resize(t, newBinNode));
19baf839
RO
835 }
836 tnode_free(oldtnode);
837 return tn;
2f80b3c8
RO
838nomem:
839 {
840 int size = tnode_child_length(tn);
841 int j;
842
0c7770c7 843 for (j = 0; j < size; j++)
2f80b3c8
RO
844 if (tn->child[j])
845 tnode_free((struct tnode *)tn->child[j]);
846
847 tnode_free(tn);
0c7770c7 848
2f80b3c8
RO
849 return ERR_PTR(-ENOMEM);
850 }
19baf839
RO
851}
852
91b9a277 853static void trie_init(struct trie *t)
19baf839 854{
91b9a277
OJ
855 if (!t)
856 return;
857
858 t->size = 0;
2373ce1c 859 rcu_assign_pointer(t->trie, NULL);
91b9a277 860 t->revision = 0;
19baf839 861#ifdef CONFIG_IP_FIB_TRIE_STATS
91b9a277 862 memset(&t->stats, 0, sizeof(struct trie_use_stats));
19baf839 863#endif
19baf839
RO
864}
865
772cb712 866/* readside must use rcu_read_lock currently dump routines
2373ce1c
RO
867 via get_fa_head and dump */
868
772cb712 869static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
19baf839 870{
772cb712 871 struct hlist_head *head = &l->list;
19baf839
RO
872 struct hlist_node *node;
873 struct leaf_info *li;
874
2373ce1c 875 hlist_for_each_entry_rcu(li, node, head, hlist)
c877efb2 876 if (li->plen == plen)
19baf839 877 return li;
91b9a277 878
19baf839
RO
879 return NULL;
880}
881
882static inline struct list_head * get_fa_head(struct leaf *l, int plen)
883{
772cb712 884 struct leaf_info *li = find_leaf_info(l, plen);
c877efb2 885
91b9a277
OJ
886 if (!li)
887 return NULL;
c877efb2 888
91b9a277 889 return &li->falh;
19baf839
RO
890}
891
892static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
893{
2373ce1c
RO
894 struct leaf_info *li = NULL, *last = NULL;
895 struct hlist_node *node;
896
897 if (hlist_empty(head)) {
898 hlist_add_head_rcu(&new->hlist, head);
899 } else {
900 hlist_for_each_entry(li, node, head, hlist) {
901 if (new->plen > li->plen)
902 break;
903
904 last = li;
905 }
906 if (last)
907 hlist_add_after_rcu(&last->hlist, &new->hlist);
908 else
909 hlist_add_before_rcu(&new->hlist, &li->hlist);
910 }
19baf839
RO
911}
912
2373ce1c
RO
913/* rcu_read_lock needs to be hold by caller from readside */
914
19baf839
RO
915static struct leaf *
916fib_find_node(struct trie *t, u32 key)
917{
918 int pos;
919 struct tnode *tn;
920 struct node *n;
921
922 pos = 0;
2373ce1c 923 n = rcu_dereference(t->trie);
19baf839
RO
924
925 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
926 tn = (struct tnode *) n;
91b9a277 927
19baf839 928 check_tnode(tn);
91b9a277 929
c877efb2 930 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
91b9a277 931 pos = tn->pos + tn->bits;
19baf839 932 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
91b9a277 933 } else
19baf839
RO
934 break;
935 }
936 /* Case we have found a leaf. Compare prefixes */
937
91b9a277
OJ
938 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
939 return (struct leaf *)n;
940
19baf839
RO
941 return NULL;
942}
943
944static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
945{
19baf839
RO
946 int wasfull;
947 t_key cindex, key;
948 struct tnode *tp = NULL;
949
19baf839 950 key = tn->key;
19baf839
RO
951
952 while (tn != NULL && NODE_PARENT(tn) != NULL) {
19baf839
RO
953
954 tp = NODE_PARENT(tn);
955 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
956 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
957 tn = (struct tnode *) resize (t, (struct tnode *)tn);
958 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
91b9a277 959
c877efb2 960 if (!NODE_PARENT(tn))
19baf839
RO
961 break;
962
963 tn = NODE_PARENT(tn);
964 }
965 /* Handle last (top) tnode */
c877efb2 966 if (IS_TNODE(tn))
19baf839
RO
967 tn = (struct tnode*) resize(t, (struct tnode *)tn);
968
969 return (struct node*) tn;
970}
971
2373ce1c
RO
972/* only used from updater-side */
973
f835e471
RO
974static struct list_head *
975fib_insert_node(struct trie *t, int *err, u32 key, int plen)
19baf839
RO
976{
977 int pos, newpos;
978 struct tnode *tp = NULL, *tn = NULL;
979 struct node *n;
980 struct leaf *l;
981 int missbit;
c877efb2 982 struct list_head *fa_head = NULL;
19baf839
RO
983 struct leaf_info *li;
984 t_key cindex;
985
986 pos = 0;
c877efb2 987 n = t->trie;
19baf839 988
c877efb2
SH
989 /* If we point to NULL, stop. Either the tree is empty and we should
990 * just put a new leaf in if, or we have reached an empty child slot,
19baf839 991 * and we should just put our new leaf in that.
c877efb2
SH
992 * If we point to a T_TNODE, check if it matches our key. Note that
993 * a T_TNODE might be skipping any number of bits - its 'pos' need
19baf839
RO
994 * not be the parent's 'pos'+'bits'!
995 *
c877efb2 996 * If it does match the current key, get pos/bits from it, extract
19baf839
RO
997 * the index from our key, push the T_TNODE and walk the tree.
998 *
999 * If it doesn't, we have to replace it with a new T_TNODE.
1000 *
c877efb2
SH
1001 * If we point to a T_LEAF, it might or might not have the same key
1002 * as we do. If it does, just change the value, update the T_LEAF's
1003 * value, and return it.
19baf839
RO
1004 * If it doesn't, we need to replace it with a T_TNODE.
1005 */
1006
1007 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1008 tn = (struct tnode *) n;
91b9a277 1009
c877efb2 1010 check_tnode(tn);
91b9a277 1011
c877efb2 1012 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
19baf839 1013 tp = tn;
91b9a277 1014 pos = tn->pos + tn->bits;
19baf839
RO
1015 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1016
0c7770c7 1017 BUG_ON(n && NODE_PARENT(n) != tn);
91b9a277 1018 } else
19baf839
RO
1019 break;
1020 }
1021
1022 /*
1023 * n ----> NULL, LEAF or TNODE
1024 *
c877efb2 1025 * tp is n's (parent) ----> NULL or TNODE
19baf839
RO
1026 */
1027
91b9a277 1028 BUG_ON(tp && IS_LEAF(tp));
19baf839
RO
1029
1030 /* Case 1: n is a leaf. Compare prefixes */
1031
c877efb2 1032 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
91b9a277
OJ
1033 struct leaf *l = (struct leaf *) n;
1034
19baf839 1035 li = leaf_info_new(plen);
91b9a277 1036
c877efb2 1037 if (!li) {
f835e471
RO
1038 *err = -ENOMEM;
1039 goto err;
1040 }
19baf839
RO
1041
1042 fa_head = &li->falh;
1043 insert_leaf_info(&l->list, li);
1044 goto done;
1045 }
1046 t->size++;
1047 l = leaf_new();
1048
c877efb2 1049 if (!l) {
f835e471
RO
1050 *err = -ENOMEM;
1051 goto err;
1052 }
19baf839
RO
1053
1054 l->key = key;
1055 li = leaf_info_new(plen);
1056
c877efb2 1057 if (!li) {
f835e471
RO
1058 tnode_free((struct tnode *) l);
1059 *err = -ENOMEM;
1060 goto err;
1061 }
19baf839
RO
1062
1063 fa_head = &li->falh;
1064 insert_leaf_info(&l->list, li);
1065
19baf839 1066 if (t->trie && n == NULL) {
91b9a277 1067 /* Case 2: n is NULL, and will just insert a new leaf */
19baf839
RO
1068
1069 NODE_SET_PARENT(l, tp);
19baf839 1070
91b9a277
OJ
1071 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1072 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1073 } else {
1074 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
c877efb2
SH
1075 /*
1076 * Add a new tnode here
19baf839
RO
1077 * first tnode need some special handling
1078 */
1079
1080 if (tp)
91b9a277 1081 pos = tp->pos+tp->bits;
19baf839 1082 else
91b9a277
OJ
1083 pos = 0;
1084
c877efb2 1085 if (n) {
19baf839
RO
1086 newpos = tkey_mismatch(key, pos, n->key);
1087 tn = tnode_new(n->key, newpos, 1);
91b9a277 1088 } else {
19baf839 1089 newpos = 0;
c877efb2 1090 tn = tnode_new(key, newpos, 1); /* First tnode */
19baf839 1091 }
19baf839 1092
c877efb2 1093 if (!tn) {
f835e471
RO
1094 free_leaf_info(li);
1095 tnode_free((struct tnode *) l);
1096 *err = -ENOMEM;
1097 goto err;
91b9a277
OJ
1098 }
1099
19baf839
RO
1100 NODE_SET_PARENT(tn, tp);
1101
91b9a277 1102 missbit = tkey_extract_bits(key, newpos, 1);
19baf839
RO
1103 put_child(t, tn, missbit, (struct node *)l);
1104 put_child(t, tn, 1-missbit, n);
1105
c877efb2 1106 if (tp) {
19baf839
RO
1107 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1108 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
91b9a277 1109 } else {
2373ce1c 1110 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
19baf839
RO
1111 tp = tn;
1112 }
1113 }
91b9a277
OJ
1114
1115 if (tp && tp->pos + tp->bits > 32)
78c6671a 1116 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
19baf839 1117 tp, tp->pos, tp->bits, key, plen);
91b9a277 1118
19baf839 1119 /* Rebalance the trie */
2373ce1c
RO
1120
1121 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
f835e471
RO
1122done:
1123 t->revision++;
91b9a277 1124err:
19baf839
RO
1125 return fa_head;
1126}
1127
1128static int
1129fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1130 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1131{
1132 struct trie *t = (struct trie *) tb->tb_data;
1133 struct fib_alias *fa, *new_fa;
c877efb2 1134 struct list_head *fa_head = NULL;
19baf839
RO
1135 struct fib_info *fi;
1136 int plen = r->rtm_dst_len;
1137 int type = r->rtm_type;
1138 u8 tos = r->rtm_tos;
1139 u32 key, mask;
1140 int err;
1141 struct leaf *l;
1142
1143 if (plen > 32)
1144 return -EINVAL;
1145
1146 key = 0;
c877efb2 1147 if (rta->rta_dst)
19baf839
RO
1148 memcpy(&key, rta->rta_dst, 4);
1149
1150 key = ntohl(key);
1151
0c7770c7 1152 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
19baf839 1153
91b9a277 1154 mask = ntohl(inet_make_mask(plen));
19baf839 1155
c877efb2 1156 if (key & ~mask)
19baf839
RO
1157 return -EINVAL;
1158
1159 key = key & mask;
1160
91b9a277
OJ
1161 fi = fib_create_info(r, rta, nlhdr, &err);
1162
1163 if (!fi)
19baf839
RO
1164 goto err;
1165
1166 l = fib_find_node(t, key);
c877efb2 1167 fa = NULL;
19baf839 1168
c877efb2 1169 if (l) {
19baf839
RO
1170 fa_head = get_fa_head(l, plen);
1171 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1172 }
1173
1174 /* Now fa, if non-NULL, points to the first fib alias
1175 * with the same keys [prefix,tos,priority], if such key already
1176 * exists or to the node before which we will insert new one.
1177 *
1178 * If fa is NULL, we will need to allocate a new one and
1179 * insert to the head of f.
1180 *
1181 * If f is NULL, no fib node matched the destination key
1182 * and we need to allocate a new one of those as well.
1183 */
1184
91b9a277 1185 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
19baf839
RO
1186 struct fib_alias *fa_orig;
1187
1188 err = -EEXIST;
1189 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1190 goto out;
1191
1192 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1193 struct fib_info *fi_drop;
1194 u8 state;
1195
2373ce1c
RO
1196 err = -ENOBUFS;
1197 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1198 if (new_fa == NULL)
1199 goto out;
19baf839
RO
1200
1201 fi_drop = fa->fa_info;
2373ce1c
RO
1202 new_fa->fa_tos = fa->fa_tos;
1203 new_fa->fa_info = fi;
1204 new_fa->fa_type = type;
1205 new_fa->fa_scope = r->rtm_scope;
19baf839 1206 state = fa->fa_state;
2373ce1c 1207 new_fa->fa_state &= ~FA_S_ACCESSED;
19baf839 1208
2373ce1c
RO
1209 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1210 alias_free_mem_rcu(fa);
19baf839
RO
1211
1212 fib_release_info(fi_drop);
1213 if (state & FA_S_ACCESSED)
91b9a277 1214 rt_cache_flush(-1);
19baf839 1215
91b9a277 1216 goto succeeded;
19baf839
RO
1217 }
1218 /* Error if we find a perfect match which
1219 * uses the same scope, type, and nexthop
1220 * information.
1221 */
1222 fa_orig = fa;
1223 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1224 if (fa->fa_tos != tos)
1225 break;
1226 if (fa->fa_info->fib_priority != fi->fib_priority)
1227 break;
1228 if (fa->fa_type == type &&
1229 fa->fa_scope == r->rtm_scope &&
1230 fa->fa_info == fi) {
1231 goto out;
1232 }
1233 }
1234 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1235 fa = fa_orig;
1236 }
1237 err = -ENOENT;
91b9a277 1238 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
19baf839
RO
1239 goto out;
1240
1241 err = -ENOBUFS;
1242 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1243 if (new_fa == NULL)
1244 goto out;
1245
1246 new_fa->fa_info = fi;
1247 new_fa->fa_tos = tos;
1248 new_fa->fa_type = type;
1249 new_fa->fa_scope = r->rtm_scope;
1250 new_fa->fa_state = 0;
19baf839
RO
1251 /*
1252 * Insert new entry to the list.
1253 */
1254
c877efb2 1255 if (!fa_head) {
f835e471
RO
1256 fa_head = fib_insert_node(t, &err, key, plen);
1257 err = 0;
c877efb2 1258 if (err)
f835e471
RO
1259 goto out_free_new_fa;
1260 }
19baf839 1261
2373ce1c
RO
1262 list_add_tail_rcu(&new_fa->fa_list,
1263 (fa ? &fa->fa_list : fa_head));
19baf839
RO
1264
1265 rt_cache_flush(-1);
1266 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1267succeeded:
1268 return 0;
f835e471
RO
1269
1270out_free_new_fa:
1271 kmem_cache_free(fn_alias_kmem, new_fa);
19baf839
RO
1272out:
1273 fib_release_info(fi);
91b9a277 1274err:
19baf839
RO
1275 return err;
1276}
1277
2373ce1c 1278
772cb712 1279/* should be called with rcu_read_lock */
0c7770c7
SH
1280static inline int check_leaf(struct trie *t, struct leaf *l,
1281 t_key key, int *plen, const struct flowi *flp,
06c74270 1282 struct fib_result *res)
19baf839 1283{
06c74270 1284 int err, i;
19baf839
RO
1285 t_key mask;
1286 struct leaf_info *li;
1287 struct hlist_head *hhead = &l->list;
1288 struct hlist_node *node;
c877efb2 1289
2373ce1c 1290 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
19baf839
RO
1291 i = li->plen;
1292 mask = ntohl(inet_make_mask(i));
c877efb2 1293 if (l->key != (key & mask))
19baf839
RO
1294 continue;
1295
06c74270 1296 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
19baf839
RO
1297 *plen = i;
1298#ifdef CONFIG_IP_FIB_TRIE_STATS
1299 t->stats.semantic_match_passed++;
1300#endif
06c74270 1301 return err;
19baf839
RO
1302 }
1303#ifdef CONFIG_IP_FIB_TRIE_STATS
1304 t->stats.semantic_match_miss++;
1305#endif
1306 }
06c74270 1307 return 1;
19baf839
RO
1308}
1309
1310static int
1311fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1312{
1313 struct trie *t = (struct trie *) tb->tb_data;
1314 int plen, ret = 0;
1315 struct node *n;
1316 struct tnode *pn;
1317 int pos, bits;
91b9a277 1318 t_key key = ntohl(flp->fl4_dst);
19baf839
RO
1319 int chopped_off;
1320 t_key cindex = 0;
1321 int current_prefix_length = KEYLENGTH;
91b9a277
OJ
1322 struct tnode *cn;
1323 t_key node_prefix, key_prefix, pref_mismatch;
1324 int mp;
1325
2373ce1c 1326 rcu_read_lock();
91b9a277 1327
2373ce1c 1328 n = rcu_dereference(t->trie);
c877efb2 1329 if (!n)
19baf839
RO
1330 goto failed;
1331
1332#ifdef CONFIG_IP_FIB_TRIE_STATS
1333 t->stats.gets++;
1334#endif
1335
1336 /* Just a leaf? */
1337 if (IS_LEAF(n)) {
06c74270 1338 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
19baf839
RO
1339 goto found;
1340 goto failed;
1341 }
1342 pn = (struct tnode *) n;
1343 chopped_off = 0;
c877efb2 1344
91b9a277 1345 while (pn) {
19baf839
RO
1346 pos = pn->pos;
1347 bits = pn->bits;
1348
c877efb2 1349 if (!chopped_off)
19baf839
RO
1350 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1351
1352 n = tnode_get_child(pn, cindex);
1353
1354 if (n == NULL) {
1355#ifdef CONFIG_IP_FIB_TRIE_STATS
1356 t->stats.null_node_hit++;
1357#endif
1358 goto backtrace;
1359 }
1360
91b9a277
OJ
1361 if (IS_LEAF(n)) {
1362 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1363 goto found;
1364 else
1365 goto backtrace;
1366 }
1367
19baf839
RO
1368#define HL_OPTIMIZE
1369#ifdef HL_OPTIMIZE
91b9a277 1370 cn = (struct tnode *)n;
19baf839 1371
91b9a277
OJ
1372 /*
1373 * It's a tnode, and we can do some extra checks here if we
1374 * like, to avoid descending into a dead-end branch.
1375 * This tnode is in the parent's child array at index
1376 * key[p_pos..p_pos+p_bits] but potentially with some bits
1377 * chopped off, so in reality the index may be just a
1378 * subprefix, padded with zero at the end.
1379 * We can also take a look at any skipped bits in this
1380 * tnode - everything up to p_pos is supposed to be ok,
1381 * and the non-chopped bits of the index (se previous
1382 * paragraph) are also guaranteed ok, but the rest is
1383 * considered unknown.
1384 *
1385 * The skipped bits are key[pos+bits..cn->pos].
1386 */
19baf839 1387
91b9a277
OJ
1388 /* If current_prefix_length < pos+bits, we are already doing
1389 * actual prefix matching, which means everything from
1390 * pos+(bits-chopped_off) onward must be zero along some
1391 * branch of this subtree - otherwise there is *no* valid
1392 * prefix present. Here we can only check the skipped
1393 * bits. Remember, since we have already indexed into the
1394 * parent's child array, we know that the bits we chopped of
1395 * *are* zero.
1396 */
19baf839 1397
91b9a277 1398 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
19baf839 1399
91b9a277
OJ
1400 if (current_prefix_length < pos+bits) {
1401 if (tkey_extract_bits(cn->key, current_prefix_length,
1402 cn->pos - current_prefix_length) != 0 ||
1403 !(cn->child[0]))
1404 goto backtrace;
1405 }
19baf839 1406
91b9a277
OJ
1407 /*
1408 * If chopped_off=0, the index is fully validated and we
1409 * only need to look at the skipped bits for this, the new,
1410 * tnode. What we actually want to do is to find out if
1411 * these skipped bits match our key perfectly, or if we will
1412 * have to count on finding a matching prefix further down,
1413 * because if we do, we would like to have some way of
1414 * verifying the existence of such a prefix at this point.
1415 */
19baf839 1416
91b9a277
OJ
1417 /* The only thing we can do at this point is to verify that
1418 * any such matching prefix can indeed be a prefix to our
1419 * key, and if the bits in the node we are inspecting that
1420 * do not match our key are not ZERO, this cannot be true.
1421 * Thus, find out where there is a mismatch (before cn->pos)
1422 * and verify that all the mismatching bits are zero in the
1423 * new tnode's key.
1424 */
19baf839 1425
91b9a277
OJ
1426 /* Note: We aren't very concerned about the piece of the key
1427 * that precede pn->pos+pn->bits, since these have already been
1428 * checked. The bits after cn->pos aren't checked since these are
1429 * by definition "unknown" at this point. Thus, what we want to
1430 * see is if we are about to enter the "prefix matching" state,
1431 * and in that case verify that the skipped bits that will prevail
1432 * throughout this subtree are zero, as they have to be if we are
1433 * to find a matching prefix.
1434 */
1435
1436 node_prefix = MASK_PFX(cn->key, cn->pos);
1437 key_prefix = MASK_PFX(key, cn->pos);
1438 pref_mismatch = key_prefix^node_prefix;
1439 mp = 0;
1440
1441 /* In short: If skipped bits in this node do not match the search
1442 * key, enter the "prefix matching" state.directly.
1443 */
1444 if (pref_mismatch) {
1445 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1446 mp++;
1447 pref_mismatch = pref_mismatch <<1;
1448 }
1449 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1450
1451 if (key_prefix != 0)
1452 goto backtrace;
1453
1454 if (current_prefix_length >= cn->pos)
1455 current_prefix_length = mp;
c877efb2 1456 }
91b9a277
OJ
1457#endif
1458 pn = (struct tnode *)n; /* Descend */
1459 chopped_off = 0;
1460 continue;
1461
19baf839
RO
1462backtrace:
1463 chopped_off++;
1464
1465 /* As zero don't change the child key (cindex) */
91b9a277 1466 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
19baf839 1467 chopped_off++;
19baf839
RO
1468
1469 /* Decrease current_... with bits chopped off */
1470 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1471 current_prefix_length = pn->pos + pn->bits - chopped_off;
91b9a277 1472
19baf839 1473 /*
c877efb2 1474 * Either we do the actual chop off according or if we have
19baf839
RO
1475 * chopped off all bits in this tnode walk up to our parent.
1476 */
1477
91b9a277 1478 if (chopped_off <= pn->bits) {
19baf839 1479 cindex &= ~(1 << (chopped_off-1));
91b9a277 1480 } else {
c877efb2 1481 if (NODE_PARENT(pn) == NULL)
19baf839 1482 goto failed;
91b9a277 1483
19baf839
RO
1484 /* Get Child's index */
1485 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1486 pn = NODE_PARENT(pn);
1487 chopped_off = 0;
1488
1489#ifdef CONFIG_IP_FIB_TRIE_STATS
1490 t->stats.backtrack++;
1491#endif
1492 goto backtrace;
c877efb2 1493 }
19baf839
RO
1494 }
1495failed:
c877efb2 1496 ret = 1;
19baf839 1497found:
2373ce1c 1498 rcu_read_unlock();
19baf839
RO
1499 return ret;
1500}
1501
2373ce1c 1502/* only called from updater side */
19baf839
RO
1503static int trie_leaf_remove(struct trie *t, t_key key)
1504{
1505 t_key cindex;
1506 struct tnode *tp = NULL;
1507 struct node *n = t->trie;
1508 struct leaf *l;
1509
0c7770c7 1510 pr_debug("entering trie_leaf_remove(%p)\n", n);
19baf839
RO
1511
1512 /* Note that in the case skipped bits, those bits are *not* checked!
c877efb2 1513 * When we finish this, we will have NULL or a T_LEAF, and the
19baf839
RO
1514 * T_LEAF may or may not match our key.
1515 */
1516
91b9a277 1517 while (n != NULL && IS_TNODE(n)) {
19baf839
RO
1518 struct tnode *tn = (struct tnode *) n;
1519 check_tnode(tn);
1520 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1521
0c7770c7 1522 BUG_ON(n && NODE_PARENT(n) != tn);
91b9a277 1523 }
19baf839
RO
1524 l = (struct leaf *) n;
1525
c877efb2 1526 if (!n || !tkey_equals(l->key, key))
19baf839 1527 return 0;
c877efb2
SH
1528
1529 /*
1530 * Key found.
1531 * Remove the leaf and rebalance the tree
19baf839
RO
1532 */
1533
1534 t->revision++;
1535 t->size--;
1536
2373ce1c 1537 preempt_disable();
19baf839
RO
1538 tp = NODE_PARENT(n);
1539 tnode_free((struct tnode *) n);
1540
c877efb2 1541 if (tp) {
19baf839
RO
1542 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1543 put_child(t, (struct tnode *)tp, cindex, NULL);
2373ce1c 1544 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
91b9a277 1545 } else
2373ce1c
RO
1546 rcu_assign_pointer(t->trie, NULL);
1547 preempt_enable();
19baf839
RO
1548
1549 return 1;
1550}
1551
1552static int
1553fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
91b9a277 1554 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
19baf839
RO
1555{
1556 struct trie *t = (struct trie *) tb->tb_data;
1557 u32 key, mask;
1558 int plen = r->rtm_dst_len;
1559 u8 tos = r->rtm_tos;
1560 struct fib_alias *fa, *fa_to_delete;
1561 struct list_head *fa_head;
1562 struct leaf *l;
91b9a277
OJ
1563 struct leaf_info *li;
1564
19baf839 1565
c877efb2 1566 if (plen > 32)
19baf839
RO
1567 return -EINVAL;
1568
1569 key = 0;
c877efb2 1570 if (rta->rta_dst)
19baf839
RO
1571 memcpy(&key, rta->rta_dst, 4);
1572
1573 key = ntohl(key);
91b9a277 1574 mask = ntohl(inet_make_mask(plen));
19baf839 1575
c877efb2 1576 if (key & ~mask)
19baf839
RO
1577 return -EINVAL;
1578
1579 key = key & mask;
1580 l = fib_find_node(t, key);
1581
c877efb2 1582 if (!l)
19baf839
RO
1583 return -ESRCH;
1584
1585 fa_head = get_fa_head(l, plen);
1586 fa = fib_find_alias(fa_head, tos, 0);
1587
1588 if (!fa)
1589 return -ESRCH;
1590
0c7770c7 1591 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
19baf839
RO
1592
1593 fa_to_delete = NULL;
1594 fa_head = fa->fa_list.prev;
2373ce1c 1595
19baf839
RO
1596 list_for_each_entry(fa, fa_head, fa_list) {
1597 struct fib_info *fi = fa->fa_info;
1598
1599 if (fa->fa_tos != tos)
1600 break;
1601
1602 if ((!r->rtm_type ||
1603 fa->fa_type == r->rtm_type) &&
1604 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1605 fa->fa_scope == r->rtm_scope) &&
1606 (!r->rtm_protocol ||
1607 fi->fib_protocol == r->rtm_protocol) &&
1608 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1609 fa_to_delete = fa;
1610 break;
1611 }
1612 }
1613
91b9a277
OJ
1614 if (!fa_to_delete)
1615 return -ESRCH;
19baf839 1616
91b9a277
OJ
1617 fa = fa_to_delete;
1618 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1619
1620 l = fib_find_node(t, key);
772cb712 1621 li = find_leaf_info(l, plen);
19baf839 1622
2373ce1c 1623 list_del_rcu(&fa->fa_list);
19baf839 1624
91b9a277 1625 if (list_empty(fa_head)) {
2373ce1c 1626 hlist_del_rcu(&li->hlist);
91b9a277 1627 free_leaf_info(li);
2373ce1c 1628 }
19baf839 1629
91b9a277
OJ
1630 if (hlist_empty(&l->list))
1631 trie_leaf_remove(t, key);
19baf839 1632
91b9a277
OJ
1633 if (fa->fa_state & FA_S_ACCESSED)
1634 rt_cache_flush(-1);
19baf839 1635
2373ce1c
RO
1636 fib_release_info(fa->fa_info);
1637 alias_free_mem_rcu(fa);
91b9a277 1638 return 0;
19baf839
RO
1639}
1640
1641static int trie_flush_list(struct trie *t, struct list_head *head)
1642{
1643 struct fib_alias *fa, *fa_node;
1644 int found = 0;
1645
1646 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1647 struct fib_info *fi = fa->fa_info;
19baf839 1648
2373ce1c
RO
1649 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1650 list_del_rcu(&fa->fa_list);
1651 fib_release_info(fa->fa_info);
1652 alias_free_mem_rcu(fa);
19baf839
RO
1653 found++;
1654 }
1655 }
1656 return found;
1657}
1658
1659static int trie_flush_leaf(struct trie *t, struct leaf *l)
1660{
1661 int found = 0;
1662 struct hlist_head *lih = &l->list;
1663 struct hlist_node *node, *tmp;
1664 struct leaf_info *li = NULL;
1665
1666 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
19baf839
RO
1667 found += trie_flush_list(t, &li->falh);
1668
1669 if (list_empty(&li->falh)) {
2373ce1c 1670 hlist_del_rcu(&li->hlist);
19baf839
RO
1671 free_leaf_info(li);
1672 }
1673 }
1674 return found;
1675}
1676
2373ce1c
RO
1677/* rcu_read_lock needs to be hold by caller from readside */
1678
19baf839
RO
1679static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1680{
1681 struct node *c = (struct node *) thisleaf;
1682 struct tnode *p;
1683 int idx;
2373ce1c 1684 struct node *trie = rcu_dereference(t->trie);
19baf839 1685
c877efb2 1686 if (c == NULL) {
2373ce1c 1687 if (trie == NULL)
19baf839
RO
1688 return NULL;
1689
2373ce1c
RO
1690 if (IS_LEAF(trie)) /* trie w. just a leaf */
1691 return (struct leaf *) trie;
19baf839 1692
2373ce1c 1693 p = (struct tnode*) trie; /* Start */
91b9a277 1694 } else
19baf839 1695 p = (struct tnode *) NODE_PARENT(c);
c877efb2 1696
19baf839
RO
1697 while (p) {
1698 int pos, last;
1699
1700 /* Find the next child of the parent */
c877efb2
SH
1701 if (c)
1702 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1703 else
19baf839
RO
1704 pos = 0;
1705
1706 last = 1 << p->bits;
91b9a277 1707 for (idx = pos; idx < last ; idx++) {
2373ce1c
RO
1708 c = rcu_dereference(p->child[idx]);
1709
1710 if (!c)
91b9a277
OJ
1711 continue;
1712
1713 /* Decend if tnode */
2373ce1c
RO
1714 while (IS_TNODE(c)) {
1715 p = (struct tnode *) c;
1716 idx = 0;
91b9a277
OJ
1717
1718 /* Rightmost non-NULL branch */
1719 if (p && IS_TNODE(p))
2373ce1c
RO
1720 while (!(c = rcu_dereference(p->child[idx]))
1721 && idx < (1<<p->bits)) idx++;
91b9a277
OJ
1722
1723 /* Done with this tnode? */
2373ce1c 1724 if (idx >= (1 << p->bits) || !c)
91b9a277 1725 goto up;
19baf839 1726 }
2373ce1c 1727 return (struct leaf *) c;
19baf839
RO
1728 }
1729up:
1730 /* No more children go up one step */
91b9a277 1731 c = (struct node *) p;
19baf839
RO
1732 p = (struct tnode *) NODE_PARENT(p);
1733 }
1734 return NULL; /* Ready. Root of trie */
1735}
1736
1737static int fn_trie_flush(struct fib_table *tb)
1738{
1739 struct trie *t = (struct trie *) tb->tb_data;
1740 struct leaf *ll = NULL, *l = NULL;
1741 int found = 0, h;
1742
1743 t->revision++;
1744
91b9a277 1745 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1746 found += trie_flush_leaf(t, l);
1747
1748 if (ll && hlist_empty(&ll->list))
1749 trie_leaf_remove(t, ll->key);
1750 ll = l;
1751 }
1752
1753 if (ll && hlist_empty(&ll->list))
1754 trie_leaf_remove(t, ll->key);
1755
0c7770c7 1756 pr_debug("trie_flush found=%d\n", found);
19baf839
RO
1757 return found;
1758}
1759
91b9a277 1760static int trie_last_dflt = -1;
19baf839
RO
1761
1762static void
1763fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1764{
1765 struct trie *t = (struct trie *) tb->tb_data;
1766 int order, last_idx;
1767 struct fib_info *fi = NULL;
1768 struct fib_info *last_resort;
1769 struct fib_alias *fa = NULL;
1770 struct list_head *fa_head;
1771 struct leaf *l;
1772
1773 last_idx = -1;
1774 last_resort = NULL;
1775 order = -1;
1776
2373ce1c 1777 rcu_read_lock();
c877efb2 1778
19baf839 1779 l = fib_find_node(t, 0);
c877efb2 1780 if (!l)
19baf839
RO
1781 goto out;
1782
1783 fa_head = get_fa_head(l, 0);
c877efb2 1784 if (!fa_head)
19baf839
RO
1785 goto out;
1786
c877efb2 1787 if (list_empty(fa_head))
19baf839
RO
1788 goto out;
1789
2373ce1c 1790 list_for_each_entry_rcu(fa, fa_head, fa_list) {
19baf839 1791 struct fib_info *next_fi = fa->fa_info;
91b9a277 1792
19baf839
RO
1793 if (fa->fa_scope != res->scope ||
1794 fa->fa_type != RTN_UNICAST)
1795 continue;
91b9a277 1796
19baf839
RO
1797 if (next_fi->fib_priority > res->fi->fib_priority)
1798 break;
1799 if (!next_fi->fib_nh[0].nh_gw ||
1800 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1801 continue;
1802 fa->fa_state |= FA_S_ACCESSED;
91b9a277 1803
19baf839
RO
1804 if (fi == NULL) {
1805 if (next_fi != res->fi)
1806 break;
1807 } else if (!fib_detect_death(fi, order, &last_resort,
1808 &last_idx, &trie_last_dflt)) {
1809 if (res->fi)
1810 fib_info_put(res->fi);
1811 res->fi = fi;
1812 atomic_inc(&fi->fib_clntref);
1813 trie_last_dflt = order;
1814 goto out;
1815 }
1816 fi = next_fi;
1817 order++;
1818 }
1819 if (order <= 0 || fi == NULL) {
1820 trie_last_dflt = -1;
1821 goto out;
1822 }
1823
1824 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1825 if (res->fi)
1826 fib_info_put(res->fi);
1827 res->fi = fi;
1828 atomic_inc(&fi->fib_clntref);
1829 trie_last_dflt = order;
1830 goto out;
1831 }
1832 if (last_idx >= 0) {
1833 if (res->fi)
1834 fib_info_put(res->fi);
1835 res->fi = last_resort;
1836 if (last_resort)
1837 atomic_inc(&last_resort->fib_clntref);
1838 }
1839 trie_last_dflt = last_idx;
1840 out:;
2373ce1c 1841 rcu_read_unlock();
19baf839
RO
1842}
1843
c877efb2 1844static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
19baf839
RO
1845 struct sk_buff *skb, struct netlink_callback *cb)
1846{
1847 int i, s_i;
1848 struct fib_alias *fa;
1849
91b9a277 1850 u32 xkey = htonl(key);
19baf839 1851
91b9a277 1852 s_i = cb->args[3];
19baf839
RO
1853 i = 0;
1854
2373ce1c
RO
1855 /* rcu_read_lock is hold by caller */
1856
1857 list_for_each_entry_rcu(fa, fah, fa_list) {
19baf839
RO
1858 if (i < s_i) {
1859 i++;
1860 continue;
1861 }
78c6671a 1862 BUG_ON(!fa->fa_info);
19baf839
RO
1863
1864 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1865 cb->nlh->nlmsg_seq,
1866 RTM_NEWROUTE,
1867 tb->tb_id,
1868 fa->fa_type,
1869 fa->fa_scope,
1870 &xkey,
1871 plen,
1872 fa->fa_tos,
90f66914 1873 fa->fa_info, 0) < 0) {
19baf839
RO
1874 cb->args[3] = i;
1875 return -1;
91b9a277 1876 }
19baf839
RO
1877 i++;
1878 }
91b9a277 1879 cb->args[3] = i;
19baf839
RO
1880 return skb->len;
1881}
1882
c877efb2 1883static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
19baf839
RO
1884 struct netlink_callback *cb)
1885{
1886 int h, s_h;
1887 struct list_head *fa_head;
1888 struct leaf *l = NULL;
19baf839 1889
91b9a277 1890 s_h = cb->args[2];
19baf839 1891
91b9a277 1892 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
19baf839
RO
1893 if (h < s_h)
1894 continue;
1895 if (h > s_h)
1896 memset(&cb->args[3], 0,
1897 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1898
1899 fa_head = get_fa_head(l, plen);
91b9a277 1900
c877efb2 1901 if (!fa_head)
19baf839
RO
1902 continue;
1903
c877efb2 1904 if (list_empty(fa_head))
19baf839
RO
1905 continue;
1906
1907 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
91b9a277 1908 cb->args[2] = h;
19baf839
RO
1909 return -1;
1910 }
1911 }
91b9a277 1912 cb->args[2] = h;
19baf839
RO
1913 return skb->len;
1914}
1915
1916static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1917{
1918 int m, s_m;
1919 struct trie *t = (struct trie *) tb->tb_data;
1920
1921 s_m = cb->args[1];
1922
2373ce1c 1923 rcu_read_lock();
91b9a277 1924 for (m = 0; m <= 32; m++) {
19baf839
RO
1925 if (m < s_m)
1926 continue;
1927 if (m > s_m)
1928 memset(&cb->args[2], 0,
91b9a277 1929 sizeof(cb->args) - 2*sizeof(cb->args[0]));
19baf839
RO
1930
1931 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1932 cb->args[1] = m;
1933 goto out;
1934 }
1935 }
2373ce1c 1936 rcu_read_unlock();
19baf839
RO
1937 cb->args[1] = m;
1938 return skb->len;
91b9a277 1939out:
2373ce1c 1940 rcu_read_unlock();
19baf839
RO
1941 return -1;
1942}
1943
1944/* Fix more generic FIB names for init later */
1945
1946#ifdef CONFIG_IP_MULTIPLE_TABLES
1947struct fib_table * fib_hash_init(int id)
1948#else
1949struct fib_table * __init fib_hash_init(int id)
1950#endif
1951{
1952 struct fib_table *tb;
1953 struct trie *t;
1954
1955 if (fn_alias_kmem == NULL)
1956 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1957 sizeof(struct fib_alias),
1958 0, SLAB_HWCACHE_ALIGN,
1959 NULL, NULL);
1960
1961 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1962 GFP_KERNEL);
1963 if (tb == NULL)
1964 return NULL;
1965
1966 tb->tb_id = id;
1967 tb->tb_lookup = fn_trie_lookup;
1968 tb->tb_insert = fn_trie_insert;
1969 tb->tb_delete = fn_trie_delete;
1970 tb->tb_flush = fn_trie_flush;
1971 tb->tb_select_default = fn_trie_select_default;
1972 tb->tb_dump = fn_trie_dump;
1973 memset(tb->tb_data, 0, sizeof(struct trie));
1974
1975 t = (struct trie *) tb->tb_data;
1976
1977 trie_init(t);
1978
c877efb2 1979 if (id == RT_TABLE_LOCAL)
91b9a277 1980 trie_local = t;
c877efb2 1981 else if (id == RT_TABLE_MAIN)
91b9a277 1982 trie_main = t;
19baf839
RO
1983
1984 if (id == RT_TABLE_LOCAL)
78c6671a 1985 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
19baf839
RO
1986
1987 return tb;
1988}
1989
cb7b593c
SH
1990#ifdef CONFIG_PROC_FS
1991/* Depth first Trie walk iterator */
1992struct fib_trie_iter {
1993 struct tnode *tnode;
1994 struct trie *trie;
1995 unsigned index;
1996 unsigned depth;
1997};
19baf839 1998
cb7b593c 1999static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
19baf839 2000{
cb7b593c
SH
2001 struct tnode *tn = iter->tnode;
2002 unsigned cindex = iter->index;
2003 struct tnode *p;
19baf839 2004
cb7b593c
SH
2005 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2006 iter->tnode, iter->index, iter->depth);
2007rescan:
2008 while (cindex < (1<<tn->bits)) {
2009 struct node *n = tnode_get_child(tn, cindex);
19baf839 2010
cb7b593c
SH
2011 if (n) {
2012 if (IS_LEAF(n)) {
2013 iter->tnode = tn;
2014 iter->index = cindex + 1;
2015 } else {
2016 /* push down one level */
2017 iter->tnode = (struct tnode *) n;
2018 iter->index = 0;
2019 ++iter->depth;
2020 }
2021 return n;
2022 }
19baf839 2023
cb7b593c
SH
2024 ++cindex;
2025 }
91b9a277 2026
cb7b593c
SH
2027 /* Current node exhausted, pop back up */
2028 p = NODE_PARENT(tn);
2029 if (p) {
2030 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2031 tn = p;
2032 --iter->depth;
2033 goto rescan;
19baf839 2034 }
cb7b593c
SH
2035
2036 /* got root? */
2037 return NULL;
19baf839
RO
2038}
2039
cb7b593c
SH
2040static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2041 struct trie *t)
19baf839 2042{
cb7b593c 2043 struct node *n = rcu_dereference(t->trie);
19baf839 2044
cb7b593c
SH
2045 if (n && IS_TNODE(n)) {
2046 iter->tnode = (struct tnode *) n;
2047 iter->trie = t;
2048 iter->index = 0;
1d25cd6c 2049 iter->depth = 1;
cb7b593c 2050 return n;
91b9a277 2051 }
cb7b593c
SH
2052 return NULL;
2053}
91b9a277 2054
cb7b593c
SH
2055static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2056{
2057 struct node *n;
2058 struct fib_trie_iter iter;
91b9a277 2059
cb7b593c 2060 memset(s, 0, sizeof(*s));
91b9a277 2061
cb7b593c
SH
2062 rcu_read_lock();
2063 for (n = fib_trie_get_first(&iter, t); n;
2064 n = fib_trie_get_next(&iter)) {
2065 if (IS_LEAF(n)) {
2066 s->leaves++;
2067 s->totdepth += iter.depth;
2068 if (iter.depth > s->maxdepth)
2069 s->maxdepth = iter.depth;
2070 } else {
2071 const struct tnode *tn = (const struct tnode *) n;
2072 int i;
2073
2074 s->tnodes++;
2075 s->nodesizes[tn->bits]++;
2076 for (i = 0; i < (1<<tn->bits); i++)
2077 if (!tn->child[i])
2078 s->nullpointers++;
19baf839 2079 }
19baf839 2080 }
2373ce1c 2081 rcu_read_unlock();
19baf839
RO
2082}
2083
cb7b593c
SH
2084/*
2085 * This outputs /proc/net/fib_triestats
2086 */
2087static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
19baf839 2088{
cb7b593c 2089 unsigned i, max, pointers, bytes, avdepth;
c877efb2 2090
cb7b593c
SH
2091 if (stat->leaves)
2092 avdepth = stat->totdepth*100 / stat->leaves;
2093 else
2094 avdepth = 0;
91b9a277 2095
cb7b593c
SH
2096 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2097 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
91b9a277 2098
cb7b593c 2099 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
91b9a277 2100
cb7b593c
SH
2101 bytes = sizeof(struct leaf) * stat->leaves;
2102 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2103 bytes += sizeof(struct tnode) * stat->tnodes;
19baf839 2104
cb7b593c
SH
2105 max = MAX_CHILDS-1;
2106 while (max >= 0 && stat->nodesizes[max] == 0)
2107 max--;
19baf839 2108
cb7b593c
SH
2109 pointers = 0;
2110 for (i = 1; i <= max; i++)
2111 if (stat->nodesizes[i] != 0) {
2112 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2113 pointers += (1<<i) * stat->nodesizes[i];
2114 }
2115 seq_putc(seq, '\n');
2116 seq_printf(seq, "\tPointers: %d\n", pointers);
2373ce1c 2117
cb7b593c
SH
2118 bytes += sizeof(struct node *) * pointers;
2119 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2120 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
2373ce1c 2121
cb7b593c
SH
2122#ifdef CONFIG_IP_FIB_TRIE_STATS
2123 seq_printf(seq, "Counters:\n---------\n");
2124 seq_printf(seq,"gets = %d\n", t->stats.gets);
2125 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2126 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2127 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2128 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2129 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2130#ifdef CLEAR_STATS
2131 memset(&(t->stats), 0, sizeof(t->stats));
2132#endif
2133#endif /* CONFIG_IP_FIB_TRIE_STATS */
2134}
19baf839 2135
cb7b593c
SH
2136static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2137{
2138 struct trie_stat *stat;
91b9a277 2139
cb7b593c
SH
2140 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2141 if (!stat)
2142 return -ENOMEM;
91b9a277 2143
cb7b593c
SH
2144 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2145 sizeof(struct leaf), sizeof(struct tnode));
91b9a277 2146
cb7b593c
SH
2147 if (trie_local) {
2148 seq_printf(seq, "Local:\n");
2149 trie_collect_stats(trie_local, stat);
2150 trie_show_stats(seq, stat);
2151 }
91b9a277 2152
cb7b593c
SH
2153 if (trie_main) {
2154 seq_printf(seq, "Main:\n");
2155 trie_collect_stats(trie_main, stat);
2156 trie_show_stats(seq, stat);
19baf839 2157 }
cb7b593c 2158 kfree(stat);
19baf839 2159
cb7b593c 2160 return 0;
19baf839
RO
2161}
2162
cb7b593c 2163static int fib_triestat_seq_open(struct inode *inode, struct file *file)
19baf839 2164{
cb7b593c 2165 return single_open(file, fib_triestat_seq_show, NULL);
19baf839
RO
2166}
2167
cb7b593c
SH
2168static struct file_operations fib_triestat_fops = {
2169 .owner = THIS_MODULE,
2170 .open = fib_triestat_seq_open,
2171 .read = seq_read,
2172 .llseek = seq_lseek,
2173 .release = single_release,
2174};
2175
2176static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2177 loff_t pos)
19baf839 2178{
cb7b593c
SH
2179 loff_t idx = 0;
2180 struct node *n;
2181
2182 for (n = fib_trie_get_first(iter, trie_local);
2183 n; ++idx, n = fib_trie_get_next(iter)) {
2184 if (pos == idx)
2185 return n;
2186 }
2187
2188 for (n = fib_trie_get_first(iter, trie_main);
2189 n; ++idx, n = fib_trie_get_next(iter)) {
2190 if (pos == idx)
2191 return n;
2192 }
19baf839
RO
2193 return NULL;
2194}
2195
cb7b593c 2196static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
19baf839 2197{
cb7b593c
SH
2198 rcu_read_lock();
2199 if (*pos == 0)
91b9a277 2200 return SEQ_START_TOKEN;
cb7b593c 2201 return fib_trie_get_idx(seq->private, *pos - 1);
19baf839
RO
2202}
2203
cb7b593c 2204static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
19baf839 2205{
cb7b593c
SH
2206 struct fib_trie_iter *iter = seq->private;
2207 void *l = v;
2208
19baf839 2209 ++*pos;
91b9a277 2210 if (v == SEQ_START_TOKEN)
cb7b593c 2211 return fib_trie_get_idx(iter, 0);
19baf839 2212
cb7b593c
SH
2213 v = fib_trie_get_next(iter);
2214 BUG_ON(v == l);
2215 if (v)
2216 return v;
19baf839 2217
cb7b593c
SH
2218 /* continue scan in next trie */
2219 if (iter->trie == trie_local)
2220 return fib_trie_get_first(iter, trie_main);
19baf839 2221
cb7b593c
SH
2222 return NULL;
2223}
19baf839 2224
cb7b593c 2225static void fib_trie_seq_stop(struct seq_file *seq, void *v)
19baf839 2226{
cb7b593c
SH
2227 rcu_read_unlock();
2228}
91b9a277 2229
cb7b593c
SH
2230static void seq_indent(struct seq_file *seq, int n)
2231{
2232 while (n-- > 0) seq_puts(seq, " ");
2233}
19baf839 2234
cb7b593c
SH
2235static inline const char *rtn_scope(enum rt_scope_t s)
2236{
2237 static char buf[32];
19baf839 2238
cb7b593c
SH
2239 switch(s) {
2240 case RT_SCOPE_UNIVERSE: return "universe";
2241 case RT_SCOPE_SITE: return "site";
2242 case RT_SCOPE_LINK: return "link";
2243 case RT_SCOPE_HOST: return "host";
2244 case RT_SCOPE_NOWHERE: return "nowhere";
2245 default:
2246 snprintf(buf, sizeof(buf), "scope=%d", s);
2247 return buf;
2248 }
2249}
19baf839 2250
cb7b593c
SH
2251static const char *rtn_type_names[__RTN_MAX] = {
2252 [RTN_UNSPEC] = "UNSPEC",
2253 [RTN_UNICAST] = "UNICAST",
2254 [RTN_LOCAL] = "LOCAL",
2255 [RTN_BROADCAST] = "BROADCAST",
2256 [RTN_ANYCAST] = "ANYCAST",
2257 [RTN_MULTICAST] = "MULTICAST",
2258 [RTN_BLACKHOLE] = "BLACKHOLE",
2259 [RTN_UNREACHABLE] = "UNREACHABLE",
2260 [RTN_PROHIBIT] = "PROHIBIT",
2261 [RTN_THROW] = "THROW",
2262 [RTN_NAT] = "NAT",
2263 [RTN_XRESOLVE] = "XRESOLVE",
2264};
19baf839 2265
cb7b593c
SH
2266static inline const char *rtn_type(unsigned t)
2267{
2268 static char buf[32];
19baf839 2269
cb7b593c
SH
2270 if (t < __RTN_MAX && rtn_type_names[t])
2271 return rtn_type_names[t];
2272 snprintf(buf, sizeof(buf), "type %d", t);
2273 return buf;
19baf839
RO
2274}
2275
cb7b593c
SH
2276/* Pretty print the trie */
2277static int fib_trie_seq_show(struct seq_file *seq, void *v)
19baf839 2278{
cb7b593c
SH
2279 const struct fib_trie_iter *iter = seq->private;
2280 struct node *n = v;
c877efb2 2281
cb7b593c
SH
2282 if (v == SEQ_START_TOKEN)
2283 return 0;
19baf839 2284
cb7b593c
SH
2285 if (IS_TNODE(n)) {
2286 struct tnode *tn = (struct tnode *) n;
2287 t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
91b9a277 2288
cb7b593c
SH
2289 if (!NODE_PARENT(n)) {
2290 if (iter->trie == trie_local)
2291 seq_puts(seq, "<local>:\n");
2292 else
2293 seq_puts(seq, "<main>:\n");
1d25cd6c
RO
2294 }
2295 seq_indent(seq, iter->depth-1);
2296 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
2297 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2298 tn->empty_children);
2299
cb7b593c
SH
2300 } else {
2301 struct leaf *l = (struct leaf *) n;
2302 int i;
2303 u32 val = ntohl(l->key);
2304
2305 seq_indent(seq, iter->depth);
2306 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2307 for (i = 32; i >= 0; i--) {
772cb712 2308 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c
SH
2309 if (li) {
2310 struct fib_alias *fa;
2311 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2312 seq_indent(seq, iter->depth+1);
2313 seq_printf(seq, " /%d %s %s", i,
2314 rtn_scope(fa->fa_scope),
2315 rtn_type(fa->fa_type));
2316 if (fa->fa_tos)
2317 seq_printf(seq, "tos =%d\n",
2318 fa->fa_tos);
2319 seq_putc(seq, '\n');
2320 }
2321 }
2322 }
19baf839 2323 }
cb7b593c 2324
19baf839
RO
2325 return 0;
2326}
2327
cb7b593c
SH
2328static struct seq_operations fib_trie_seq_ops = {
2329 .start = fib_trie_seq_start,
2330 .next = fib_trie_seq_next,
2331 .stop = fib_trie_seq_stop,
2332 .show = fib_trie_seq_show,
19baf839
RO
2333};
2334
cb7b593c 2335static int fib_trie_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2336{
2337 struct seq_file *seq;
2338 int rc = -ENOMEM;
cb7b593c 2339 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2340
cb7b593c
SH
2341 if (!s)
2342 goto out;
2343
2344 rc = seq_open(file, &fib_trie_seq_ops);
19baf839
RO
2345 if (rc)
2346 goto out_kfree;
2347
cb7b593c
SH
2348 seq = file->private_data;
2349 seq->private = s;
2350 memset(s, 0, sizeof(*s));
19baf839
RO
2351out:
2352 return rc;
2353out_kfree:
cb7b593c 2354 kfree(s);
19baf839
RO
2355 goto out;
2356}
2357
cb7b593c
SH
2358static struct file_operations fib_trie_fops = {
2359 .owner = THIS_MODULE,
2360 .open = fib_trie_seq_open,
2361 .read = seq_read,
2362 .llseek = seq_lseek,
c877efb2 2363 .release = seq_release_private,
19baf839
RO
2364};
2365
cb7b593c 2366static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
19baf839 2367{
cb7b593c
SH
2368 static unsigned type2flags[RTN_MAX + 1] = {
2369 [7] = RTF_REJECT, [8] = RTF_REJECT,
2370 };
2371 unsigned flags = type2flags[type];
19baf839 2372
cb7b593c
SH
2373 if (fi && fi->fib_nh->nh_gw)
2374 flags |= RTF_GATEWAY;
2375 if (mask == 0xFFFFFFFF)
2376 flags |= RTF_HOST;
2377 flags |= RTF_UP;
2378 return flags;
19baf839
RO
2379}
2380
cb7b593c
SH
2381/*
2382 * This outputs /proc/net/route.
2383 * The format of the file is not supposed to be changed
2384 * and needs to be same as fib_hash output to avoid breaking
2385 * legacy utilities
2386 */
2387static int fib_route_seq_show(struct seq_file *seq, void *v)
19baf839 2388{
c9e53cbe 2389 const struct fib_trie_iter *iter = seq->private;
cb7b593c
SH
2390 struct leaf *l = v;
2391 int i;
2392 char bf[128];
19baf839 2393
cb7b593c
SH
2394 if (v == SEQ_START_TOKEN) {
2395 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2396 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2397 "\tWindow\tIRTT");
2398 return 0;
2399 }
19baf839 2400
c9e53cbe
PM
2401 if (iter->trie == trie_local)
2402 return 0;
cb7b593c
SH
2403 if (IS_TNODE(l))
2404 return 0;
19baf839 2405
cb7b593c 2406 for (i=32; i>=0; i--) {
772cb712 2407 struct leaf_info *li = find_leaf_info(l, i);
cb7b593c
SH
2408 struct fib_alias *fa;
2409 u32 mask, prefix;
91b9a277 2410
cb7b593c
SH
2411 if (!li)
2412 continue;
19baf839 2413
cb7b593c
SH
2414 mask = inet_make_mask(li->plen);
2415 prefix = htonl(l->key);
19baf839 2416
cb7b593c 2417 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1371e37d 2418 const struct fib_info *fi = fa->fa_info;
cb7b593c 2419 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
19baf839 2420
cb7b593c
SH
2421 if (fa->fa_type == RTN_BROADCAST
2422 || fa->fa_type == RTN_MULTICAST)
2423 continue;
19baf839 2424
cb7b593c
SH
2425 if (fi)
2426 snprintf(bf, sizeof(bf),
2427 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2428 fi->fib_dev ? fi->fib_dev->name : "*",
2429 prefix,
2430 fi->fib_nh->nh_gw, flags, 0, 0,
2431 fi->fib_priority,
2432 mask,
2433 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2434 fi->fib_window,
2435 fi->fib_rtt >> 3);
2436 else
2437 snprintf(bf, sizeof(bf),
2438 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2439 prefix, 0, flags, 0, 0, 0,
2440 mask, 0, 0, 0);
19baf839 2441
cb7b593c
SH
2442 seq_printf(seq, "%-127s\n", bf);
2443 }
19baf839
RO
2444 }
2445
2446 return 0;
2447}
2448
cb7b593c
SH
2449static struct seq_operations fib_route_seq_ops = {
2450 .start = fib_trie_seq_start,
2451 .next = fib_trie_seq_next,
2452 .stop = fib_trie_seq_stop,
2453 .show = fib_route_seq_show,
19baf839
RO
2454};
2455
cb7b593c 2456static int fib_route_seq_open(struct inode *inode, struct file *file)
19baf839
RO
2457{
2458 struct seq_file *seq;
2459 int rc = -ENOMEM;
cb7b593c 2460 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
19baf839 2461
cb7b593c
SH
2462 if (!s)
2463 goto out;
2464
2465 rc = seq_open(file, &fib_route_seq_ops);
19baf839
RO
2466 if (rc)
2467 goto out_kfree;
2468
cb7b593c
SH
2469 seq = file->private_data;
2470 seq->private = s;
2471 memset(s, 0, sizeof(*s));
19baf839
RO
2472out:
2473 return rc;
2474out_kfree:
cb7b593c 2475 kfree(s);
19baf839
RO
2476 goto out;
2477}
2478
cb7b593c
SH
2479static struct file_operations fib_route_fops = {
2480 .owner = THIS_MODULE,
2481 .open = fib_route_seq_open,
2482 .read = seq_read,
2483 .llseek = seq_lseek,
2484 .release = seq_release_private,
19baf839
RO
2485};
2486
2487int __init fib_proc_init(void)
2488{
cb7b593c
SH
2489 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2490 goto out1;
2491
2492 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2493 goto out2;
2494
2495 if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2496 goto out3;
2497
19baf839 2498 return 0;
cb7b593c
SH
2499
2500out3:
2501 proc_net_remove("fib_triestat");
2502out2:
2503 proc_net_remove("fib_trie");
2504out1:
2505 return -ENOMEM;
19baf839
RO
2506}
2507
2508void __init fib_proc_exit(void)
2509{
2510 proc_net_remove("fib_trie");
cb7b593c
SH
2511 proc_net_remove("fib_triestat");
2512 proc_net_remove("route");
19baf839
RO
2513}
2514
2515#endif /* CONFIG_PROC_FS */