mlxsw: spectrum_router: Don't rely on missing extack to symbolize dump
[linux-block.git] / net / ipv6 / ip6_fib.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4 2/*
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 * Forwarding Information Database
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 8 *
8db46f1d
WY
9 * Changes:
10 * Yuji SEKIYA @USAGI: Support default route on router node;
11 * remove ip6_null_entry from the top of
12 * routing table.
13 * Ville Nuorvala: Fixed routing subtrees.
1da177e4 14 */
f3213831
JP
15
16#define pr_fmt(fmt) "IPv6: " fmt
17
1da177e4
LT
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/net.h>
21#include <linux/route.h>
22#include <linux/netdevice.h>
23#include <linux/in6.h>
24#include <linux/init.h>
c71099ac 25#include <linux/list.h>
5a0e3ad6 26#include <linux/slab.h>
1da177e4 27
cc5f0eb2 28#include <net/ip.h>
1da177e4
LT
29#include <net/ipv6.h>
30#include <net/ndisc.h>
31#include <net/addrconf.h>
19e42e45 32#include <net/lwtunnel.h>
df77fe4d 33#include <net/fib_notifier.h>
1da177e4
LT
34
35#include <net/ip6_fib.h>
36#include <net/ip6_route.h>
37
437de07c 38static struct kmem_cache *fib6_node_kmem __read_mostly;
1da177e4 39
94b2cfe0
HFS
40struct fib6_cleaner {
41 struct fib6_walker w;
ec7d43c2 42 struct net *net;
8d1c802b 43 int (*func)(struct fib6_info *, void *arg);
327571cb 44 int sernum;
1da177e4 45 void *arg;
7c6bb7d2 46 bool skip_notify;
1da177e4
LT
47};
48
1da177e4
LT
/*
 * State a walker is (re)started in: FWS_S when source-address subtrees
 * are configured, FWS_L otherwise.
 */
#if defined(CONFIG_IPV6_SUBTREES)
# define FWS_INIT FWS_S
#else
# define FWS_INIT FWS_L
#endif
54
8d1c802b 55static struct fib6_info *fib6_find_prefix(struct net *net,
66f5d6ce
WW
56 struct fib6_table *table,
57 struct fib6_node *fn);
58static struct fib6_node *fib6_repair_tree(struct net *net,
59 struct fib6_table *table,
60 struct fib6_node *fn);
9a03cd8f 61static int fib6_walk(struct net *net, struct fib6_walker *w);
94b2cfe0 62static int fib6_walk_continue(struct fib6_walker *w);
1da177e4
LT
63
64/*
65 * A routing update causes an increase of the serial number on the
66 * affected subtree. This allows for cached routes to be asynchronously
67 * tested when modifications are made to the destination cache as a
68 * result of redirects, path MTU changes, etc.
69 */
70
86cb30ec 71static void fib6_gc_timer_cb(struct timer_list *t);
5b7c931d 72
9a03cd8f
MK
/* Visit, via cursor w, every walker linked on net's fib6_walkers list. */
#define FOR_WALKERS(net, w)						\
	list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
1da177e4 75
9a03cd8f 76static void fib6_walker_link(struct net *net, struct fib6_walker *w)
90d41122 77{
9a03cd8f
MK
78 write_lock_bh(&net->ipv6.fib6_walker_lock);
79 list_add(&w->lh, &net->ipv6.fib6_walkers);
80 write_unlock_bh(&net->ipv6.fib6_walker_lock);
90d41122
AB
81}
82
9a03cd8f 83static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
90d41122 84{
9a03cd8f 85 write_lock_bh(&net->ipv6.fib6_walker_lock);
bbef49da 86 list_del(&w->lh);
9a03cd8f 87 write_unlock_bh(&net->ipv6.fib6_walker_lock);
90d41122 88}
94b2cfe0 89
812918c4 90static int fib6_new_sernum(struct net *net)
1da177e4 91{
42b18706
HFS
92 int new, old;
93
94 do {
812918c4 95 old = atomic_read(&net->ipv6.fib6_sernum);
42b18706 96 new = old < INT_MAX ? old + 1 : 1;
812918c4
HFS
97 } while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
98 old, new) != old);
42b18706 99 return new;
1da177e4
LT
100}
101
327571cb
HFS
/*
 * NOTE(review): going by its name this is a sentinel serial number meaning
 * "do not change"; its users are outside this part of the file - confirm.
 */
enum {
	FIB6_NO_SERNUM_CHANGE = 0,
};
105
93c2fb25 106void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
180ca444 107{
180ca444
WW
108 struct fib6_node *fn;
109
93c2fb25
DA
110 fn = rcu_dereference_protected(f6i->fib6_node,
111 lockdep_is_held(&f6i->fib6_table->tb6_lock));
180ca444
WW
112 if (fn)
113 fn->fn_sernum = fib6_new_sernum(net);
180ca444
WW
114}
115
1da177e4
LT
116/*
117 * Auxiliary address test functions for the radix tree.
118 *
1ab1457c 119 * These assume a 32bit processor (although it will work on
1da177e4
LT
120 * 64bit processors)
121 */
122
123/*
124 * test bit
125 */
02cdce53
YH
/*
 * XOR mask applied to a bit index by addr_bit_set() so that a host-order
 * shift selects the right bit of a big-endian 32-bit word.
 */
#ifdef __LITTLE_ENDIAN
# define BITOP_BE32_SWIZZLE (0x1F & ~7)
#else
# define BITOP_BE32_SWIZZLE 0
#endif
1da177e4 131
94b2cfe0 132static __be32 addr_bit_set(const void *token, int fn_bit)
1da177e4 133{
b71d1d42 134 const __be32 *addr = token;
02cdce53
YH
135 /*
136 * Here,
8db46f1d 137 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
02cdce53
YH
138 * is optimized version of
139 * htonl(1 << ((~fn_bit)&0x1F))
140 * See include/asm-generic/bitops/le.h.
141 */
0eae88f3
ED
142 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
143 addr[fn_bit >> 5];
1da177e4
LT
144}
145
1cf844c7 146struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
a64efe14 147{
8d1c802b 148 struct fib6_info *f6i;
1cf844c7 149 size_t sz = sizeof(*f6i);
a64efe14 150
1cf844c7
DA
151 if (with_fib6_nh)
152 sz += sizeof(struct fib6_nh);
153
154 f6i = kzalloc(sz, gfp_flags);
a64efe14
DA
155 if (!f6i)
156 return NULL;
157
f88d8ea6 158 /* fib6_siblings is a union with nh_list, so this initializes both */
93c2fb25 159 INIT_LIST_HEAD(&f6i->fib6_siblings);
f05713e0 160 refcount_set(&f6i->fib6_ref, 1);
a64efe14
DA
161
162 return f6i;
163}
164
9b0a8da8 165void fib6_info_destroy_rcu(struct rcu_head *head)
a64efe14 166{
9b0a8da8 167 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
a64efe14 168
93c2fb25 169 WARN_ON(f6i->fib6_node);
a64efe14 170
f88d8ea6
DA
171 if (f6i->nh)
172 nexthop_put(f6i->nh);
173 else
174 fib6_nh_release(f6i->fib6_nh);
175
cc5f0eb2 176 ip_fib_metrics_put(f6i->fib6_metrics);
a64efe14
DA
177 kfree(f6i);
178}
9b0a8da8 179EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
a64efe14 180
81eb8447 181static struct fib6_node *node_alloc(struct net *net)
1da177e4
LT
182{
183 struct fib6_node *fn;
184
c3762229 185 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
81eb8447
WW
186 if (fn)
187 net->ipv6.rt6_stats->fib_nodes++;
1da177e4
LT
188
189 return fn;
190}
191
81eb8447 192static void node_free_immediate(struct net *net, struct fib6_node *fn)
c5cff856
WW
193{
194 kmem_cache_free(fib6_node_kmem, fn);
81eb8447 195 net->ipv6.rt6_stats->fib_nodes--;
c5cff856
WW
196}
197
198static void node_free_rcu(struct rcu_head *head)
1da177e4 199{
c5cff856
WW
200 struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
201
1da177e4
LT
202 kmem_cache_free(fib6_node_kmem, fn);
203}
204
81eb8447 205static void node_free(struct net *net, struct fib6_node *fn)
c5cff856
WW
206{
207 call_rcu(&fn->rcu, node_free_rcu);
81eb8447 208 net->ipv6.rt6_stats->fib_nodes--;
c5cff856
WW
209}
210
ba1cc08d
SD
211static void fib6_free_table(struct fib6_table *table)
212{
213 inetpeer_invalidate_tree(&table->tb6_peers);
214 kfree(table);
215}
216
58f09b78 217static void fib6_link_table(struct net *net, struct fib6_table *tb)
1b43af54
PM
218{
219 unsigned int h;
220
375216ad
TG
221 /*
222 * Initialize table lock at a single place to give lockdep a key,
223 * tables aren't visible prior to being linked to the list.
224 */
66f5d6ce 225 spin_lock_init(&tb->tb6_lock);
a33bc5c1 226 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
1b43af54
PM
227
228 /*
229 * No protection necessary, this is the only list mutatation
230 * operation, tables never disappear once they exist.
231 */
58f09b78 232 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
1b43af54 233}
c71099ac 234
1b43af54 235#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 236
8ed67789 237static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
c71099ac
TG
238{
239 struct fib6_table *table;
240
241 table = kzalloc(sizeof(*table), GFP_ATOMIC);
507c9b1e 242 if (table) {
c71099ac 243 table->tb6_id = id;
66f5d6ce 244 rcu_assign_pointer(table->tb6_root.leaf,
421842ed 245 net->ipv6.fib6_null_entry);
c71099ac 246 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 247 inet_peer_base_init(&table->tb6_peers);
c71099ac
TG
248 }
249
250 return table;
251}
252
58f09b78 253struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac
TG
254{
255 struct fib6_table *tb;
256
257 if (id == 0)
258 id = RT6_TABLE_MAIN;
58f09b78 259 tb = fib6_get_table(net, id);
c71099ac
TG
260 if (tb)
261 return tb;
262
8ed67789 263 tb = fib6_alloc_table(net, id);
507c9b1e 264 if (tb)
58f09b78 265 fib6_link_table(net, tb);
c71099ac
TG
266
267 return tb;
268}
b3b4663c 269EXPORT_SYMBOL_GPL(fib6_new_table);
c71099ac 270
58f09b78 271struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac
TG
272{
273 struct fib6_table *tb;
58f09b78 274 struct hlist_head *head;
c71099ac
TG
275 unsigned int h;
276
277 if (id == 0)
278 id = RT6_TABLE_MAIN;
a33bc5c1 279 h = id & (FIB6_TABLE_HASHSZ - 1);
c71099ac 280 rcu_read_lock();
58f09b78 281 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 282 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
c71099ac
TG
283 if (tb->tb6_id == id) {
284 rcu_read_unlock();
285 return tb;
286 }
287 }
288 rcu_read_unlock();
289
290 return NULL;
291}
c4850687 292EXPORT_SYMBOL_GPL(fib6_get_table);
c71099ac 293
2c8c1e72 294static void __net_init fib6_tables_init(struct net *net)
c71099ac 295{
58f09b78
DL
296 fib6_link_table(net, net->ipv6.fib6_main_tbl);
297 fib6_link_table(net, net->ipv6.fib6_local_tbl);
c71099ac 298}
c71099ac
TG
299#else
300
58f09b78 301struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac 302{
58f09b78 303 return fib6_get_table(net, id);
c71099ac
TG
304}
305
58f09b78 306struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac 307{
58f09b78 308 return net->ipv6.fib6_main_tbl;
c71099ac
TG
309}
310
4c9483b2 311struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 312 const struct sk_buff *skb,
58f09b78 313 int flags, pol_lookup_t lookup)
c71099ac 314{
ab997ad4 315 struct rt6_info *rt;
316
b75cc8f9 317 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
07f61557 318 if (rt->dst.error == -EAGAIN) {
d64a1f57 319 ip6_rt_put_flags(rt, flags);
ab997ad4 320 rt = net->ipv6.ip6_null_entry;
7b09c2d0 321 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
d64a1f57 322 dst_hold(&rt->dst);
ab997ad4 323 }
324
325 return &rt->dst;
c71099ac
TG
326}
327
138118ec 328/* called with rcu lock held; no reference taken on fib6_info */
effda4dd
DA
329int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
330 struct fib6_result *res, int flags)
138118ec 331{
effda4dd
DA
332 return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
333 res, flags);
138118ec
DA
334}
335
2c8c1e72 336static void __net_init fib6_tables_init(struct net *net)
c71099ac 337{
58f09b78 338 fib6_link_table(net, net->ipv6.fib6_main_tbl);
c71099ac
TG
339}
340
341#endif
342
e1ee0a5b
IS
343unsigned int fib6_tables_seq_read(struct net *net)
344{
345 unsigned int h, fib_seq = 0;
346
347 rcu_read_lock();
348 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
349 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
350 struct fib6_table *tb;
351
66f5d6ce 352 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
e1ee0a5b 353 fib_seq += tb->fib_seq;
e1ee0a5b
IS
354 }
355 rcu_read_unlock();
356
357 return fib_seq;
358}
359
7c550daf 360static int call_fib6_entry_notifier(struct notifier_block *nb,
e1ee0a5b 361 enum fib_event_type event_type,
8d1c802b 362 struct fib6_info *rt)
e1ee0a5b
IS
363{
364 struct fib6_entry_notifier_info info = {
365 .rt = rt,
366 };
367
7c550daf 368 return call_fib6_notifier(nb, event_type, &info.info);
e1ee0a5b
IS
369}
370
19a3b7ee
DA
371int call_fib6_entry_notifiers(struct net *net,
372 enum fib_event_type event_type,
373 struct fib6_info *rt,
374 struct netlink_ext_ack *extack)
df77fe4d
IS
375{
376 struct fib6_entry_notifier_info info = {
6c31e5a9 377 .info.extack = extack,
df77fe4d
IS
378 .rt = rt,
379 };
380
93c2fb25 381 rt->fib6_table->fib_seq++;
df77fe4d
IS
382 return call_fib6_notifiers(net, event_type, &info.info);
383}
384
d4b96c7b
IS
385int call_fib6_multipath_entry_notifiers(struct net *net,
386 enum fib_event_type event_type,
387 struct fib6_info *rt,
388 unsigned int nsiblings,
389 struct netlink_ext_ack *extack)
390{
391 struct fib6_entry_notifier_info info = {
392 .info.extack = extack,
393 .rt = rt,
394 .nsiblings = nsiblings,
d4b96c7b
IS
395 };
396
397 rt->fib6_table->fib_seq++;
398 return call_fib6_notifiers(net, event_type, &info.info);
399}
400
e1ee0a5b
IS
/* Walker argument used while replaying the tables to one notifier. */
struct fib6_dump_arg {
	struct net *net;		/* namespace being dumped */
	struct notifier_block *nb;	/* notifier receiving the events */
};
405
55c894f7 406static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
e1ee0a5b 407{
421842ed 408 if (rt == arg->net->ipv6.fib6_null_entry)
55c894f7
JP
409 return 0;
410 return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD, rt);
e1ee0a5b
IS
411}
412
413static int fib6_node_dump(struct fib6_walker *w)
414{
8d1c802b 415 struct fib6_info *rt;
55c894f7 416 int err = 0;
e1ee0a5b 417
55c894f7
JP
418 for_each_fib6_walker_rt(w) {
419 err = fib6_rt_dump(rt, w->args);
420 if (err)
421 break;
422 }
e1ee0a5b 423 w->leaf = NULL;
55c894f7 424 return err;
e1ee0a5b
IS
425}
426
55c894f7
JP
427static int fib6_table_dump(struct net *net, struct fib6_table *tb,
428 struct fib6_walker *w)
e1ee0a5b 429{
55c894f7
JP
430 int err;
431
e1ee0a5b 432 w->root = &tb->tb6_root;
66f5d6ce 433 spin_lock_bh(&tb->tb6_lock);
55c894f7 434 err = fib6_walk(net, w);
66f5d6ce 435 spin_unlock_bh(&tb->tb6_lock);
55c894f7 436 return err;
e1ee0a5b
IS
437}
438
439/* Called with rcu_read_lock() */
440int fib6_tables_dump(struct net *net, struct notifier_block *nb)
441{
442 struct fib6_dump_arg arg;
443 struct fib6_walker *w;
444 unsigned int h;
55c894f7 445 int err = 0;
e1ee0a5b
IS
446
447 w = kzalloc(sizeof(*w), GFP_ATOMIC);
448 if (!w)
449 return -ENOMEM;
450
451 w->func = fib6_node_dump;
452 arg.net = net;
453 arg.nb = nb;
454 w->args = &arg;
455
456 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
457 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
458 struct fib6_table *tb;
459
55c894f7
JP
460 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
461 err = fib6_table_dump(net, tb, w);
462 if (err < 0)
463 goto out;
464 }
e1ee0a5b
IS
465 }
466
55c894f7 467out:
e1ee0a5b
IS
468 kfree(w);
469
55c894f7 470 return err;
e1ee0a5b
IS
471}
472
94b2cfe0 473static int fib6_dump_node(struct fib6_walker *w)
1b43af54
PM
474{
475 int res;
8d1c802b 476 struct fib6_info *rt;
1b43af54 477
66f5d6ce 478 for_each_fib6_walker_rt(w) {
1e47b483 479 res = rt6_dump_route(rt, w->args, w->skip_in_node);
bf9a8a06 480 if (res >= 0) {
1b43af54
PM
481 /* Frame is full, suspend walking */
482 w->leaf = rt;
1e47b483
SB
483
484 /* We'll restart from this node, so if some routes were
485 * already dumped, skip them next time.
486 */
487 w->skip_in_node += res;
488
1b43af54
PM
489 return 1;
490 }
1e47b483 491 w->skip_in_node = 0;
beb1afac
DA
492
493 /* Multipath routes are dumped in one route with the
494 * RTA_MULTIPATH attribute. Jump 'rt' to point to the
495 * last sibling of this route (no need to dump the
496 * sibling routes again)
497 */
93c2fb25
DA
498 if (rt->fib6_nsiblings)
499 rt = list_last_entry(&rt->fib6_siblings,
8d1c802b 500 struct fib6_info,
93c2fb25 501 fib6_siblings);
1b43af54
PM
502 }
503 w->leaf = NULL;
504 return 0;
505}
506
507static void fib6_dump_end(struct netlink_callback *cb)
508{
9a03cd8f 509 struct net *net = sock_net(cb->skb->sk);
94b2cfe0 510 struct fib6_walker *w = (void *)cb->args[2];
1b43af54
PM
511
512 if (w) {
7891cc81
HX
513 if (cb->args[4]) {
514 cb->args[4] = 0;
9a03cd8f 515 fib6_walker_unlink(net, w);
7891cc81 516 }
1b43af54
PM
517 cb->args[2] = 0;
518 kfree(w);
519 }
437de07c 520 cb->done = (void *)cb->args[3];
1b43af54
PM
521 cb->args[1] = 3;
522}
523
524static int fib6_dump_done(struct netlink_callback *cb)
525{
526 fib6_dump_end(cb);
527 return cb->done ? cb->done(cb) : 0;
528}
529
530static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
531 struct netlink_callback *cb)
532{
9a03cd8f 533 struct net *net = sock_net(skb->sk);
94b2cfe0 534 struct fib6_walker *w;
1b43af54
PM
535 int res;
536
537 w = (void *)cb->args[2];
538 w->root = &table->tb6_root;
539
540 if (cb->args[4] == 0) {
2bec5a36
PM
541 w->count = 0;
542 w->skip = 0;
1e47b483 543 w->skip_in_node = 0;
2bec5a36 544
66f5d6ce 545 spin_lock_bh(&table->tb6_lock);
9a03cd8f 546 res = fib6_walk(net, w);
66f5d6ce 547 spin_unlock_bh(&table->tb6_lock);
2bec5a36 548 if (res > 0) {
1b43af54 549 cb->args[4] = 1;
2bec5a36
PM
550 cb->args[5] = w->root->fn_sernum;
551 }
1b43af54 552 } else {
2bec5a36
PM
553 if (cb->args[5] != w->root->fn_sernum) {
554 /* Begin at the root if the tree changed */
555 cb->args[5] = w->root->fn_sernum;
556 w->state = FWS_INIT;
557 w->node = w->root;
558 w->skip = w->count;
1e47b483 559 w->skip_in_node = 0;
2bec5a36
PM
560 } else
561 w->skip = 0;
562
66f5d6ce 563 spin_lock_bh(&table->tb6_lock);
1b43af54 564 res = fib6_walk_continue(w);
66f5d6ce 565 spin_unlock_bh(&table->tb6_lock);
7891cc81 566 if (res <= 0) {
9a03cd8f 567 fib6_walker_unlink(net, w);
7891cc81 568 cb->args[4] = 0;
1b43af54 569 }
1b43af54 570 }
7891cc81 571
1b43af54
PM
572 return res;
573}
574
c127ea2c 575static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1b43af54 576{
564c91f7
SB
577 struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
578 .filter.dump_routes = true };
e8ba330a 579 const struct nlmsghdr *nlh = cb->nlh;
3b1e0a65 580 struct net *net = sock_net(skb->sk);
1b43af54
PM
581 unsigned int h, s_h;
582 unsigned int e = 0, s_e;
94b2cfe0 583 struct fib6_walker *w;
1b43af54 584 struct fib6_table *tb;
58f09b78 585 struct hlist_head *head;
1b43af54
PM
586 int res = 0;
587
e8ba330a 588 if (cb->strict_check) {
4724676d 589 int err;
e8ba330a 590
effe6792 591 err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
e8ba330a
DA
592 if (err < 0)
593 return err;
13e38901
DA
594 } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
595 struct rtmsg *rtm = nlmsg_data(nlh);
e8ba330a 596
ef11209d
SB
597 if (rtm->rtm_flags & RTM_F_PREFIX)
598 arg.filter.flags = RTM_F_PREFIX;
13e38901 599 }
1b43af54
PM
600
601 w = (void *)cb->args[2];
507c9b1e 602 if (!w) {
1b43af54
PM
603 /* New dump:
604 *
605 * 1. hook callback destructor.
606 */
607 cb->args[3] = (long)cb->done;
608 cb->done = fib6_dump_done;
609
610 /*
611 * 2. allocate and initialize walker.
612 */
613 w = kzalloc(sizeof(*w), GFP_ATOMIC);
507c9b1e 614 if (!w)
1b43af54
PM
615 return -ENOMEM;
616 w->func = fib6_dump_node;
617 cb->args[2] = (long)w;
618 }
619
620 arg.skb = skb;
621 arg.cb = cb;
191cd582 622 arg.net = net;
1b43af54
PM
623 w->args = &arg;
624
13e38901
DA
625 if (arg.filter.table_id) {
626 tb = fib6_get_table(net, arg.filter.table_id);
627 if (!tb) {
ae677bbb 628 if (arg.filter.dump_all_families)
e22d0bfa 629 goto out;
ae677bbb 630
13e38901
DA
631 NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
632 return -ENOENT;
633 }
634
73155879
DA
635 if (!cb->args[0]) {
636 res = fib6_dump_table(tb, skb, cb);
637 if (!res)
638 cb->args[0] = 1;
639 }
13e38901
DA
640 goto out;
641 }
642
643 s_h = cb->args[0];
644 s_e = cb->args[1];
645
e67f88dd 646 rcu_read_lock();
a33bc5c1 647 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
1b43af54 648 e = 0;
58f09b78 649 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 650 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
1b43af54
PM
651 if (e < s_e)
652 goto next;
653 res = fib6_dump_table(tb, skb, cb);
654 if (res != 0)
13e38901 655 goto out_unlock;
1b43af54
PM
656next:
657 e++;
658 }
659 }
13e38901 660out_unlock:
e67f88dd 661 rcu_read_unlock();
1b43af54
PM
662 cb->args[1] = e;
663 cb->args[0] = h;
13e38901 664out:
1b43af54
PM
665 res = res < 0 ? res : skb->len;
666 if (res <= 0)
667 fib6_dump_end(cb);
668 return res;
669}
1da177e4 670
8d1c802b 671void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
d4ead6b3
DA
672{
673 if (!f6i)
674 return;
675
676 if (f6i->fib6_metrics == &dst_default_metrics) {
677 struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
678
679 if (!p)
680 return;
681
682 refcount_set(&p->refcnt, 1);
683 f6i->fib6_metrics = p;
684 }
685
686 f6i->fib6_metrics->metrics[metric - 1] = val;
687}
688
1da177e4
LT
689/*
690 * Routing Table
691 *
692 * return the appropriate node for a routing tree "add" operation
693 * by either creating and inserting or by returning an existing
694 * node.
695 */
696
81eb8447
WW
697static struct fib6_node *fib6_add_1(struct net *net,
698 struct fib6_table *table,
66f5d6ce
WW
699 struct fib6_node *root,
700 struct in6_addr *addr, int plen,
701 int offset, int allow_create,
702 int replace_required,
703 struct netlink_ext_ack *extack)
1da177e4
LT
704{
705 struct fib6_node *fn, *in, *ln;
706 struct fib6_node *pn = NULL;
707 struct rt6key *key;
708 int bit;
1ab1457c 709 __be32 dir = 0;
1da177e4
LT
710
711 RT6_TRACE("fib6_add_1\n");
712
713 /* insert node in tree */
714
715 fn = root;
716
717 do {
8d1c802b 718 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce
WW
719 lockdep_is_held(&table->tb6_lock));
720 key = (struct rt6key *)((u8 *)leaf + offset);
1da177e4
LT
721
722 /*
723 * Prefix match
724 */
725 if (plen < fn->fn_bit ||
4a287eba 726 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
14df015b
MV
727 if (!allow_create) {
728 if (replace_required) {
d5d531cb
DA
729 NL_SET_ERR_MSG(extack,
730 "Can not replace route - no match found");
f3213831 731 pr_warn("Can't replace route, no match found\n");
14df015b
MV
732 return ERR_PTR(-ENOENT);
733 }
f3213831 734 pr_warn("NLM_F_CREATE should be set when creating new route\n");
14df015b 735 }
1da177e4 736 goto insert_above;
4a287eba 737 }
1ab1457c 738
1da177e4
LT
739 /*
740 * Exact match ?
741 */
1ab1457c 742
1da177e4
LT
743 if (plen == fn->fn_bit) {
744 /* clean up an intermediate node */
507c9b1e 745 if (!(fn->fn_flags & RTN_RTINFO)) {
66f5d6ce 746 RCU_INIT_POINTER(fn->leaf, NULL);
93531c67 747 fib6_info_release(leaf);
4512c43e
WW
748 /* remove null_entry in the root node */
749 } else if (fn->fn_flags & RTN_TL_ROOT &&
750 rcu_access_pointer(fn->leaf) ==
421842ed 751 net->ipv6.fib6_null_entry) {
4512c43e 752 RCU_INIT_POINTER(fn->leaf, NULL);
1da177e4 753 }
1ab1457c 754
1da177e4
LT
755 return fn;
756 }
757
758 /*
759 * We have more bits to go
760 */
1ab1457c 761
1da177e4 762 /* Try to walk down on tree. */
1da177e4
LT
763 dir = addr_bit_set(addr, fn->fn_bit);
764 pn = fn;
66f5d6ce
WW
765 fn = dir ?
766 rcu_dereference_protected(fn->right,
767 lockdep_is_held(&table->tb6_lock)) :
768 rcu_dereference_protected(fn->left,
769 lockdep_is_held(&table->tb6_lock));
1da177e4
LT
770 } while (fn);
771
14df015b 772 if (!allow_create) {
4a287eba
MV
773 /* We should not create new node because
774 * NLM_F_REPLACE was specified without NLM_F_CREATE
775 * I assume it is safe to require NLM_F_CREATE when
776 * REPLACE flag is used! Later we may want to remove the
777 * check for replace_required, because according
778 * to netlink specification, NLM_F_CREATE
779 * MUST be specified if new route is created.
780 * That would keep IPv6 consistent with IPv4
781 */
14df015b 782 if (replace_required) {
d5d531cb
DA
783 NL_SET_ERR_MSG(extack,
784 "Can not replace route - no match found");
f3213831 785 pr_warn("Can't replace route, no match found\n");
14df015b
MV
786 return ERR_PTR(-ENOENT);
787 }
f3213831 788 pr_warn("NLM_F_CREATE should be set when creating new route\n");
4a287eba 789 }
1da177e4
LT
790 /*
791 * We walked to the bottom of tree.
792 * Create new leaf node without children.
793 */
794
81eb8447 795 ln = node_alloc(net);
1da177e4 796
507c9b1e 797 if (!ln)
188c517a 798 return ERR_PTR(-ENOMEM);
1da177e4 799 ln->fn_bit = plen;
66f5d6ce 800 RCU_INIT_POINTER(ln->parent, pn);
1da177e4
LT
801
802 if (dir)
66f5d6ce 803 rcu_assign_pointer(pn->right, ln);
1da177e4 804 else
66f5d6ce 805 rcu_assign_pointer(pn->left, ln);
1da177e4
LT
806
807 return ln;
808
809
810insert_above:
811 /*
1ab1457c 812 * split since we don't have a common prefix anymore or
1da177e4
LT
813 * we have a less significant route.
814 * we've to insert an intermediate node on the list
815 * this new node will point to the one we need to create
816 * and the current
817 */
818
66f5d6ce
WW
819 pn = rcu_dereference_protected(fn->parent,
820 lockdep_is_held(&table->tb6_lock));
1da177e4
LT
821
822 /* find 1st bit in difference between the 2 addrs.
823
971f359d 824 See comment in __ipv6_addr_diff: bit may be an invalid value,
1da177e4
LT
825 but if it is >= plen, the value is ignored in any case.
826 */
1ab1457c 827
9225b230 828 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
1da177e4 829
1ab1457c
YH
830 /*
831 * (intermediate)[in]
1da177e4
LT
832 * / \
833 * (new leaf node)[ln] (old node)[fn]
834 */
835 if (plen > bit) {
81eb8447
WW
836 in = node_alloc(net);
837 ln = node_alloc(net);
1ab1457c 838
507c9b1e 839 if (!in || !ln) {
1da177e4 840 if (in)
81eb8447 841 node_free_immediate(net, in);
1da177e4 842 if (ln)
81eb8447 843 node_free_immediate(net, ln);
188c517a 844 return ERR_PTR(-ENOMEM);
1da177e4
LT
845 }
846
1ab1457c
YH
847 /*
848 * new intermediate node.
1da177e4
LT
849 * RTN_RTINFO will
850 * be off since that an address that chooses one of
851 * the branches would not match less specific routes
852 * in the other branch
853 */
854
855 in->fn_bit = bit;
856
66f5d6ce 857 RCU_INIT_POINTER(in->parent, pn);
1da177e4 858 in->leaf = fn->leaf;
5ea71528
ED
859 fib6_info_hold(rcu_dereference_protected(in->leaf,
860 lockdep_is_held(&table->tb6_lock)));
1da177e4 861
1da177e4
LT
862 /* update parent pointer */
863 if (dir)
66f5d6ce 864 rcu_assign_pointer(pn->right, in);
1da177e4 865 else
66f5d6ce 866 rcu_assign_pointer(pn->left, in);
1da177e4
LT
867
868 ln->fn_bit = plen;
869
66f5d6ce
WW
870 RCU_INIT_POINTER(ln->parent, in);
871 rcu_assign_pointer(fn->parent, in);
1da177e4 872
1da177e4 873 if (addr_bit_set(addr, bit)) {
66f5d6ce
WW
874 rcu_assign_pointer(in->right, ln);
875 rcu_assign_pointer(in->left, fn);
1da177e4 876 } else {
66f5d6ce
WW
877 rcu_assign_pointer(in->left, ln);
878 rcu_assign_pointer(in->right, fn);
1da177e4
LT
879 }
880 } else { /* plen <= bit */
881
1ab1457c 882 /*
1da177e4
LT
883 * (new leaf node)[ln]
884 * / \
885 * (old node)[fn] NULL
886 */
887
81eb8447 888 ln = node_alloc(net);
1da177e4 889
507c9b1e 890 if (!ln)
188c517a 891 return ERR_PTR(-ENOMEM);
1da177e4
LT
892
893 ln->fn_bit = plen;
894
66f5d6ce 895 RCU_INIT_POINTER(ln->parent, pn);
1da177e4
LT
896
897 if (addr_bit_set(&key->addr, plen))
66f5d6ce 898 RCU_INIT_POINTER(ln->right, fn);
1da177e4 899 else
66f5d6ce
WW
900 RCU_INIT_POINTER(ln->left, fn);
901
902 rcu_assign_pointer(fn->parent, ln);
1da177e4 903
66f5d6ce
WW
904 if (dir)
905 rcu_assign_pointer(pn->right, ln);
906 else
907 rcu_assign_pointer(pn->left, ln);
1da177e4
LT
908 }
909 return ln;
910}
911
7d88d8b5
DA
912static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
913 const struct fib6_info *match,
914 const struct fib6_table *table)
e715b6d3 915{
5bcaa41b 916 int cpu;
e715b6d3 917
f40b6ae2
DA
918 if (!fib6_nh->rt6i_pcpu)
919 return;
920
5bcaa41b
DA
921 /* release the reference to this fib entry from
922 * all of its cached pcpu routes
923 */
924 for_each_possible_cpu(cpu) {
925 struct rt6_info **ppcpu_rt;
926 struct rt6_info *pcpu_rt;
e715b6d3 927
f40b6ae2 928 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
5bcaa41b 929 pcpu_rt = *ppcpu_rt;
7d88d8b5
DA
930
931 /* only dropping the 'from' reference if the cached route
932 * is using 'match'. The cached pcpu_rt->from only changes
933 * from a fib6_info to NULL (ip6_dst_destroy); it can never
934 * change from one fib6_info reference to another
935 */
936 if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
a68886a6 937 struct fib6_info *from;
e715b6d3 938
0e233874 939 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
a68886a6 940 fib6_info_release(from);
5bcaa41b 941 }
e5fd387a 942 }
e5fd387a
MK
943}
944
2ab75bfb
DA
/* Argument bundle handed to fib6_nh_drop_pcpu_from() for each nexthop. */
struct fib6_nh_pcpu_arg {
	struct fib6_info	*from;	/* route whose pcpu references go */
	const struct fib6_table *table;	/* table the route belongs to */
};
949
950static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
951{
952 struct fib6_nh_pcpu_arg *arg = _arg;
953
954 __fib6_drop_pcpu_from(nh, arg->from, arg->table);
955 return 0;
956}
957
7d88d8b5
DA
958static void fib6_drop_pcpu_from(struct fib6_info *f6i,
959 const struct fib6_table *table)
960{
7d88d8b5
DA
961 /* Make sure rt6_make_pcpu_route() wont add other percpu routes
962 * while we are cleaning them here.
963 */
964 f6i->fib6_destroying = 1;
965 mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
966
2ab75bfb
DA
967 if (f6i->nh) {
968 struct fib6_nh_pcpu_arg arg = {
969 .from = f6i,
970 .table = table
971 };
972
973 nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
974 &arg);
975 } else {
976 struct fib6_nh *fib6_nh;
977
978 fib6_nh = f6i->fib6_nh;
979 __fib6_drop_pcpu_from(fib6_nh, f6i, table);
980 }
7d88d8b5
DA
981}
982
8d1c802b 983static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
6e9e16e6
HFS
984 struct net *net)
985{
93c2fb25 986 struct fib6_table *table = rt->fib6_table;
66f5d6ce 987
f40b6ae2 988 fib6_drop_pcpu_from(rt, table);
61fb0d01 989
f88d8ea6
DA
990 if (rt->nh && !list_empty(&rt->nh_list))
991 list_del_init(&rt->nh_list);
992
f05713e0 993 if (refcount_read(&rt->fib6_ref) != 1) {
6e9e16e6
HFS
994 /* This route is used as dummy address holder in some split
995 * nodes. It is not leaked, but it still holds other resources,
996 * which must be released in time. So, scan ascendant nodes
997 * and replace dummy references to this route with references
998 * to still alive ones.
999 */
1000 while (fn) {
8d1c802b 1001 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce 1002 lockdep_is_held(&table->tb6_lock));
8d1c802b 1003 struct fib6_info *new_leaf;
66f5d6ce
WW
1004 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
1005 new_leaf = fib6_find_prefix(net, table, fn);
5ea71528 1006 fib6_info_hold(new_leaf);
93531c67 1007
66f5d6ce 1008 rcu_assign_pointer(fn->leaf, new_leaf);
93531c67 1009 fib6_info_release(rt);
6e9e16e6 1010 }
66f5d6ce
WW
1011 fn = rcu_dereference_protected(fn->parent,
1012 lockdep_is_held(&table->tb6_lock));
6e9e16e6 1013 }
6e9e16e6
HFS
1014 }
1015}
1016
1da177e4
LT
1017/*
1018 * Insert routing information in a node.
1019 */
1020
8d1c802b 1021static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
d4ead6b3 1022 struct nl_info *info,
6c31e5a9 1023 struct netlink_ext_ack *extack)
1da177e4 1024{
8d1c802b 1025 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
93c2fb25 1026 lockdep_is_held(&rt->fib6_table->tb6_lock));
33bd5ac5 1027 struct fib6_info *iter = NULL;
8d1c802b 1028 struct fib6_info __rcu **ins;
33bd5ac5 1029 struct fib6_info __rcu **fallback_ins = NULL;
507c9b1e
DM
1030 int replace = (info->nlh &&
1031 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
1032 int add = (!info->nlh ||
1033 (info->nlh->nlmsg_flags & NLM_F_CREATE));
4a287eba 1034 int found = 0;
33bd5ac5 1035 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
73483c12 1036 u16 nlflags = NLM_F_EXCL;
e5fd387a 1037 int err;
1da177e4 1038
33bd5ac5 1039 if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
1f5e29ce
DA
1040 nlflags |= NLM_F_APPEND;
1041
1da177e4
LT
1042 ins = &fn->leaf;
1043
66f5d6ce 1044 for (iter = leaf; iter;
8fb11a9a 1045 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 1046 lockdep_is_held(&rt->fib6_table->tb6_lock))) {
1da177e4
LT
1047 /*
1048 * Search for duplicates
1049 */
1050
93c2fb25 1051 if (iter->fib6_metric == rt->fib6_metric) {
1da177e4
LT
1052 /*
1053 * Same priority level
1054 */
507c9b1e
DM
1055 if (info->nlh &&
1056 (info->nlh->nlmsg_flags & NLM_F_EXCL))
4a287eba 1057 return -EEXIST;
73483c12
GN
1058
1059 nlflags &= ~NLM_F_EXCL;
4a287eba 1060 if (replace) {
33bd5ac5
DA
1061 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
1062 found++;
1063 break;
1064 }
1065 if (rt_can_ecmp)
1066 fallback_ins = fallback_ins ?: ins;
1067 goto next_iter;
4a287eba 1068 }
1da177e4 1069
f06b7549 1070 if (rt6_duplicate_nexthop(iter, rt)) {
93c2fb25
DA
1071 if (rt->fib6_nsiblings)
1072 rt->fib6_nsiblings = 0;
1073 if (!(iter->fib6_flags & RTF_EXPIRES))
1da177e4 1074 return -EEXIST;
93c2fb25 1075 if (!(rt->fib6_flags & RTF_EXPIRES))
14895687 1076 fib6_clean_expires(iter);
1716a961 1077 else
14895687 1078 fib6_set_expires(iter, rt->expires);
15a81b41
DA
1079
1080 if (rt->fib6_pmtu)
1081 fib6_metric_set(iter, RTAX_MTU,
1082 rt->fib6_pmtu);
1da177e4
LT
1083 return -EEXIST;
1084 }
33bd5ac5
DA
1085 /* If we have the same destination and the same metric,
1086 * but not the same gateway, then the route we try to
1087 * add is sibling to this route, increment our counter
1088 * of siblings, and later we will add our route to the
1089 * list.
1090 * Only static routes (which don't have flag
1091 * RTF_EXPIRES) are used for ECMPv6.
1092 *
1093 * To avoid long list, we only had siblings if the
1094 * route have a gateway.
1095 */
1096 if (rt_can_ecmp &&
1097 rt6_qualify_for_ecmp(iter))
1098 rt->fib6_nsiblings++;
1da177e4
LT
1099 }
1100
93c2fb25 1101 if (iter->fib6_metric > rt->fib6_metric)
1da177e4
LT
1102 break;
1103
33bd5ac5 1104next_iter:
8fb11a9a 1105 ins = &iter->fib6_next;
27596472
MK
1106 }
1107
33bd5ac5
DA
1108 if (fallback_ins && !found) {
1109 /* No ECMP-able route found, replace first non-ECMP one */
1110 ins = fallback_ins;
1111 iter = rcu_dereference_protected(*ins,
1112 lockdep_is_held(&rt->fib6_table->tb6_lock));
1113 found++;
1114 }
1115
f11e6659
DM
1116 /* Reset round-robin state, if necessary */
1117 if (ins == &fn->leaf)
1118 fn->rr_ptr = NULL;
1119
51ebd318 1120 /* Link this route to others same route. */
33bd5ac5
DA
1121 if (rt->fib6_nsiblings) {
1122 unsigned int fib6_nsiblings;
8d1c802b 1123 struct fib6_info *sibling, *temp_sibling;
51ebd318 1124
33bd5ac5
DA
1125 /* Find the first route that have the same metric */
1126 sibling = leaf;
1127 while (sibling) {
1128 if (sibling->fib6_metric == rt->fib6_metric &&
1129 rt6_qualify_for_ecmp(sibling)) {
1130 list_add_tail(&rt->fib6_siblings,
1131 &sibling->fib6_siblings);
1132 break;
1133 }
1134 sibling = rcu_dereference_protected(sibling->fib6_next,
1135 lockdep_is_held(&rt->fib6_table->tb6_lock));
51ebd318
ND
1136 }
1137 /* For each sibling in the list, increment the counter of
1138 * siblings. BUG() if counters does not match, list of siblings
1139 * is broken!
1140 */
33bd5ac5 1141 fib6_nsiblings = 0;
51ebd318 1142 list_for_each_entry_safe(sibling, temp_sibling,
33bd5ac5 1143 &rt->fib6_siblings, fib6_siblings) {
93c2fb25 1144 sibling->fib6_nsiblings++;
33bd5ac5
DA
1145 BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
1146 fib6_nsiblings++;
51ebd318 1147 }
33bd5ac5
DA
1148 BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
1149 rt6_multipath_rebalance(temp_sibling);
51ebd318
ND
1150 }
1151
1da177e4
LT
1152 /*
1153 * insert node
1154 */
4a287eba
MV
1155 if (!replace) {
1156 if (!add)
f3213831 1157 pr_warn("NLM_F_CREATE should be set when creating new route\n");
4a287eba
MV
1158
1159add:
73483c12 1160 nlflags |= NLM_F_CREATE;
e715b6d3 1161
d5382fef
IS
1162 if (!info->skip_notify_kernel) {
1163 err = call_fib6_entry_notifiers(info->nl_net,
1164 FIB_EVENT_ENTRY_ADD,
1165 rt, extack);
54851aa9
IS
1166 if (err) {
1167 struct fib6_info *sibling, *next_sibling;
1168
1169 /* If the route has siblings, then it first
1170 * needs to be unlinked from them.
1171 */
1172 if (!rt->fib6_nsiblings)
1173 return err;
1174
1175 list_for_each_entry_safe(sibling, next_sibling,
1176 &rt->fib6_siblings,
1177 fib6_siblings)
1178 sibling->fib6_nsiblings--;
1179 rt->fib6_nsiblings = 0;
1180 list_del_init(&rt->fib6_siblings);
1181 rt6_multipath_rebalance(next_sibling);
d5382fef 1182 return err;
54851aa9 1183 }
d5382fef 1184 }
2233000c 1185
8fb11a9a 1186 rcu_assign_pointer(rt->fib6_next, iter);
5ea71528 1187 fib6_info_hold(rt);
93c2fb25 1188 rcu_assign_pointer(rt->fib6_node, fn);
66f5d6ce 1189 rcu_assign_pointer(*ins, rt);
3b1137fe
DA
1190 if (!info->skip_notify)
1191 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4a287eba
MV
1192 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1193
507c9b1e 1194 if (!(fn->fn_flags & RTN_RTINFO)) {
4a287eba
MV
1195 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1196 fn->fn_flags |= RTN_RTINFO;
1197 }
1da177e4 1198
4a287eba 1199 } else {
33bd5ac5 1200 int nsiblings;
27596472 1201
4a287eba
MV
1202 if (!found) {
1203 if (add)
1204 goto add;
f3213831 1205 pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
4a287eba
MV
1206 return -ENOENT;
1207 }
e715b6d3 1208
d5382fef
IS
1209 if (!info->skip_notify_kernel) {
1210 err = call_fib6_entry_notifiers(info->nl_net,
1211 FIB_EVENT_ENTRY_REPLACE,
1212 rt, extack);
1213 if (err)
1214 return err;
1215 }
2233000c 1216
5ea71528 1217 fib6_info_hold(rt);
93c2fb25 1218 rcu_assign_pointer(rt->fib6_node, fn);
33bd5ac5 1219 rt->fib6_next = iter->fib6_next;
66f5d6ce 1220 rcu_assign_pointer(*ins, rt);
3b1137fe
DA
1221 if (!info->skip_notify)
1222 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
507c9b1e 1223 if (!(fn->fn_flags & RTN_RTINFO)) {
4a287eba
MV
1224 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1225 fn->fn_flags |= RTN_RTINFO;
1226 }
33bd5ac5
DA
1227 nsiblings = iter->fib6_nsiblings;
1228 iter->fib6_node = NULL;
1229 fib6_purge_rt(iter, fn, info->nl_net);
1230 if (rcu_access_pointer(fn->rr_ptr) == iter)
1231 fn->rr_ptr = NULL;
1232 fib6_info_release(iter);
27596472 1233
33bd5ac5 1234 if (nsiblings) {
27596472 1235 /* Replacing an ECMP route, remove all siblings */
33bd5ac5
DA
1236 ins = &rt->fib6_next;
1237 iter = rcu_dereference_protected(*ins,
1238 lockdep_is_held(&rt->fib6_table->tb6_lock));
1239 while (iter) {
1240 if (iter->fib6_metric > rt->fib6_metric)
1241 break;
1242 if (rt6_qualify_for_ecmp(iter)) {
1243 *ins = iter->fib6_next;
1244 iter->fib6_node = NULL;
1245 fib6_purge_rt(iter, fn, info->nl_net);
1246 if (rcu_access_pointer(fn->rr_ptr) == iter)
1247 fn->rr_ptr = NULL;
1248 fib6_info_release(iter);
1249 nsiblings--;
1250 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1251 } else {
1252 ins = &iter->fib6_next;
1253 }
1254 iter = rcu_dereference_protected(*ins,
1255 lockdep_is_held(&rt->fib6_table->tb6_lock));
27596472 1256 }
33bd5ac5 1257 WARN_ON(nsiblings != 0);
27596472 1258 }
1da177e4
LT
1259 }
1260
1261 return 0;
1262}
1263
8d1c802b 1264static void fib6_start_gc(struct net *net, struct fib6_info *rt)
1da177e4 1265{
417f28bb 1266 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
93c2fb25 1267 (rt->fib6_flags & RTF_EXPIRES))
417f28bb 1268 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 1269 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
1270}
1271
63152fc0 1272void fib6_force_start_gc(struct net *net)
1da177e4 1273{
417f28bb
SH
1274 if (!timer_pending(&net->ipv6.ip6_fib_timer))
1275 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 1276 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
1277}
1278
8d1c802b 1279static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
4a8e56ee 1280 int sernum)
bbd63f06 1281{
93c2fb25
DA
1282 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1283 lockdep_is_held(&rt->fib6_table->tb6_lock));
bbd63f06
WW
1284
1285 /* paired with smp_rmb() in rt6_get_cookie_safe() */
1286 smp_wmb();
1287 while (fn) {
1288 fn->fn_sernum = sernum;
66f5d6ce 1289 fn = rcu_dereference_protected(fn->parent,
93c2fb25 1290 lockdep_is_held(&rt->fib6_table->tb6_lock));
bbd63f06
WW
1291 }
1292}
1293
/*
 * Draw a fresh serial number for @net and stamp it on @rt's node and on
 * every ancestor of that node.
 */
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1298
cdaa16a4
DA
1299/* allow ipv4 to update sernum via ipv6_stub */
1300void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
1301{
1302 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1303 fib6_update_sernum_upto_root(net, f6i);
1304 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1305}
1306
1da177e4
LT
1307/*
1308 * Add routing information to the routing tree.
1309 * <destination addr>/<source addr>
1310 * with source addr info in sub-trees
66f5d6ce 1311 * Need to own table->tb6_lock
1da177e4
LT
1312 */
1313
8d1c802b 1314int fib6_add(struct fib6_node *root, struct fib6_info *rt,
d4ead6b3 1315 struct nl_info *info, struct netlink_ext_ack *extack)
1da177e4 1316{
93c2fb25 1317 struct fib6_table *table = rt->fib6_table;
66729e18 1318 struct fib6_node *fn, *pn = NULL;
1da177e4 1319 int err = -ENOMEM;
4a287eba
MV
1320 int allow_create = 1;
1321 int replace_required = 0;
812918c4 1322 int sernum = fib6_new_sernum(info->nl_net);
507c9b1e
DM
1323
1324 if (info->nlh) {
1325 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
4a287eba 1326 allow_create = 0;
507c9b1e 1327 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
4a287eba
MV
1328 replace_required = 1;
1329 }
1330 if (!allow_create && !replace_required)
f3213831 1331 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1da177e4 1332
81eb8447 1333 fn = fib6_add_1(info->nl_net, table, root,
93c2fb25
DA
1334 &rt->fib6_dst.addr, rt->fib6_dst.plen,
1335 offsetof(struct fib6_info, fib6_dst), allow_create,
bbd63f06 1336 replace_required, extack);
4a287eba
MV
1337 if (IS_ERR(fn)) {
1338 err = PTR_ERR(fn);
ae7b4e1f 1339 fn = NULL;
1da177e4 1340 goto out;
188c517a 1341 }
1da177e4 1342
66729e18
YH
1343 pn = fn;
1344
1da177e4 1345#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 1346 if (rt->fib6_src.plen) {
1da177e4
LT
1347 struct fib6_node *sn;
1348
66f5d6ce 1349 if (!rcu_access_pointer(fn->subtree)) {
1da177e4
LT
1350 struct fib6_node *sfn;
1351
1352 /*
1353 * Create subtree.
1354 *
1355 * fn[main tree]
1356 * |
1357 * sfn[subtree root]
1358 * \
1359 * sn[new leaf node]
1360 */
1361
1362 /* Create subtree root node */
81eb8447 1363 sfn = node_alloc(info->nl_net);
507c9b1e 1364 if (!sfn)
348a4002 1365 goto failure;
1da177e4 1366
5ea71528 1367 fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
66f5d6ce 1368 rcu_assign_pointer(sfn->leaf,
421842ed 1369 info->nl_net->ipv6.fib6_null_entry);
1da177e4 1370 sfn->fn_flags = RTN_ROOT;
1da177e4
LT
1371
1372 /* Now add the first leaf node to new subtree */
1373
81eb8447 1374 sn = fib6_add_1(info->nl_net, table, sfn,
93c2fb25
DA
1375 &rt->fib6_src.addr, rt->fib6_src.plen,
1376 offsetof(struct fib6_info, fib6_src),
bbd63f06 1377 allow_create, replace_required, extack);
1da177e4 1378
f950c0ec 1379 if (IS_ERR(sn)) {
1da177e4 1380 /* If it is failed, discard just allocated
348a4002 1381 root, and then (in failure) stale node
1da177e4
LT
1382 in main tree.
1383 */
81eb8447 1384 node_free_immediate(info->nl_net, sfn);
188c517a 1385 err = PTR_ERR(sn);
348a4002 1386 goto failure;
1da177e4
LT
1387 }
1388
1389 /* Now link new subtree to main tree */
66f5d6ce
WW
1390 rcu_assign_pointer(sfn->parent, fn);
1391 rcu_assign_pointer(fn->subtree, sfn);
1da177e4 1392 } else {
81eb8447 1393 sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
93c2fb25
DA
1394 &rt->fib6_src.addr, rt->fib6_src.plen,
1395 offsetof(struct fib6_info, fib6_src),
bbd63f06 1396 allow_create, replace_required, extack);
1da177e4 1397
4a287eba
MV
1398 if (IS_ERR(sn)) {
1399 err = PTR_ERR(sn);
348a4002 1400 goto failure;
188c517a 1401 }
1da177e4
LT
1402 }
1403
66f5d6ce 1404 if (!rcu_access_pointer(fn->leaf)) {
591ff9ea
WW
1405 if (fn->fn_flags & RTN_TL_ROOT) {
1406 /* put back null_entry for root node */
1407 rcu_assign_pointer(fn->leaf,
421842ed 1408 info->nl_net->ipv6.fib6_null_entry);
591ff9ea 1409 } else {
5ea71528 1410 fib6_info_hold(rt);
591ff9ea
WW
1411 rcu_assign_pointer(fn->leaf, rt);
1412 }
66729e18 1413 }
1da177e4
LT
1414 fn = sn;
1415 }
1416#endif
1417
d4ead6b3 1418 err = fib6_add_rt2node(fn, rt, info, extack);
bbd63f06 1419 if (!err) {
f88d8ea6
DA
1420 if (rt->nh)
1421 list_add(&rt->nh_list, &rt->nh->f6i_list);
4a8e56ee 1422 __fib6_update_sernum_upto_root(rt, sernum);
63152fc0 1423 fib6_start_gc(info->nl_net, rt);
bbd63f06 1424 }
1da177e4
LT
1425
1426out:
66729e18
YH
1427 if (err) {
1428#ifdef CONFIG_IPV6_SUBTREES
1429 /*
1430 * If fib6_add_1 has cleared the old leaf pointer in the
1431 * super-tree leaf node we have to find a new one for it.
1432 */
7bbfe00e 1433 if (pn != fn) {
8d1c802b 1434 struct fib6_info *pn_leaf =
7bbfe00e
WW
1435 rcu_dereference_protected(pn->leaf,
1436 lockdep_is_held(&table->tb6_lock));
1437 if (pn_leaf == rt) {
1438 pn_leaf = NULL;
1439 RCU_INIT_POINTER(pn->leaf, NULL);
93531c67 1440 fib6_info_release(rt);
66729e18 1441 }
7bbfe00e
WW
1442 if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1443 pn_leaf = fib6_find_prefix(info->nl_net, table,
1444 pn);
1445#if RT6_DEBUG >= 2
1446 if (!pn_leaf) {
1447 WARN_ON(!pn_leaf);
1448 pn_leaf =
421842ed 1449 info->nl_net->ipv6.fib6_null_entry;
7bbfe00e 1450 }
66729e18 1451#endif
93531c67 1452 fib6_info_hold(pn_leaf);
7bbfe00e
WW
1453 rcu_assign_pointer(pn->leaf, pn_leaf);
1454 }
66729e18
YH
1455 }
1456#endif
348a4002 1457 goto failure;
66729e18 1458 }
1da177e4
LT
1459 return err;
1460
348a4002 1461failure:
4512c43e
WW
1462 /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1463 * 1. fn is an intermediate node and we failed to add the new
1464 * route to it in both subtree creation failure and fib6_add_rt2node()
1465 * failure case.
1466 * 2. fn is the root node in the table and we fail to add the first
1467 * default route to it.
1da177e4 1468 */
4512c43e
WW
1469 if (fn &&
1470 (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1471 (fn->fn_flags & RTN_TL_ROOT &&
1472 !rcu_access_pointer(fn->leaf))))
66f5d6ce 1473 fib6_repair_tree(info->nl_net, table, fn);
1da177e4 1474 return err;
1da177e4
LT
1475}
1476
1477/*
1478 * Routing tree lookup
1479 *
1480 */
1481
/* One lookup step: which key of a fib6_info to compare, and against what.
 * fib6_node_lookup_1() treats an entry with offset 0 as the end marker.
 */
struct lookup_args {
	int			offset;		/* byte offset of the key within fib6_info */
	const struct in6_addr	*addr;		/* address being looked up */
};
1486
6454743b
DA
1487static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
1488 struct lookup_args *args)
1da177e4
LT
1489{
1490 struct fib6_node *fn;
e69a4adc 1491 __be32 dir;
1da177e4 1492
825e288e
YH
1493 if (unlikely(args->offset == 0))
1494 return NULL;
1495
1da177e4
LT
1496 /*
1497 * Descend on a tree
1498 */
1499
1500 fn = root;
1501
1502 for (;;) {
1503 struct fib6_node *next;
1504
1505 dir = addr_bit_set(args->addr, fn->fn_bit);
1506
66f5d6ce
WW
1507 next = dir ? rcu_dereference(fn->right) :
1508 rcu_dereference(fn->left);
1da177e4
LT
1509
1510 if (next) {
1511 fn = next;
1512 continue;
1513 }
1da177e4
LT
1514 break;
1515 }
1516
507c9b1e 1517 while (fn) {
66f5d6ce
WW
1518 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1519
1520 if (subtree || fn->fn_flags & RTN_RTINFO) {
8d1c802b 1521 struct fib6_info *leaf = rcu_dereference(fn->leaf);
1da177e4
LT
1522 struct rt6key *key;
1523
8d1040e8
WW
1524 if (!leaf)
1525 goto backtrack;
1526
1527 key = (struct rt6key *) ((u8 *)leaf + args->offset);
1da177e4 1528
3fc5e044
YH
1529 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1530#ifdef CONFIG_IPV6_SUBTREES
66f5d6ce 1531 if (subtree) {
3e3be275 1532 struct fib6_node *sfn;
6454743b
DA
1533 sfn = fib6_node_lookup_1(subtree,
1534 args + 1);
3e3be275
HFS
1535 if (!sfn)
1536 goto backtrack;
1537 fn = sfn;
1538 }
3fc5e044 1539#endif
3e3be275 1540 if (fn->fn_flags & RTN_RTINFO)
3fc5e044
YH
1541 return fn;
1542 }
1da177e4 1543 }
3e3be275 1544backtrack:
3fc5e044
YH
1545 if (fn->fn_flags & RTN_ROOT)
1546 break;
1547
66f5d6ce 1548 fn = rcu_dereference(fn->parent);
1da177e4
LT
1549 }
1550
1551 return NULL;
1552}
1553
66f5d6ce
WW
1554/* called with rcu_read_lock() held
1555 */
6454743b
DA
1556struct fib6_node *fib6_node_lookup(struct fib6_node *root,
1557 const struct in6_addr *daddr,
1558 const struct in6_addr *saddr)
1da177e4 1559{
1da177e4 1560 struct fib6_node *fn;
825e288e
YH
1561 struct lookup_args args[] = {
1562 {
93c2fb25 1563 .offset = offsetof(struct fib6_info, fib6_dst),
825e288e
YH
1564 .addr = daddr,
1565 },
1da177e4 1566#ifdef CONFIG_IPV6_SUBTREES
825e288e 1567 {
93c2fb25 1568 .offset = offsetof(struct fib6_info, fib6_src),
825e288e
YH
1569 .addr = saddr,
1570 },
1da177e4 1571#endif
825e288e
YH
1572 {
1573 .offset = 0, /* sentinel */
1574 }
1575 };
1da177e4 1576
6454743b 1577 fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
507c9b1e 1578 if (!fn || fn->fn_flags & RTN_TL_ROOT)
1da177e4
LT
1579 fn = root;
1580
1581 return fn;
1582}
1583
1584/*
1585 * Get node with specified destination prefix (and source prefix,
1586 * if subtrees are used)
38fbeeee
WW
1587 * exact_match == true means we try to find fn with exact match of
1588 * the passed in prefix addr
1589 * exact_match == false means we try to find fn with longest prefix
1590 * match of the passed in prefix addr. This is useful for finding fn
1591 * for cached route as it will be stored in the exception table under
1592 * the node with longest prefix length.
1da177e4
LT
1593 */
1594
1595
437de07c
WY
1596static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1597 const struct in6_addr *addr,
38fbeeee
WW
1598 int plen, int offset,
1599 bool exact_match)
1da177e4 1600{
38fbeeee 1601 struct fib6_node *fn, *prev = NULL;
1da177e4
LT
1602
1603 for (fn = root; fn ; ) {
8d1c802b 1604 struct fib6_info *leaf = rcu_dereference(fn->leaf);
8d1040e8
WW
1605 struct rt6key *key;
1606
1607 /* This node is being deleted */
1608 if (!leaf) {
1609 if (plen <= fn->fn_bit)
1610 goto out;
1611 else
1612 goto next;
1613 }
1614
1615 key = (struct rt6key *)((u8 *)leaf + offset);
1da177e4
LT
1616
1617 /*
1618 * Prefix match
1619 */
1620 if (plen < fn->fn_bit ||
1621 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
38fbeeee 1622 goto out;
1da177e4
LT
1623
1624 if (plen == fn->fn_bit)
1625 return fn;
1626
40cb35d5
SB
1627 if (fn->fn_flags & RTN_RTINFO)
1628 prev = fn;
38fbeeee 1629
8d1040e8 1630next:
1da177e4
LT
1631 /*
1632 * We have more bits to go
1633 */
1634 if (addr_bit_set(addr, fn->fn_bit))
66f5d6ce 1635 fn = rcu_dereference(fn->right);
1da177e4 1636 else
66f5d6ce 1637 fn = rcu_dereference(fn->left);
1da177e4 1638 }
38fbeeee
WW
1639out:
1640 if (exact_match)
1641 return NULL;
1642 else
1643 return prev;
1da177e4
LT
1644}
1645
437de07c
WY
1646struct fib6_node *fib6_locate(struct fib6_node *root,
1647 const struct in6_addr *daddr, int dst_len,
38fbeeee
WW
1648 const struct in6_addr *saddr, int src_len,
1649 bool exact_match)
1da177e4
LT
1650{
1651 struct fib6_node *fn;
1652
1653 fn = fib6_locate_1(root, daddr, dst_len,
93c2fb25 1654 offsetof(struct fib6_info, fib6_dst),
38fbeeee 1655 exact_match);
1da177e4
LT
1656
1657#ifdef CONFIG_IPV6_SUBTREES
1658 if (src_len) {
547b792c 1659 WARN_ON(saddr == NULL);
0e80193b
WW
1660 if (fn) {
1661 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1662
1663 if (subtree) {
1664 fn = fib6_locate_1(subtree, saddr, src_len,
93c2fb25 1665 offsetof(struct fib6_info, fib6_src),
38fbeeee 1666 exact_match);
0e80193b
WW
1667 }
1668 }
1da177e4
LT
1669 }
1670#endif
1671
507c9b1e 1672 if (fn && fn->fn_flags & RTN_RTINFO)
1da177e4
LT
1673 return fn;
1674
1675 return NULL;
1676}
1677
1678
1679/*
1680 * Deletion
1681 *
1682 */
1683
8d1c802b 1684static struct fib6_info *fib6_find_prefix(struct net *net,
66f5d6ce
WW
1685 struct fib6_table *table,
1686 struct fib6_node *fn)
1da177e4 1687{
66f5d6ce
WW
1688 struct fib6_node *child_left, *child_right;
1689
507c9b1e 1690 if (fn->fn_flags & RTN_ROOT)
421842ed 1691 return net->ipv6.fib6_null_entry;
1da177e4 1692
507c9b1e 1693 while (fn) {
66f5d6ce
WW
1694 child_left = rcu_dereference_protected(fn->left,
1695 lockdep_is_held(&table->tb6_lock));
1696 child_right = rcu_dereference_protected(fn->right,
1697 lockdep_is_held(&table->tb6_lock));
1698 if (child_left)
1699 return rcu_dereference_protected(child_left->leaf,
1700 lockdep_is_held(&table->tb6_lock));
1701 if (child_right)
1702 return rcu_dereference_protected(child_right->leaf,
1703 lockdep_is_held(&table->tb6_lock));
1da177e4 1704
7fc33165 1705 fn = FIB6_SUBTREE(fn);
1da177e4
LT
1706 }
1707 return NULL;
1708}
1709
1710/*
1711 * Called to trim the tree of intermediate nodes when possible. "fn"
1712 * is the node we want to try and remove.
66f5d6ce 1713 * Need to own table->tb6_lock
1da177e4
LT
1714 */
1715
8ed67789 1716static struct fib6_node *fib6_repair_tree(struct net *net,
66f5d6ce
WW
1717 struct fib6_table *table,
1718 struct fib6_node *fn)
1da177e4
LT
1719{
1720 int children;
1721 int nstate;
66f5d6ce 1722 struct fib6_node *child;
94b2cfe0 1723 struct fib6_walker *w;
1da177e4
LT
1724 int iter = 0;
1725
4512c43e
WW
1726 /* Set fn->leaf to null_entry for root node. */
1727 if (fn->fn_flags & RTN_TL_ROOT) {
421842ed 1728 rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
4512c43e
WW
1729 return fn;
1730 }
1731
1da177e4 1732 for (;;) {
66f5d6ce
WW
1733 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1734 lockdep_is_held(&table->tb6_lock));
1735 struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1736 lockdep_is_held(&table->tb6_lock));
1737 struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1738 lockdep_is_held(&table->tb6_lock));
1739 struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1740 lockdep_is_held(&table->tb6_lock));
1741 struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1742 lockdep_is_held(&table->tb6_lock));
8d1c802b 1743 struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce 1744 lockdep_is_held(&table->tb6_lock));
8d1c802b 1745 struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
66f5d6ce 1746 lockdep_is_held(&table->tb6_lock));
8d1c802b 1747 struct fib6_info *new_fn_leaf;
66f5d6ce 1748
1da177e4
LT
1749 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1750 iter++;
1751
547b792c
IJ
1752 WARN_ON(fn->fn_flags & RTN_RTINFO);
1753 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
66f5d6ce 1754 WARN_ON(fn_leaf);
1da177e4
LT
1755
1756 children = 0;
1757 child = NULL;
66f5d6ce
WW
1758 if (fn_r)
1759 child = fn_r, children |= 1;
1760 if (fn_l)
1761 child = fn_l, children |= 2;
1da177e4 1762
7fc33165 1763 if (children == 3 || FIB6_SUBTREE(fn)
1da177e4
LT
1764#ifdef CONFIG_IPV6_SUBTREES
1765 /* Subtree root (i.e. fn) may have one child */
507c9b1e 1766 || (children && fn->fn_flags & RTN_ROOT)
1da177e4
LT
1767#endif
1768 ) {
66f5d6ce 1769 new_fn_leaf = fib6_find_prefix(net, table, fn);
1da177e4 1770#if RT6_DEBUG >= 2
66f5d6ce
WW
1771 if (!new_fn_leaf) {
1772 WARN_ON(!new_fn_leaf);
421842ed 1773 new_fn_leaf = net->ipv6.fib6_null_entry;
1da177e4
LT
1774 }
1775#endif
93531c67 1776 fib6_info_hold(new_fn_leaf);
66f5d6ce
WW
1777 rcu_assign_pointer(fn->leaf, new_fn_leaf);
1778 return pn;
1da177e4
LT
1779 }
1780
1da177e4 1781#ifdef CONFIG_IPV6_SUBTREES
7fc33165 1782 if (FIB6_SUBTREE(pn) == fn) {
547b792c 1783 WARN_ON(!(fn->fn_flags & RTN_ROOT));
66f5d6ce 1784 RCU_INIT_POINTER(pn->subtree, NULL);
1da177e4
LT
1785 nstate = FWS_L;
1786 } else {
547b792c 1787 WARN_ON(fn->fn_flags & RTN_ROOT);
1da177e4 1788#endif
66f5d6ce
WW
1789 if (pn_r == fn)
1790 rcu_assign_pointer(pn->right, child);
1791 else if (pn_l == fn)
1792 rcu_assign_pointer(pn->left, child);
1da177e4 1793#if RT6_DEBUG >= 2
547b792c
IJ
1794 else
1795 WARN_ON(1);
1da177e4
LT
1796#endif
1797 if (child)
66f5d6ce 1798 rcu_assign_pointer(child->parent, pn);
1da177e4
LT
1799 nstate = FWS_R;
1800#ifdef CONFIG_IPV6_SUBTREES
1801 }
1802#endif
1803
9a03cd8f
MK
1804 read_lock(&net->ipv6.fib6_walker_lock);
1805 FOR_WALKERS(net, w) {
507c9b1e 1806 if (!child) {
2b760fcf 1807 if (w->node == fn) {
1da177e4
LT
1808 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1809 w->node = pn;
1810 w->state = nstate;
1811 }
1812 } else {
1da177e4
LT
1813 if (w->node == fn) {
1814 w->node = child;
1815 if (children&2) {
1816 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
8db46f1d 1817 w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
1da177e4
LT
1818 } else {
1819 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
8db46f1d 1820 w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
1da177e4
LT
1821 }
1822 }
1823 }
1824 }
9a03cd8f 1825 read_unlock(&net->ipv6.fib6_walker_lock);
1da177e4 1826
81eb8447 1827 node_free(net, fn);
507c9b1e 1828 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1da177e4
LT
1829 return pn;
1830
66f5d6ce 1831 RCU_INIT_POINTER(pn->leaf, NULL);
93531c67 1832 fib6_info_release(pn_leaf);
1da177e4
LT
1833 fn = pn;
1834 }
1835}
1836
66f5d6ce 1837static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
8d1c802b 1838 struct fib6_info __rcu **rtp, struct nl_info *info)
1da177e4 1839{
94b2cfe0 1840 struct fib6_walker *w;
8d1c802b 1841 struct fib6_info *rt = rcu_dereference_protected(*rtp,
66f5d6ce 1842 lockdep_is_held(&table->tb6_lock));
c572872f 1843 struct net *net = info->nl_net;
1da177e4
LT
1844
1845 RT6_TRACE("fib6_del_route\n");
1846
1847 /* Unlink it */
8fb11a9a 1848 *rtp = rt->fib6_next;
93c2fb25 1849 rt->fib6_node = NULL;
c572872f
BT
1850 net->ipv6.rt6_stats->fib_rt_entries--;
1851 net->ipv6.rt6_stats->fib_discarded_routes++;
1da177e4 1852
2b760fcf
WW
1853 /* Flush all cached dst in exception table */
1854 rt6_flush_exceptions(rt);
1855
f11e6659 1856 /* Reset round-robin state, if necessary */
66f5d6ce 1857 if (rcu_access_pointer(fn->rr_ptr) == rt)
f11e6659
DM
1858 fn->rr_ptr = NULL;
1859
51ebd318 1860 /* Remove this entry from other siblings */
93c2fb25 1861 if (rt->fib6_nsiblings) {
8d1c802b 1862 struct fib6_info *sibling, *next_sibling;
51ebd318
ND
1863
1864 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
1865 &rt->fib6_siblings, fib6_siblings)
1866 sibling->fib6_nsiblings--;
1867 rt->fib6_nsiblings = 0;
1868 list_del_init(&rt->fib6_siblings);
d7dedee1 1869 rt6_multipath_rebalance(next_sibling);
51ebd318
ND
1870 }
1871
1da177e4 1872 /* Adjust walkers */
9a03cd8f
MK
1873 read_lock(&net->ipv6.fib6_walker_lock);
1874 FOR_WALKERS(net, w) {
1da177e4
LT
1875 if (w->state == FWS_C && w->leaf == rt) {
1876 RT6_TRACE("walker %p adjusted by delroute\n", w);
8fb11a9a 1877 w->leaf = rcu_dereference_protected(rt->fib6_next,
66f5d6ce 1878 lockdep_is_held(&table->tb6_lock));
507c9b1e 1879 if (!w->leaf)
1da177e4
LT
1880 w->state = FWS_U;
1881 }
1882 }
9a03cd8f 1883 read_unlock(&net->ipv6.fib6_walker_lock);
1da177e4 1884
4512c43e
WW
1885 /* If it was last route, call fib6_repair_tree() to:
1886 * 1. For root node, put back null_entry as how the table was created.
1887 * 2. For other nodes, expunge its radix tree node.
1888 */
66f5d6ce 1889 if (!rcu_access_pointer(fn->leaf)) {
4512c43e
WW
1890 if (!(fn->fn_flags & RTN_TL_ROOT)) {
1891 fn->fn_flags &= ~RTN_RTINFO;
1892 net->ipv6.rt6_stats->fib_route_nodes--;
1893 }
66f5d6ce 1894 fn = fib6_repair_tree(net, table, fn);
1da177e4
LT
1895 }
1896
6e9e16e6 1897 fib6_purge_rt(rt, fn, net);
1da177e4 1898
d5382fef
IS
1899 if (!info->skip_notify_kernel)
1900 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
16a16cd3
DA
1901 if (!info->skip_notify)
1902 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
d5382fef 1903
93531c67 1904 fib6_info_release(rt);
1da177e4
LT
1905}
1906
66f5d6ce 1907/* Need to own table->tb6_lock */
8d1c802b 1908int fib6_del(struct fib6_info *rt, struct nl_info *info)
1da177e4 1909{
93c2fb25
DA
1910 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1911 lockdep_is_held(&rt->fib6_table->tb6_lock));
1912 struct fib6_table *table = rt->fib6_table;
8ed67789 1913 struct net *net = info->nl_net;
8d1c802b
DA
1914 struct fib6_info __rcu **rtp;
1915 struct fib6_info __rcu **rtp_next;
1da177e4 1916
421842ed 1917 if (!fn || rt == net->ipv6.fib6_null_entry)
1da177e4
LT
1918 return -ENOENT;
1919
547b792c 1920 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1da177e4 1921
1da177e4
LT
1922 /*
1923 * Walk the leaf entries looking for ourself
1924 */
1925
66f5d6ce 1926 for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
8d1c802b 1927 struct fib6_info *cur = rcu_dereference_protected(*rtp,
66f5d6ce
WW
1928 lockdep_is_held(&table->tb6_lock));
1929 if (rt == cur) {
1930 fib6_del_route(table, fn, rtp, info);
1da177e4
LT
1931 return 0;
1932 }
8fb11a9a 1933 rtp_next = &cur->fib6_next;
1da177e4
LT
1934 }
1935 return -ENOENT;
1936}
1937
1938/*
1939 * Tree traversal function.
1940 *
1941 * Certainly, it is not interrupt safe.
1942 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1943 * It means, that we can modify tree during walking
1944 * and use this function for garbage collection, clone pruning,
1ab1457c 1945 * cleaning tree when a device goes down etc. etc.
1da177e4
LT
1946 *
1947 * It guarantees that every node will be traversed,
1948 * and that it will be traversed only once.
1949 *
1950 * Callback function w->func may return:
1951 * 0 -> continue walking.
1952 * positive value -> walking is suspended (used by tree dumps,
1953 * and probably by gc, if it will be split to several slices)
1954 * negative value -> terminate walking.
1955 *
1956 * The function itself returns:
1957 * 0 -> walk is complete.
1958 * >0 -> walk is incomplete (i.e. suspended)
1959 * <0 -> walk is terminated by an error.
66f5d6ce
WW
1960 *
1961 * This function is called with tb6_lock held.
1da177e4
LT
1962 */
1963
94b2cfe0 1964static int fib6_walk_continue(struct fib6_walker *w)
1da177e4 1965{
66f5d6ce 1966 struct fib6_node *fn, *pn, *left, *right;
1da177e4 1967
2b760fcf
WW
1968 /* w->root should always be table->tb6_root */
1969 WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
1970
1da177e4
LT
1971 for (;;) {
1972 fn = w->node;
507c9b1e 1973 if (!fn)
1da177e4
LT
1974 return 0;
1975
1da177e4
LT
1976 switch (w->state) {
1977#ifdef CONFIG_IPV6_SUBTREES
1978 case FWS_S:
7fc33165
YH
1979 if (FIB6_SUBTREE(fn)) {
1980 w->node = FIB6_SUBTREE(fn);
1da177e4
LT
1981 continue;
1982 }
1983 w->state = FWS_L;
1ab1457c 1984#endif
275757e6 1985 /* fall through */
1da177e4 1986 case FWS_L:
66f5d6ce
WW
1987 left = rcu_dereference_protected(fn->left, 1);
1988 if (left) {
1989 w->node = left;
1da177e4
LT
1990 w->state = FWS_INIT;
1991 continue;
1992 }
1993 w->state = FWS_R;
275757e6 1994 /* fall through */
1da177e4 1995 case FWS_R:
66f5d6ce
WW
1996 right = rcu_dereference_protected(fn->right, 1);
1997 if (right) {
1998 w->node = right;
1da177e4
LT
1999 w->state = FWS_INIT;
2000 continue;
2001 }
2002 w->state = FWS_C;
66f5d6ce 2003 w->leaf = rcu_dereference_protected(fn->leaf, 1);
275757e6 2004 /* fall through */
1da177e4 2005 case FWS_C:
507c9b1e 2006 if (w->leaf && fn->fn_flags & RTN_RTINFO) {
2bec5a36
PM
2007 int err;
2008
fa809e2f
ED
2009 if (w->skip) {
2010 w->skip--;
1c265854 2011 goto skip;
2bec5a36
PM
2012 }
2013
2014 err = w->func(w);
1da177e4
LT
2015 if (err)
2016 return err;
2bec5a36
PM
2017
2018 w->count++;
1da177e4
LT
2019 continue;
2020 }
1c265854 2021skip:
1da177e4 2022 w->state = FWS_U;
275757e6 2023 /* fall through */
1da177e4
LT
2024 case FWS_U:
2025 if (fn == w->root)
2026 return 0;
66f5d6ce
WW
2027 pn = rcu_dereference_protected(fn->parent, 1);
2028 left = rcu_dereference_protected(pn->left, 1);
2029 right = rcu_dereference_protected(pn->right, 1);
1da177e4
LT
2030 w->node = pn;
2031#ifdef CONFIG_IPV6_SUBTREES
7fc33165 2032 if (FIB6_SUBTREE(pn) == fn) {
547b792c 2033 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1da177e4
LT
2034 w->state = FWS_L;
2035 continue;
2036 }
2037#endif
66f5d6ce 2038 if (left == fn) {
1da177e4
LT
2039 w->state = FWS_R;
2040 continue;
2041 }
66f5d6ce 2042 if (right == fn) {
1da177e4 2043 w->state = FWS_C;
66f5d6ce 2044 w->leaf = rcu_dereference_protected(w->node->leaf, 1);
1da177e4
LT
2045 continue;
2046 }
2047#if RT6_DEBUG >= 2
547b792c 2048 WARN_ON(1);
1da177e4
LT
2049#endif
2050 }
2051 }
2052}
2053
9a03cd8f 2054static int fib6_walk(struct net *net, struct fib6_walker *w)
1da177e4
LT
2055{
2056 int res;
2057
2058 w->state = FWS_INIT;
2059 w->node = w->root;
2060
9a03cd8f 2061 fib6_walker_link(net, w);
1da177e4
LT
2062 res = fib6_walk_continue(w);
2063 if (res <= 0)
9a03cd8f 2064 fib6_walker_unlink(net, w);
1da177e4
LT
2065 return res;
2066}
2067
94b2cfe0 2068static int fib6_clean_node(struct fib6_walker *w)
1da177e4
LT
2069{
2070 int res;
8d1c802b 2071 struct fib6_info *rt;
94b2cfe0 2072 struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
ec7d43c2
BT
2073 struct nl_info info = {
2074 .nl_net = c->net,
7c6bb7d2 2075 .skip_notify = c->skip_notify,
ec7d43c2 2076 };
1da177e4 2077
327571cb
HFS
2078 if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
2079 w->node->fn_sernum != c->sernum)
2080 w->node->fn_sernum = c->sernum;
2081
2082 if (!c->func) {
2083 WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
2084 w->leaf = NULL;
2085 return 0;
2086 }
2087
66f5d6ce 2088 for_each_fib6_walker_rt(w) {
1da177e4 2089 res = c->func(rt, c->arg);
b5cb5a75 2090 if (res == -1) {
1da177e4 2091 w->leaf = rt;
528c4ceb 2092 res = fib6_del(rt, &info);
1da177e4
LT
2093 if (res) {
2094#if RT6_DEBUG >= 2
91df42be 2095 pr_debug("%s: del failed: rt=%p@%p err=%d\n",
4e587ea7 2096 __func__, rt,
93c2fb25 2097 rcu_access_pointer(rt->fib6_node),
4e587ea7 2098 res);
1da177e4
LT
2099#endif
2100 continue;
2101 }
2102 return 0;
b5cb5a75 2103 } else if (res == -2) {
93c2fb25 2104 if (WARN_ON(!rt->fib6_nsiblings))
b5cb5a75 2105 continue;
93c2fb25
DA
2106 rt = list_last_entry(&rt->fib6_siblings,
2107 struct fib6_info, fib6_siblings);
b5cb5a75 2108 continue;
1da177e4 2109 }
547b792c 2110 WARN_ON(res != 0);
1da177e4
LT
2111 }
2112 w->leaf = rt;
2113 return 0;
2114}
2115
2116/*
2117 * Convenient frontend to tree walker.
1ab1457c 2118 *
1da177e4 2119 * func is called on each route.
b5cb5a75
IS
2120 * It may return -2 -> skip multipath route.
2121 * -1 -> delete this route.
1da177e4 2122 * 0 -> continue walking
1da177e4
LT
2123 */
2124
ec7d43c2 2125static void fib6_clean_tree(struct net *net, struct fib6_node *root,
8d1c802b 2126 int (*func)(struct fib6_info *, void *arg),
7c6bb7d2 2127 int sernum, void *arg, bool skip_notify)
1da177e4 2128{
94b2cfe0 2129 struct fib6_cleaner c;
1da177e4
LT
2130
2131 c.w.root = root;
2132 c.w.func = fib6_clean_node;
2bec5a36
PM
2133 c.w.count = 0;
2134 c.w.skip = 0;
1e47b483 2135 c.w.skip_in_node = 0;
1da177e4 2136 c.func = func;
327571cb 2137 c.sernum = sernum;
1da177e4 2138 c.arg = arg;
ec7d43c2 2139 c.net = net;
7c6bb7d2 2140 c.skip_notify = skip_notify;
1da177e4 2141
9a03cd8f 2142 fib6_walk(net, &c.w);
1da177e4
LT
2143}
2144
327571cb 2145static void __fib6_clean_all(struct net *net,
8d1c802b 2146 int (*func)(struct fib6_info *, void *),
7c6bb7d2 2147 int sernum, void *arg, bool skip_notify)
c71099ac 2148{
c71099ac 2149 struct fib6_table *table;
58f09b78 2150 struct hlist_head *head;
1b43af54 2151 unsigned int h;
c71099ac 2152
1b43af54 2153 rcu_read_lock();
a33bc5c1 2154 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
f3db4851 2155 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 2156 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
66f5d6ce 2157 spin_lock_bh(&table->tb6_lock);
ec7d43c2 2158 fib6_clean_tree(net, &table->tb6_root,
7c6bb7d2 2159 func, sernum, arg, skip_notify);
66f5d6ce 2160 spin_unlock_bh(&table->tb6_lock);
c71099ac
TG
2161 }
2162 }
1b43af54 2163 rcu_read_unlock();
c71099ac
TG
2164}
2165
8d1c802b 2166void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
327571cb
HFS
2167 void *arg)
2168{
7c6bb7d2
DA
2169 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2170}
2171
2172void fib6_clean_all_skip_notify(struct net *net,
2173 int (*func)(struct fib6_info *, void *),
2174 void *arg)
2175{
2176 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
327571cb
HFS
2177}
2178
705f1c86
HFS
2179static void fib6_flush_trees(struct net *net)
2180{
812918c4 2181 int new_sernum = fib6_new_sernum(net);
705f1c86 2182
7c6bb7d2 2183 __fib6_clean_all(net, NULL, new_sernum, NULL, false);
705f1c86
HFS
2184}
2185
1da177e4
LT
2186/*
2187 * Garbage collection
2188 */
2189
8d1c802b 2190static int fib6_age(struct fib6_info *rt, void *arg)
1da177e4 2191{
3570df91 2192 struct fib6_gc_args *gc_args = arg;
1da177e4
LT
2193 unsigned long now = jiffies;
2194
2195 /*
2196 * check addrconf expiration here.
2197 * Routes are expired even if they are in use.
1da177e4
LT
2198 */
2199
93c2fb25 2200 if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
14895687 2201 if (time_after(now, rt->expires)) {
1da177e4 2202 RT6_TRACE("expiring %p\n", rt);
1da177e4
LT
2203 return -1;
2204 }
3570df91 2205 gc_args->more++;
1da177e4
LT
2206 }
2207
c757faa8
WW
2208 /* Also age clones in the exception table.
2209 * Note, that clones are aged out
2210 * only if they are not in use now.
2211 */
2212 rt6_age_exceptions(rt, gc_args, now);
2213
1da177e4
LT
2214 return 0;
2215}
2216
2ac3ac8f 2217void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1da177e4 2218{
3570df91 2219 struct fib6_gc_args gc_args;
49a18d86
MK
2220 unsigned long now;
2221
2ac3ac8f 2222 if (force) {
3dc94f93
MK
2223 spin_lock_bh(&net->ipv6.fib6_gc_lock);
2224 } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
2ac3ac8f
MK
2225 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
2226 return;
1da177e4 2227 }
2ac3ac8f
MK
2228 gc_args.timeout = expires ? (int)expires :
2229 net->ipv6.sysctl.ip6_rt_gc_interval;
db916649 2230 gc_args.more = 0;
f3db4851 2231
3570df91 2232 fib6_clean_all(net, fib6_age, &gc_args);
49a18d86
MK
2233 now = jiffies;
2234 net->ipv6.ip6_rt_last_gc = now;
1da177e4
LT
2235
2236 if (gc_args.more)
c8a45222 2237 mod_timer(&net->ipv6.ip6_fib_timer,
49a18d86 2238 round_jiffies(now
c8a45222 2239 + net->ipv6.sysctl.ip6_rt_gc_interval));
417f28bb
SH
2240 else
2241 del_timer(&net->ipv6.ip6_fib_timer);
3dc94f93 2242 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1da177e4
LT
2243}
2244
86cb30ec 2245static void fib6_gc_timer_cb(struct timer_list *t)
5b7c931d 2246{
86cb30ec
KC
2247 struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2248
2249 fib6_run_gc(0, arg, true);
5b7c931d
DL
2250}
2251
2c8c1e72 2252static int __net_init fib6_net_init(struct net *net)
1da177e4 2253{
10da66f7 2254 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
16ab6d7d
IS
2255 int err;
2256
2257 err = fib6_notifier_init(net);
2258 if (err)
2259 return err;
10da66f7 2260
3dc94f93 2261 spin_lock_init(&net->ipv6.fib6_gc_lock);
9a03cd8f
MK
2262 rwlock_init(&net->ipv6.fib6_walker_lock);
2263 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
86cb30ec 2264 timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
63152fc0 2265
c572872f
BT
2266 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
2267 if (!net->ipv6.rt6_stats)
2268 goto out_timer;
2269
10da66f7
ED
2270 /* Avoid false sharing : Use at least a full cache line */
2271 size = max_t(size_t, size, L1_CACHE_BYTES);
2272
2273 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
58f09b78 2274 if (!net->ipv6.fib_table_hash)
c572872f 2275 goto out_rt6_stats;
e0b85590 2276
58f09b78
DL
2277 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
2278 GFP_KERNEL);
2279 if (!net->ipv6.fib6_main_tbl)
e0b85590
DL
2280 goto out_fib_table_hash;
2281
58f09b78 2282 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
66f5d6ce 2283 rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
421842ed 2284 net->ipv6.fib6_null_entry);
58f09b78
DL
2285 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
2286 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 2287 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
e0b85590
DL
2288
2289#ifdef CONFIG_IPV6_MULTIPLE_TABLES
58f09b78
DL
2290 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
2291 GFP_KERNEL);
2292 if (!net->ipv6.fib6_local_tbl)
e0b85590 2293 goto out_fib6_main_tbl;
58f09b78 2294 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
66f5d6ce 2295 rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
421842ed 2296 net->ipv6.fib6_null_entry);
58f09b78
DL
2297 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2298 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 2299 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
e0b85590 2300#endif
58f09b78 2301 fib6_tables_init(net);
f845ab6b 2302
417f28bb 2303 return 0;
d63bddbe 2304
e0b85590 2305#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 2306out_fib6_main_tbl:
58f09b78 2307 kfree(net->ipv6.fib6_main_tbl);
e0b85590 2308#endif
e0b85590 2309out_fib_table_hash:
58f09b78 2310 kfree(net->ipv6.fib_table_hash);
c572872f
BT
2311out_rt6_stats:
2312 kfree(net->ipv6.rt6_stats);
63152fc0 2313out_timer:
16ab6d7d 2314 fib6_notifier_exit(net);
417f28bb 2315 return -ENOMEM;
8db46f1d 2316}
58f09b78
DL
2317
2318static void fib6_net_exit(struct net *net)
2319{
ba1cc08d
SD
2320 unsigned int i;
2321
417f28bb
SH
2322 del_timer_sync(&net->ipv6.ip6_fib_timer);
2323
32a805ba 2324 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
ba1cc08d
SD
2325 struct hlist_head *head = &net->ipv6.fib_table_hash[i];
2326 struct hlist_node *tmp;
2327 struct fib6_table *tb;
2328
2329 hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
2330 hlist_del(&tb->tb6_hlist);
2331 fib6_free_table(tb);
2332 }
2333 }
2334
58f09b78 2335 kfree(net->ipv6.fib_table_hash);
c572872f 2336 kfree(net->ipv6.rt6_stats);
16ab6d7d 2337 fib6_notifier_exit(net);
58f09b78
DL
2338}
2339
2340static struct pernet_operations fib6_net_ops = {
2341 .init = fib6_net_init,
2342 .exit = fib6_net_exit,
2343};
2344
2345int __init fib6_init(void)
2346{
2347 int ret = -ENOMEM;
63152fc0 2348
58f09b78
DL
2349 fib6_node_kmem = kmem_cache_create("fib6_nodes",
2350 sizeof(struct fib6_node),
2351 0, SLAB_HWCACHE_ALIGN,
2352 NULL);
2353 if (!fib6_node_kmem)
2354 goto out;
2355
2356 ret = register_pernet_subsys(&fib6_net_ops);
2357 if (ret)
c572872f 2358 goto out_kmem_cache_create;
e8803b6c 2359
16feebcf
FW
2360 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2361 inet6_dump_fib, 0);
e8803b6c
DM
2362 if (ret)
2363 goto out_unregister_subsys;
705f1c86
HFS
2364
2365 __fib6_flush_trees = fib6_flush_trees;
58f09b78
DL
2366out:
2367 return ret;
2368
e8803b6c
DM
2369out_unregister_subsys:
2370 unregister_pernet_subsys(&fib6_net_ops);
d63bddbe
DL
2371out_kmem_cache_create:
2372 kmem_cache_destroy(fib6_node_kmem);
2373 goto out;
1da177e4
LT
2374}
2375
2376void fib6_gc_cleanup(void)
2377{
58f09b78 2378 unregister_pernet_subsys(&fib6_net_ops);
1da177e4
LT
2379 kmem_cache_destroy(fib6_node_kmem);
2380}
8d2ca1d7
HFS
2381
2382#ifdef CONFIG_PROC_FS
8d2ca1d7
HFS
2383static int ipv6_route_seq_show(struct seq_file *seq, void *v)
2384{
8d1c802b 2385 struct fib6_info *rt = v;
8d2ca1d7 2386 struct ipv6_route_iter *iter = seq->private;
f88d8ea6 2387 struct fib6_nh *fib6_nh = rt->fib6_nh;
2b2450ca 2388 unsigned int flags = rt->fib6_flags;
5e670d84 2389 const struct net_device *dev;
8d2ca1d7 2390
f88d8ea6
DA
2391 if (rt->nh)
2392 fib6_nh = nexthop_fib6_nh(rt->nh);
2393
93c2fb25 2394 seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
8d2ca1d7
HFS
2395
2396#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 2397 seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
8d2ca1d7
HFS
2398#else
2399 seq_puts(seq, "00000000000000000000000000000000 00 ");
2400#endif
f88d8ea6 2401 if (fib6_nh->fib_nh_gw_family) {
2b2450ca 2402 flags |= RTF_GATEWAY;
f88d8ea6 2403 seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
2b2450ca 2404 } else {
8d2ca1d7 2405 seq_puts(seq, "00000000000000000000000000000000");
2b2450ca 2406 }
8d2ca1d7 2407
f88d8ea6 2408 dev = fib6_nh->fib_nh_dev;
8d2ca1d7 2409 seq_printf(seq, " %08x %08x %08x %08x %8s\n",
f05713e0 2410 rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
2b2450ca 2411 flags, dev ? dev->name : "");
8d2ca1d7
HFS
2412 iter->w.leaf = NULL;
2413 return 0;
2414}
2415
94b2cfe0 2416static int ipv6_route_yield(struct fib6_walker *w)
8d2ca1d7
HFS
2417{
2418 struct ipv6_route_iter *iter = w->args;
2419
2420 if (!iter->skip)
2421 return 1;
2422
2423 do {
66f5d6ce 2424 iter->w.leaf = rcu_dereference_protected(
8fb11a9a 2425 iter->w.leaf->fib6_next,
66f5d6ce 2426 lockdep_is_held(&iter->tbl->tb6_lock));
8d2ca1d7
HFS
2427 iter->skip--;
2428 if (!iter->skip && iter->w.leaf)
2429 return 1;
2430 } while (iter->w.leaf);
2431
2432 return 0;
2433}
2434
9a03cd8f
MK
2435static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
2436 struct net *net)
8d2ca1d7
HFS
2437{
2438 memset(&iter->w, 0, sizeof(iter->w));
2439 iter->w.func = ipv6_route_yield;
2440 iter->w.root = &iter->tbl->tb6_root;
2441 iter->w.state = FWS_INIT;
2442 iter->w.node = iter->w.root;
2443 iter->w.args = iter;
0a67d3ef 2444 iter->sernum = iter->w.root->fn_sernum;
8d2ca1d7 2445 INIT_LIST_HEAD(&iter->w.lh);
9a03cd8f 2446 fib6_walker_link(net, &iter->w);
8d2ca1d7
HFS
2447}
2448
2449static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
2450 struct net *net)
2451{
2452 unsigned int h;
2453 struct hlist_node *node;
2454
2455 if (tbl) {
2456 h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
2457 node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
2458 } else {
2459 h = 0;
2460 node = NULL;
2461 }
2462
2463 while (!node && h < FIB6_TABLE_HASHSZ) {
2464 node = rcu_dereference_bh(
2465 hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
2466 }
2467 return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
2468}
2469
0a67d3ef
HFS
2470static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
2471{
2472 if (iter->sernum != iter->w.root->fn_sernum) {
2473 iter->sernum = iter->w.root->fn_sernum;
2474 iter->w.state = FWS_INIT;
2475 iter->w.node = iter->w.root;
2476 WARN_ON(iter->w.skip);
2477 iter->w.skip = iter->w.count;
2478 }
2479}
2480
8d2ca1d7
HFS
2481static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2482{
2483 int r;
8d1c802b 2484 struct fib6_info *n;
8d2ca1d7
HFS
2485 struct net *net = seq_file_net(seq);
2486 struct ipv6_route_iter *iter = seq->private;
2487
2488 if (!v)
2489 goto iter_table;
2490
8fb11a9a 2491 n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
8d2ca1d7
HFS
2492 if (n) {
2493 ++*pos;
2494 return n;
2495 }
2496
2497iter_table:
0a67d3ef 2498 ipv6_route_check_sernum(iter);
66f5d6ce 2499 spin_lock_bh(&iter->tbl->tb6_lock);
8d2ca1d7 2500 r = fib6_walk_continue(&iter->w);
66f5d6ce 2501 spin_unlock_bh(&iter->tbl->tb6_lock);
8d2ca1d7
HFS
2502 if (r > 0) {
2503 if (v)
2504 ++*pos;
2505 return iter->w.leaf;
2506 } else if (r < 0) {
9a03cd8f 2507 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2508 return NULL;
2509 }
9a03cd8f 2510 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2511
2512 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2513 if (!iter->tbl)
2514 return NULL;
2515
9a03cd8f 2516 ipv6_route_seq_setup_walk(iter, net);
8d2ca1d7
HFS
2517 goto iter_table;
2518}
2519
2520static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2521 __acquires(RCU_BH)
2522{
2523 struct net *net = seq_file_net(seq);
2524 struct ipv6_route_iter *iter = seq->private;
2525
2526 rcu_read_lock_bh();
2527 iter->tbl = ipv6_route_seq_next_table(NULL, net);
2528 iter->skip = *pos;
2529
2530 if (iter->tbl) {
9a03cd8f 2531 ipv6_route_seq_setup_walk(iter, net);
8d2ca1d7
HFS
2532 return ipv6_route_seq_next(seq, NULL, pos);
2533 } else {
2534 return NULL;
2535 }
2536}
2537
2538static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2539{
94b2cfe0 2540 struct fib6_walker *w = &iter->w;
8d2ca1d7
HFS
2541 return w->node && !(w->state == FWS_U && w->node == w->root);
2542}
2543
2544static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2545 __releases(RCU_BH)
2546{
9a03cd8f 2547 struct net *net = seq_file_net(seq);
8d2ca1d7
HFS
2548 struct ipv6_route_iter *iter = seq->private;
2549
2550 if (ipv6_route_iter_active(iter))
9a03cd8f 2551 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2552
2553 rcu_read_unlock_bh();
2554}
2555
c3506372 2556const struct seq_operations ipv6_route_seq_ops = {
8d2ca1d7
HFS
2557 .start = ipv6_route_seq_start,
2558 .next = ipv6_route_seq_next,
2559 .stop = ipv6_route_seq_stop,
2560 .show = ipv6_route_seq_show
2561};
8d2ca1d7 2562#endif /* CONFIG_PROC_FS */