ipv6: check fn->leaf before it is used
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result of the neighbour reachability check used while scoring a route.
 * Negative values are failures: find_match() skips a route on FAIL_HARD and
 * turns FAIL_DO_RR into the lowest valid score plus a round-robin request.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
d52d3997
MKL
185static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
186{
187 return dst_metrics_write_ptr(rt->dst.from);
188}
189
06582540
DM
190static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
191{
4b32b5ad 192 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 193
d52d3997
MKL
194 if (rt->rt6i_flags & RTF_PCPU)
195 return rt6_pcpu_cow_metrics(rt);
196 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
197 return NULL;
198 else
3b471175 199 return dst_cow_metrics_generic(dst, old);
06582540
DM
200}
201
f894cbf8
DM
202static inline const void *choose_neigh_daddr(struct rt6_info *rt,
203 struct sk_buff *skb,
204 const void *daddr)
39232973
DM
205{
206 struct in6_addr *p = &rt->rt6i_gateway;
207
a7563f34 208 if (!ipv6_addr_any(p))
39232973 209 return (const void *) p;
f894cbf8
DM
210 else if (skb)
211 return &ipv6_hdr(skb)->daddr;
39232973
DM
212 return daddr;
213}
214
f894cbf8
DM
215static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
d3aaeb38 218{
39232973
DM
219 struct rt6_info *rt = (struct rt6_info *) dst;
220 struct neighbour *n;
221
f894cbf8 222 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 223 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
224 if (n)
225 return n;
226 return neigh_create(&nd_tbl, daddr, dst->dev);
227}
228
63fca65d
JA
229static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
230{
231 struct net_device *dev = dst->dev;
232 struct rt6_info *rt = (struct rt6_info *)dst;
233
234 daddr = choose_neigh_daddr(rt, NULL, daddr);
235 if (!daddr)
236 return;
237 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
238 return;
239 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
240 return;
241 __ipv6_confirm_neigh(dev, daddr);
242}
243
9a7ec3a9 244static struct dst_ops ip6_dst_ops_template = {
1da177e4 245 .family = AF_INET6,
1da177e4
LT
246 .gc = ip6_dst_gc,
247 .gc_thresh = 1024,
248 .check = ip6_dst_check,
0dbaee3b 249 .default_advmss = ip6_default_advmss,
ebb762f2 250 .mtu = ip6_mtu,
06582540 251 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
252 .destroy = ip6_dst_destroy,
253 .ifdown = ip6_dst_ifdown,
254 .negative_advice = ip6_negative_advice,
255 .link_failure = ip6_link_failure,
256 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 257 .redirect = rt6_do_redirect,
9f8955cc 258 .local_out = __ip6_local_out,
d3aaeb38 259 .neigh_lookup = ip6_neigh_lookup,
63fca65d 260 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
261};
262
ebb762f2 263static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 264{
618f9bc7
SK
265 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
266
267 return mtu ? : dst->dev->mtu;
ec831ea7
RD
268}
269
6700c270
DM
270static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
271 struct sk_buff *skb, u32 mtu)
14e50e57
DM
272{
273}
274
6700c270
DM
/* redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
279
14e50e57
DM
280static struct dst_ops ip6_dst_blackhole_ops = {
281 .family = AF_INET6,
14e50e57
DM
282 .destroy = ip6_dst_destroy,
283 .check = ip6_dst_check,
ebb762f2 284 .mtu = ip6_blackhole_mtu,
214f45c9 285 .default_advmss = ip6_default_advmss,
14e50e57 286 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 287 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 288 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 289 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
290};
291
62fa8a84 292static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 293 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
294};
295
fb0af4c7 296static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
297 .dst = {
298 .__refcnt = ATOMIC_INIT(1),
299 .__use = 1,
2c20cbd7 300 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 301 .error = -ENETUNREACH,
d8d1f30b
CG
302 .input = ip6_pkt_discard,
303 .output = ip6_pkt_discard_out,
1da177e4
LT
304 },
305 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 306 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
307 .rt6i_metric = ~(u32) 0,
308 .rt6i_ref = ATOMIC_INIT(1),
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
326};
327
fb0af4c7 328static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
329 .dst = {
330 .__refcnt = ATOMIC_INIT(1),
331 .__use = 1,
2c20cbd7 332 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 333 .error = -EINVAL,
d8d1f30b 334 .input = dst_discard,
ede2059d 335 .output = dst_discard_out,
101367c2
TG
336 },
337 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 338 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
339 .rt6i_metric = ~(u32) 0,
340 .rt6i_ref = ATOMIC_INIT(1),
341};
342
343#endif
344
ebfa45f0
MKL
345static void rt6_info_init(struct rt6_info *rt)
346{
347 struct dst_entry *dst = &rt->dst;
348
349 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
350 INIT_LIST_HEAD(&rt->rt6i_siblings);
351 INIT_LIST_HEAD(&rt->rt6i_uncached);
352}
353
1da177e4 354/* allocate dst with ip6_dst_ops */
d52d3997
MKL
355static struct rt6_info *__ip6_dst_alloc(struct net *net,
356 struct net_device *dev,
ad706862 357 int flags)
1da177e4 358{
97bab73f 359 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 360 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 361
ebfa45f0
MKL
362 if (rt)
363 rt6_info_init(rt);
8104891b 364
cf911662 365 return rt;
1da177e4
LT
366}
367
9ab179d8
DA
368struct rt6_info *ip6_dst_alloc(struct net *net,
369 struct net_device *dev,
370 int flags)
d52d3997 371{
ad706862 372 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
373
374 if (rt) {
375 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
376 if (rt->rt6i_pcpu) {
377 int cpu;
378
379 for_each_possible_cpu(cpu) {
380 struct rt6_info **p;
381
382 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
383 /* no one shares rt */
384 *p = NULL;
385 }
386 } else {
587fea74 387 dst_release_immediate(&rt->dst);
d52d3997
MKL
388 return NULL;
389 }
390 }
391
392 return rt;
393}
9ab179d8 394EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 395
1da177e4
LT
396static void ip6_dst_destroy(struct dst_entry *dst)
397{
398 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 399 struct rt6_exception_bucket *bucket;
ecd98837 400 struct dst_entry *from = dst->from;
8d0b94af 401 struct inet6_dev *idev;
1da177e4 402
4b32b5ad 403 dst_destroy_metrics_generic(dst);
87775312 404 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
405 rt6_uncached_list_del(rt);
406
407 idev = rt->rt6i_idev;
38308473 408 if (idev) {
1da177e4
LT
409 rt->rt6i_idev = NULL;
410 in6_dev_put(idev);
1ab1457c 411 }
35732d01
WW
412 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
413 if (bucket) {
414 rt->rt6i_exception_bucket = NULL;
415 kfree(bucket);
416 }
1716a961 417
ecd98837
YH
418 dst->from = NULL;
419 dst_release(from);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
1716a961 452 } else if (rt->dst.from) {
1e2ea8ad
XL
453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
454 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
51ebd318 459static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
460 struct flowi6 *fl6, int oif,
461 int strict)
51ebd318
ND
462{
463 struct rt6_info *sibling, *next_sibling;
464 int route_choosen;
465
b673d6cc
JS
466 /* We might have already computed the hash for ICMPv6 errors. In such
467 * case it will always be non-zero. Otherwise now is the time to do it.
468 */
469 if (!fl6->mp_hash)
470 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
471
472 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
473 /* Don't change the route, if route_choosen == 0
474 * (siblings does not include ourself)
475 */
476 if (route_choosen)
477 list_for_each_entry_safe(sibling, next_sibling,
478 &match->rt6i_siblings, rt6i_siblings) {
479 route_choosen--;
480 if (route_choosen == 0) {
52bd4c0c
ND
481 if (rt6_score_route(sibling, oif, strict) < 0)
482 break;
51ebd318
ND
483 match = sibling;
484 break;
485 }
486 }
487 return match;
488}
489
1da177e4 490/*
c71099ac 491 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
492 */
493
8ed67789
DL
494static inline struct rt6_info *rt6_device_match(struct net *net,
495 struct rt6_info *rt,
b71d1d42 496 const struct in6_addr *saddr,
1da177e4 497 int oif,
d420895e 498 int flags)
1da177e4
LT
499{
500 struct rt6_info *local = NULL;
501 struct rt6_info *sprt;
502
dd3abc4e
YH
503 if (!oif && ipv6_addr_any(saddr))
504 goto out;
505
d8d1f30b 506 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 507 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
508
509 if (oif) {
1da177e4
LT
510 if (dev->ifindex == oif)
511 return sprt;
512 if (dev->flags & IFF_LOOPBACK) {
38308473 513 if (!sprt->rt6i_idev ||
1da177e4 514 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 515 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 516 continue;
17fb0b2b
DA
517 if (local &&
518 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
519 continue;
520 }
521 local = sprt;
522 }
dd3abc4e
YH
523 } else {
524 if (ipv6_chk_addr(net, saddr, dev,
525 flags & RT6_LOOKUP_F_IFACE))
526 return sprt;
1da177e4 527 }
dd3abc4e 528 }
1da177e4 529
dd3abc4e 530 if (oif) {
1da177e4
LT
531 if (local)
532 return local;
533
d420895e 534 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 535 return net->ipv6.ip6_null_entry;
1da177e4 536 }
dd3abc4e 537out:
1da177e4
LT
538 return rt;
539}
540
27097255 541#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
542struct __rt6_probe_work {
543 struct work_struct work;
544 struct in6_addr target;
545 struct net_device *dev;
546};
547
548static void rt6_probe_deferred(struct work_struct *w)
549{
550 struct in6_addr mcaddr;
551 struct __rt6_probe_work *work =
552 container_of(w, struct __rt6_probe_work, work);
553
554 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 555 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 556 dev_put(work->dev);
662f5533 557 kfree(work);
c2f17e82
HFS
558}
559
27097255
YH
560static void rt6_probe(struct rt6_info *rt)
561{
990edb42 562 struct __rt6_probe_work *work;
f2c31e32 563 struct neighbour *neigh;
27097255
YH
564 /*
565 * Okay, this does not seem to be appropriate
566 * for now, however, we need to check if it
567 * is really so; aka Router Reachability Probing.
568 *
569 * Router Reachability Probe MUST be rate-limited
570 * to no more than one per minute.
571 */
2152caea 572 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 573 return;
2152caea
YH
574 rcu_read_lock_bh();
575 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
576 if (neigh) {
8d6c31bf
MKL
577 if (neigh->nud_state & NUD_VALID)
578 goto out;
579
990edb42 580 work = NULL;
2152caea 581 write_lock(&neigh->lock);
990edb42
MKL
582 if (!(neigh->nud_state & NUD_VALID) &&
583 time_after(jiffies,
584 neigh->updated +
585 rt->rt6i_idev->cnf.rtr_probe_interval)) {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 if (work)
588 __neigh_set_probe_once(neigh);
c2f17e82 589 }
2152caea 590 write_unlock(&neigh->lock);
990edb42
MKL
591 } else {
592 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 593 }
990edb42
MKL
594
595 if (work) {
596 INIT_WORK(&work->work, rt6_probe_deferred);
597 work->target = rt->rt6i_gateway;
598 dev_hold(rt->dst.dev);
599 work->dev = rt->dst.dev;
600 schedule_work(&work->work);
601 }
602
8d6c31bf 603out:
2152caea 604 rcu_read_unlock_bh();
27097255
YH
605}
606#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
610#endif
611
1da177e4 612/*
554cfb7e 613 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 614 */
b6f99a21 615static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 616{
d1918542 617 struct net_device *dev = rt->dst.dev;
161980f4 618 if (!oif || dev->ifindex == oif)
554cfb7e 619 return 2;
161980f4
DM
620 if ((dev->flags & IFF_LOOPBACK) &&
621 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
622 return 1;
623 return 0;
554cfb7e 624}
1da177e4 625
afc154e9 626static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 627{
f2c31e32 628 struct neighbour *neigh;
afc154e9 629 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 630
4d0c5911
YH
631 if (rt->rt6i_flags & RTF_NONEXTHOP ||
632 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 633 return RT6_NUD_SUCCEED;
145a3621
YH
634
635 rcu_read_lock_bh();
636 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
637 if (neigh) {
638 read_lock(&neigh->lock);
554cfb7e 639 if (neigh->nud_state & NUD_VALID)
afc154e9 640 ret = RT6_NUD_SUCCEED;
398bcbeb 641#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 642 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 643 ret = RT6_NUD_SUCCEED;
7e980569
JB
644 else
645 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 646#endif
145a3621 647 read_unlock(&neigh->lock);
afc154e9
HFS
648 } else {
649 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 650 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 651 }
145a3621
YH
652 rcu_read_unlock_bh();
653
a5a81f0b 654 return ret;
1da177e4
LT
655}
656
554cfb7e
YH
657static int rt6_score_route(struct rt6_info *rt, int oif,
658 int strict)
1da177e4 659{
a5a81f0b 660 int m;
1ab1457c 661
4d0c5911 662 m = rt6_check_dev(rt, oif);
77d16f45 663 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 664 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
665#ifdef CONFIG_IPV6_ROUTER_PREF
666 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
667#endif
afc154e9
HFS
668 if (strict & RT6_LOOKUP_F_REACHABLE) {
669 int n = rt6_check_neigh(rt);
670 if (n < 0)
671 return n;
672 }
554cfb7e
YH
673 return m;
674}
675
f11e6659 676static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
677 int *mpri, struct rt6_info *match,
678 bool *do_rr)
554cfb7e 679{
f11e6659 680 int m;
afc154e9 681 bool match_do_rr = false;
35103d11
AG
682 struct inet6_dev *idev = rt->rt6i_idev;
683 struct net_device *dev = rt->dst.dev;
684
685 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
686 idev->cnf.ignore_routes_with_linkdown &&
687 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 688 goto out;
f11e6659
DM
689
690 if (rt6_check_expired(rt))
691 goto out;
692
693 m = rt6_score_route(rt, oif, strict);
7e980569 694 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
695 match_do_rr = true;
696 m = 0; /* lowest valid score */
7e980569 697 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 698 goto out;
afc154e9
HFS
699 }
700
701 if (strict & RT6_LOOKUP_F_REACHABLE)
702 rt6_probe(rt);
f11e6659 703
7e980569 704 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 705 if (m > *mpri) {
afc154e9 706 *do_rr = match_do_rr;
f11e6659
DM
707 *mpri = m;
708 match = rt;
f11e6659 709 }
f11e6659
DM
710out:
711 return match;
712}
713
714static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 715 struct rt6_info *leaf,
f11e6659 716 struct rt6_info *rr_head,
afc154e9
HFS
717 u32 metric, int oif, int strict,
718 bool *do_rr)
f11e6659 719{
9fbdcfaf 720 struct rt6_info *rt, *match, *cont;
554cfb7e 721 int mpri = -1;
1da177e4 722
f11e6659 723 match = NULL;
9fbdcfaf
SK
724 cont = NULL;
725 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
726 if (rt->rt6i_metric != metric) {
727 cont = rt;
728 break;
729 }
730
731 match = find_match(rt, oif, strict, &mpri, match, do_rr);
732 }
733
8d1040e8 734 for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
9fbdcfaf
SK
735 if (rt->rt6i_metric != metric) {
736 cont = rt;
737 break;
738 }
739
afc154e9 740 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
741 }
742
743 if (match || !cont)
744 return match;
745
746 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 747 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 748
f11e6659
DM
749 return match;
750}
1da177e4 751
8d1040e8
WW
752static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
753 int oif, int strict)
f11e6659 754{
8d1040e8 755 struct rt6_info *leaf = fn->leaf;
f11e6659 756 struct rt6_info *match, *rt0;
afc154e9 757 bool do_rr = false;
1da177e4 758
8d1040e8
WW
759 if (!leaf)
760 return net->ipv6.ip6_null_entry;
761
f11e6659
DM
762 rt0 = fn->rr_ptr;
763 if (!rt0)
8d1040e8 764 fn->rr_ptr = rt0 = leaf;
1da177e4 765
8d1040e8 766 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 767 &do_rr);
1da177e4 768
afc154e9 769 if (do_rr) {
d8d1f30b 770 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 771
554cfb7e 772 /* no entries matched; do round-robin */
f11e6659 773 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 774 next = leaf;
f11e6659
DM
775
776 if (next != rt0)
777 fn->rr_ptr = next;
1da177e4 778 }
1da177e4 779
a02cec21 780 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
781}
782
8b9df265
MKL
783static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
784{
785 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
70ceb4f5
YH
797 struct rt6_info *rt;
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567
DJ
834 if (rinfo->prefix_len == 0)
835 rt = rt6_get_dflt_router(gwaddr, dev);
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
e0a1ad73 841 ip6_del_rt(rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5
YH
848 else if (rt)
849 rt->rt6i_flags = RTF_ROUTEINFO |
850 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
851
852 if (rt) {
1716a961
G
853 if (!addrconf_finite_timeout(lifetime))
854 rt6_clean_expires(rt);
855 else
856 rt6_set_expires(rt, jiffies + HZ * lifetime);
857
94e187c0 858 ip6_rt_put(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
a3c00e46
MKL
864static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
865 struct in6_addr *saddr)
866{
867 struct fib6_node *pn;
868 while (1) {
869 if (fn->fn_flags & RTN_TL_ROOT)
870 return NULL;
871 pn = fn->parent;
872 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
873 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
874 else
875 fn = pn;
876 if (fn->fn_flags & RTN_RTINFO)
877 return fn;
878 }
879}
c71099ac 880
d3843fe5
WW
881static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
882 bool null_fallback)
883{
884 struct rt6_info *rt = *prt;
885
886 if (dst_hold_safe(&rt->dst))
887 return true;
888 if (null_fallback) {
889 rt = net->ipv6.ip6_null_entry;
890 dst_hold(&rt->dst);
891 } else {
892 rt = NULL;
893 }
894 *prt = rt;
895 return false;
896}
897
8ed67789
DL
898static struct rt6_info *ip6_pol_route_lookup(struct net *net,
899 struct fib6_table *table,
4c9483b2 900 struct flowi6 *fl6, int flags)
1da177e4 901{
2b760fcf 902 struct rt6_info *rt, *rt_cache;
1da177e4 903 struct fib6_node *fn;
1da177e4 904
c71099ac 905 read_lock_bh(&table->tb6_lock);
4c9483b2 906 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
907restart:
908 rt = fn->leaf;
4c9483b2 909 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 910 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 911 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
912 if (rt == net->ipv6.ip6_null_entry) {
913 fn = fib6_backtrack(fn, &fl6->saddr);
914 if (fn)
915 goto restart;
916 }
2b760fcf
WW
917 /* Search through exception table */
918 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
919 if (rt_cache)
920 rt = rt_cache;
921
d3843fe5
WW
922 if (ip6_hold_safe(net, &rt, true))
923 dst_use_noref(&rt->dst, jiffies);
924
c71099ac 925 read_unlock_bh(&table->tb6_lock);
b811580d
DA
926
927 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
928
c71099ac
TG
929 return rt;
930
931}
932
67ba4152 933struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
934 int flags)
935{
936 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
937}
938EXPORT_SYMBOL_GPL(ip6_route_lookup);
939
9acd9f3a
YH
940struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
941 const struct in6_addr *saddr, int oif, int strict)
c71099ac 942{
4c9483b2
DM
943 struct flowi6 fl6 = {
944 .flowi6_oif = oif,
945 .daddr = *daddr,
c71099ac
TG
946 };
947 struct dst_entry *dst;
77d16f45 948 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 949
adaa70bb 950 if (saddr) {
4c9483b2 951 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
952 flags |= RT6_LOOKUP_F_HAS_SADDR;
953 }
954
4c9483b2 955 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
956 if (dst->error == 0)
957 return (struct rt6_info *) dst;
958
959 dst_release(dst);
960
1da177e4
LT
961 return NULL;
962}
7159039a
YH
963EXPORT_SYMBOL(rt6_lookup);
964
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * The caller must hold a reference on the dst before calling it.
 */
970
e5fd387a 971static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
972 struct mx6_config *mxc,
973 struct netlink_ext_ack *extack)
1da177e4
LT
974{
975 int err;
c71099ac 976 struct fib6_table *table;
1da177e4 977
c71099ac
TG
978 table = rt->rt6i_table;
979 write_lock_bh(&table->tb6_lock);
333c4301 980 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 981 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
982
983 return err;
984}
985
40e22e8f
TG
986int ip6_ins_rt(struct rt6_info *rt)
987{
e715b6d3
FW
988 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
989 struct mx6_config mxc = { .mx = NULL, };
990
1cfb71ee
WW
991 /* Hold dst to account for the reference from the fib6 tree */
992 dst_hold(&rt->dst);
333c4301 993 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
994}
995
4832c30d
DA
996/* called with rcu_lock held */
997static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
998{
999 struct net_device *dev = rt->dst.dev;
1000
1001 if (rt->rt6i_flags & RTF_LOCAL) {
1002 /* for copies of local routes, dst->dev needs to be the
1003 * device if it is a master device, the master device if
1004 * device is enslaved, and the loopback as the default
1005 */
1006 if (netif_is_l3_slave(dev) &&
1007 !rt6_need_strict(&rt->rt6i_dst.addr))
1008 dev = l3mdev_master_dev_rcu(dev);
1009 else if (!netif_is_l3_master(dev))
1010 dev = dev_net(dev)->loopback_dev;
1011 /* last case is netif_is_l3_master(dev) is true in which
1012 * case we want dev returned to be dev
1013 */
1014 }
1015
1016 return dev;
1017}
1018
8b9df265
MKL
1019static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1020 const struct in6_addr *daddr,
1021 const struct in6_addr *saddr)
1da177e4 1022{
4832c30d 1023 struct net_device *dev;
1da177e4
LT
1024 struct rt6_info *rt;
1025
1026 /*
1027 * Clone the route.
1028 */
1029
d52d3997 1030 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 1031 ort = (struct rt6_info *)ort->dst.from;
1da177e4 1032
4832c30d
DA
1033 rcu_read_lock();
1034 dev = ip6_rt_get_dev_rcu(ort);
1035 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1036 rcu_read_unlock();
83a09abd
MKL
1037 if (!rt)
1038 return NULL;
1039
1040 ip6_rt_copy_init(rt, ort);
1041 rt->rt6i_flags |= RTF_CACHE;
1042 rt->rt6i_metric = 0;
1043 rt->dst.flags |= DST_HOST;
1044 rt->rt6i_dst.addr = *daddr;
1045 rt->rt6i_dst.plen = 128;
1da177e4 1046
83a09abd
MKL
1047 if (!rt6_is_gw_or_nonexthop(ort)) {
1048 if (ort->rt6i_dst.plen != 128 &&
1049 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1050 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1051#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1052 if (rt->rt6i_src.plen && saddr) {
1053 rt->rt6i_src.addr = *saddr;
1054 rt->rt6i_src.plen = 128;
8b9df265 1055 }
83a09abd 1056#endif
95a9a5ba 1057 }
1da177e4 1058
95a9a5ba
YH
1059 return rt;
1060}
1da177e4 1061
d52d3997
MKL
1062static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1063{
4832c30d 1064 struct net_device *dev;
d52d3997
MKL
1065 struct rt6_info *pcpu_rt;
1066
4832c30d
DA
1067 rcu_read_lock();
1068 dev = ip6_rt_get_dev_rcu(rt);
1069 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1070 rcu_read_unlock();
d52d3997
MKL
1071 if (!pcpu_rt)
1072 return NULL;
1073 ip6_rt_copy_init(pcpu_rt, rt);
1074 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1075 pcpu_rt->rt6i_flags |= RTF_PCPU;
1076 return pcpu_rt;
1077}
1078
1079/* It should be called with read_lock_bh(&tb6_lock) acquired */
1080static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1081{
a73e4195 1082 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1083
1084 p = this_cpu_ptr(rt->rt6i_pcpu);
1085 pcpu_rt = *p;
1086
d3843fe5 1087 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1088 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1089
a73e4195
MKL
1090 return pcpu_rt;
1091}
1092
1093static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1094{
1095 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1096
1097 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1098 if (!pcpu_rt) {
1099 struct net *net = dev_net(rt->dst.dev);
1100
9c7370a1
MKL
1101 dst_hold(&net->ipv6.ip6_null_entry->dst);
1102 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1103 }
1104
a94b9367
WW
1105 dst_hold(&pcpu_rt->dst);
1106 p = this_cpu_ptr(rt->rt6i_pcpu);
1107 prev = cmpxchg(p, NULL, pcpu_rt);
1108 if (prev) {
1109 /* If someone did it before us, return prev instead */
1110 /* release refcnt taken by ip6_rt_pcpu_alloc() */
587fea74 1111 dst_release_immediate(&pcpu_rt->dst);
a94b9367
WW
1112 /* release refcnt taken by above dst_hold() */
1113 dst_release_immediate(&pcpu_rt->dst);
1114 dst_hold(&prev->dst);
1115 pcpu_rt = prev;
d52d3997 1116 }
a94b9367 1117
d52d3997
MKL
1118 rt6_dst_from_metrics_check(pcpu_rt);
1119 return pcpu_rt;
1120}
1121
/* Exception hash table implementation.
 *
 * rt6_exception_lock is taken (with BHs disabled) by the functions below
 * that insert into, remove from or walk-and-modify the exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1125
1126/* Remove rt6_ex from hash table and free the memory
1127 * Caller must hold rt6_exception_lock
1128 */
1129static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1130 struct rt6_exception *rt6_ex)
1131{
1132 if (!bucket || !rt6_ex)
1133 return;
1134 rt6_ex->rt6i->rt6i_node = NULL;
1135 hlist_del_rcu(&rt6_ex->hlist);
1136 rt6_release(rt6_ex->rt6i);
1137 kfree_rcu(rt6_ex, rcu);
1138 WARN_ON_ONCE(!bucket->depth);
1139 bucket->depth--;
1140}
1141
1142/* Remove oldest rt6_ex in bucket and free the memory
1143 * Caller must hold rt6_exception_lock
1144 */
1145static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1146{
1147 struct rt6_exception *rt6_ex, *oldest = NULL;
1148
1149 if (!bucket)
1150 return;
1151
1152 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1153 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1154 oldest = rt6_ex;
1155 }
1156 rt6_remove_exception(bucket, oldest);
1157}
1158
1159static u32 rt6_exception_hash(const struct in6_addr *dst,
1160 const struct in6_addr *src)
1161{
1162 static u32 seed __read_mostly;
1163 u32 val;
1164
1165 net_get_random_once(&seed, sizeof(seed));
1166 val = jhash(dst, sizeof(*dst), seed);
1167
1168#ifdef CONFIG_IPV6_SUBTREES
1169 if (src)
1170 val = jhash(src, sizeof(*src), val);
1171#endif
1172 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1173}
1174
1175/* Helper function to find the cached rt in the hash table
1176 * and update bucket pointer to point to the bucket for this
1177 * (daddr, saddr) pair
1178 * Caller must hold rt6_exception_lock
1179 */
1180static struct rt6_exception *
1181__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1182 const struct in6_addr *daddr,
1183 const struct in6_addr *saddr)
1184{
1185 struct rt6_exception *rt6_ex;
1186 u32 hval;
1187
1188 if (!(*bucket) || !daddr)
1189 return NULL;
1190
1191 hval = rt6_exception_hash(daddr, saddr);
1192 *bucket += hval;
1193
1194 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1195 struct rt6_info *rt6 = rt6_ex->rt6i;
1196 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1197
1198#ifdef CONFIG_IPV6_SUBTREES
1199 if (matched && saddr)
1200 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1201#endif
1202 if (matched)
1203 return rt6_ex;
1204 }
1205 return NULL;
1206}
1207
1208/* Helper function to find the cached rt in the hash table
1209 * and update bucket pointer to point to the bucket for this
1210 * (daddr, saddr) pair
1211 * Caller must hold rcu_read_lock()
1212 */
1213static struct rt6_exception *
1214__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1215 const struct in6_addr *daddr,
1216 const struct in6_addr *saddr)
1217{
1218 struct rt6_exception *rt6_ex;
1219 u32 hval;
1220
1221 WARN_ON_ONCE(!rcu_read_lock_held());
1222
1223 if (!(*bucket) || !daddr)
1224 return NULL;
1225
1226 hval = rt6_exception_hash(daddr, saddr);
1227 *bucket += hval;
1228
1229 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1230 struct rt6_info *rt6 = rt6_ex->rt6i;
1231 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1232
1233#ifdef CONFIG_IPV6_SUBTREES
1234 if (matched && saddr)
1235 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1236#endif
1237 if (matched)
1238 return rt6_ex;
1239 }
1240 return NULL;
1241}
1242
1243static int rt6_insert_exception(struct rt6_info *nrt,
1244 struct rt6_info *ort)
1245{
1246 struct rt6_exception_bucket *bucket;
1247 struct in6_addr *src_key = NULL;
1248 struct rt6_exception *rt6_ex;
1249 int err = 0;
1250
1251 /* ort can't be a cache or pcpu route */
1252 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1253 ort = (struct rt6_info *)ort->dst.from;
1254 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1255
1256 spin_lock_bh(&rt6_exception_lock);
1257
1258 if (ort->exception_bucket_flushed) {
1259 err = -EINVAL;
1260 goto out;
1261 }
1262
1263 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1264 lockdep_is_held(&rt6_exception_lock));
1265 if (!bucket) {
1266 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1267 GFP_ATOMIC);
1268 if (!bucket) {
1269 err = -ENOMEM;
1270 goto out;
1271 }
1272 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1273 }
1274
1275#ifdef CONFIG_IPV6_SUBTREES
1276 /* rt6i_src.plen != 0 indicates ort is in subtree
1277 * and exception table is indexed by a hash of
1278 * both rt6i_dst and rt6i_src.
1279 * Otherwise, the exception table is indexed by
1280 * a hash of only rt6i_dst.
1281 */
1282 if (ort->rt6i_src.plen)
1283 src_key = &nrt->rt6i_src.addr;
1284#endif
60006a48
WW
1285
1286 /* Update rt6i_prefsrc as it could be changed
1287 * in rt6_remove_prefsrc()
1288 */
1289 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1290 /* rt6_mtu_change() might lower mtu on ort.
1291 * Only insert this exception route if its mtu
1292 * is less than ort's mtu value.
1293 */
1294 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1295 err = -EINVAL;
1296 goto out;
1297 }
60006a48 1298
35732d01
WW
1299 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1300 src_key);
1301 if (rt6_ex)
1302 rt6_remove_exception(bucket, rt6_ex);
1303
1304 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1305 if (!rt6_ex) {
1306 err = -ENOMEM;
1307 goto out;
1308 }
1309 rt6_ex->rt6i = nrt;
1310 rt6_ex->stamp = jiffies;
1311 atomic_inc(&nrt->rt6i_ref);
1312 nrt->rt6i_node = ort->rt6i_node;
1313 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1314 bucket->depth++;
1315
1316 if (bucket->depth > FIB6_MAX_DEPTH)
1317 rt6_exception_remove_oldest(bucket);
1318
1319out:
1320 spin_unlock_bh(&rt6_exception_lock);
1321
1322 /* Update fn->fn_sernum to invalidate all cached dst */
1323 if (!err)
1324 fib6_update_sernum(ort);
1325
1326 return err;
1327}
1328
1329void rt6_flush_exceptions(struct rt6_info *rt)
1330{
1331 struct rt6_exception_bucket *bucket;
1332 struct rt6_exception *rt6_ex;
1333 struct hlist_node *tmp;
1334 int i;
1335
1336 spin_lock_bh(&rt6_exception_lock);
1337 /* Prevent rt6_insert_exception() to recreate the bucket list */
1338 rt->exception_bucket_flushed = 1;
1339
1340 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1341 lockdep_is_held(&rt6_exception_lock));
1342 if (!bucket)
1343 goto out;
1344
1345 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1346 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1347 rt6_remove_exception(bucket, rt6_ex);
1348 WARN_ON_ONCE(bucket->depth);
1349 bucket++;
1350 }
1351
1352out:
1353 spin_unlock_bh(&rt6_exception_lock);
1354}
1355
1356/* Find cached rt in the hash table inside passed in rt
1357 * Caller has to hold rcu_read_lock()
1358 */
1359static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1360 struct in6_addr *daddr,
1361 struct in6_addr *saddr)
1362{
1363 struct rt6_exception_bucket *bucket;
1364 struct in6_addr *src_key = NULL;
1365 struct rt6_exception *rt6_ex;
1366 struct rt6_info *res = NULL;
1367
1368 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1369
1370#ifdef CONFIG_IPV6_SUBTREES
1371 /* rt6i_src.plen != 0 indicates rt is in subtree
1372 * and exception table is indexed by a hash of
1373 * both rt6i_dst and rt6i_src.
1374 * Otherwise, the exception table is indexed by
1375 * a hash of only rt6i_dst.
1376 */
1377 if (rt->rt6i_src.plen)
1378 src_key = saddr;
1379#endif
1380 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1381
1382 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1383 res = rt6_ex->rt6i;
1384
1385 return res;
1386}
1387
1388/* Remove the passed in cached rt from the hash table that contains it */
1389int rt6_remove_exception_rt(struct rt6_info *rt)
1390{
1391 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1392 struct rt6_exception_bucket *bucket;
1393 struct in6_addr *src_key = NULL;
1394 struct rt6_exception *rt6_ex;
1395 int err;
1396
1397 if (!from ||
1398 !(rt->rt6i_flags | RTF_CACHE))
1399 return -EINVAL;
1400
1401 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1402 return -ENOENT;
1403
1404 spin_lock_bh(&rt6_exception_lock);
1405 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1406 lockdep_is_held(&rt6_exception_lock));
1407#ifdef CONFIG_IPV6_SUBTREES
1408 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1409 * and exception table is indexed by a hash of
1410 * both rt6i_dst and rt6i_src.
1411 * Otherwise, the exception table is indexed by
1412 * a hash of only rt6i_dst.
1413 */
1414 if (from->rt6i_src.plen)
1415 src_key = &rt->rt6i_src.addr;
1416#endif
1417 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1418 &rt->rt6i_dst.addr,
1419 src_key);
1420 if (rt6_ex) {
1421 rt6_remove_exception(bucket, rt6_ex);
1422 err = 0;
1423 } else {
1424 err = -ENOENT;
1425 }
1426
1427 spin_unlock_bh(&rt6_exception_lock);
1428 return err;
1429}
1430
1431/* Find rt6_ex which contains the passed in rt cache and
1432 * refresh its stamp
1433 */
1434static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1435{
1436 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1437 struct rt6_exception_bucket *bucket;
1438 struct in6_addr *src_key = NULL;
1439 struct rt6_exception *rt6_ex;
1440
1441 if (!from ||
1442 !(rt->rt6i_flags | RTF_CACHE))
1443 return;
1444
1445 rcu_read_lock();
1446 bucket = rcu_dereference(from->rt6i_exception_bucket);
1447
1448#ifdef CONFIG_IPV6_SUBTREES
1449 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1450 * and exception table is indexed by a hash of
1451 * both rt6i_dst and rt6i_src.
1452 * Otherwise, the exception table is indexed by
1453 * a hash of only rt6i_dst.
1454 */
1455 if (from->rt6i_src.plen)
1456 src_key = &rt->rt6i_src.addr;
1457#endif
1458 rt6_ex = __rt6_find_exception_rcu(&bucket,
1459 &rt->rt6i_dst.addr,
1460 src_key);
1461 if (rt6_ex)
1462 rt6_ex->stamp = jiffies;
1463
1464 rcu_read_unlock();
1465}
1466
60006a48
WW
1467static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1468{
1469 struct rt6_exception_bucket *bucket;
1470 struct rt6_exception *rt6_ex;
1471 int i;
1472
1473 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1474 lockdep_is_held(&rt6_exception_lock));
1475
1476 if (bucket) {
1477 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1478 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1479 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1480 }
1481 bucket++;
1482 }
1483 }
1484}
1485
f5bbe7ee
WW
1486static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1487{
1488 struct rt6_exception_bucket *bucket;
1489 struct rt6_exception *rt6_ex;
1490 int i;
1491
1492 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 lockdep_is_held(&rt6_exception_lock));
1494
1495 if (bucket) {
1496 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1497 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1498 struct rt6_info *entry = rt6_ex->rt6i;
1499 /* For RTF_CACHE with rt6i_pmtu == 0
1500 * (i.e. a redirected route),
1501 * the metrics of its rt->dst.from has already
1502 * been updated.
1503 */
1504 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1505 entry->rt6i_pmtu = mtu;
1506 }
1507 bucket++;
1508 }
1509 }
1510}
1511
b16cb459
WW
1512#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1513
1514static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1515 struct in6_addr *gateway)
1516{
1517 struct rt6_exception_bucket *bucket;
1518 struct rt6_exception *rt6_ex;
1519 struct hlist_node *tmp;
1520 int i;
1521
1522 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1523 return;
1524
1525 spin_lock_bh(&rt6_exception_lock);
1526 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1527 lockdep_is_held(&rt6_exception_lock));
1528
1529 if (bucket) {
1530 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1531 hlist_for_each_entry_safe(rt6_ex, tmp,
1532 &bucket->chain, hlist) {
1533 struct rt6_info *entry = rt6_ex->rt6i;
1534
1535 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1536 RTF_CACHE_GATEWAY &&
1537 ipv6_addr_equal(gateway,
1538 &entry->rt6i_gateway)) {
1539 rt6_remove_exception(bucket, rt6_ex);
1540 }
1541 }
1542 bucket++;
1543 }
1544 }
1545
1546 spin_unlock_bh(&rt6_exception_lock);
1547}
1548
c757faa8
WW
1549static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1550 struct rt6_exception *rt6_ex,
1551 struct fib6_gc_args *gc_args,
1552 unsigned long now)
1553{
1554 struct rt6_info *rt = rt6_ex->rt6i;
1555
1556 if (atomic_read(&rt->dst.__refcnt) == 1 &&
1557 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1558 RT6_TRACE("aging clone %p\n", rt);
1559 rt6_remove_exception(bucket, rt6_ex);
1560 return;
1561 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1562 struct neighbour *neigh;
1563 __u8 neigh_flags = 0;
1564
1565 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1566 if (neigh) {
1567 neigh_flags = neigh->flags;
1568 neigh_release(neigh);
1569 }
1570 if (!(neigh_flags & NTF_ROUTER)) {
1571 RT6_TRACE("purging route %p via non-router but gateway\n",
1572 rt);
1573 rt6_remove_exception(bucket, rt6_ex);
1574 return;
1575 }
1576 }
1577 gc_args->more++;
1578}
1579
1580void rt6_age_exceptions(struct rt6_info *rt,
1581 struct fib6_gc_args *gc_args,
1582 unsigned long now)
1583{
1584 struct rt6_exception_bucket *bucket;
1585 struct rt6_exception *rt6_ex;
1586 struct hlist_node *tmp;
1587 int i;
1588
1589 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1590 return;
1591
1592 spin_lock_bh(&rt6_exception_lock);
1593 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1594 lockdep_is_held(&rt6_exception_lock));
1595
1596 if (bucket) {
1597 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1598 hlist_for_each_entry_safe(rt6_ex, tmp,
1599 &bucket->chain, hlist) {
1600 rt6_age_examine_exception(bucket, rt6_ex,
1601 gc_args, now);
1602 }
1603 bucket++;
1604 }
1605 }
1606 spin_unlock_bh(&rt6_exception_lock);
1607}
1608
9ff74384
DA
1609struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1610 int oif, struct flowi6 *fl6, int flags)
1da177e4 1611{
367efcb9 1612 struct fib6_node *fn, *saved_fn;
2b760fcf 1613 struct rt6_info *rt, *rt_cache;
c71099ac 1614 int strict = 0;
1da177e4 1615
77d16f45 1616 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1617 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1618 if (net->ipv6.devconf_all->forwarding == 0)
1619 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1620
c71099ac 1621 read_lock_bh(&table->tb6_lock);
1da177e4 1622
4c9483b2 1623 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1624 saved_fn = fn;
1da177e4 1625
ca254490
DA
1626 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1627 oif = 0;
1628
a3c00e46 1629redo_rt6_select:
8d1040e8 1630 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1631 if (rt->rt6i_nsiblings)
367efcb9 1632 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1633 if (rt == net->ipv6.ip6_null_entry) {
1634 fn = fib6_backtrack(fn, &fl6->saddr);
1635 if (fn)
1636 goto redo_rt6_select;
367efcb9
MKL
1637 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1638 /* also consider unreachable route */
1639 strict &= ~RT6_LOOKUP_F_REACHABLE;
1640 fn = saved_fn;
1641 goto redo_rt6_select;
367efcb9 1642 }
a3c00e46
MKL
1643 }
1644
2b760fcf
WW
1645 /*Search through exception table */
1646 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1647 if (rt_cache)
1648 rt = rt_cache;
fb9de91e 1649
d3843fe5
WW
1650 if (rt == net->ipv6.ip6_null_entry) {
1651 read_unlock_bh(&table->tb6_lock);
1652 dst_hold(&rt->dst);
1653 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1654 return rt;
1655 } else if (rt->rt6i_flags & RTF_CACHE) {
1656 if (ip6_hold_safe(net, &rt, true)) {
1657 dst_use_noref(&rt->dst, jiffies);
1658 rt6_dst_from_metrics_check(rt);
1659 }
d52d3997 1660 read_unlock_bh(&table->tb6_lock);
b811580d 1661 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1662 return rt;
3da59bd9
MKL
1663 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1664 !(rt->rt6i_flags & RTF_GATEWAY))) {
1665 /* Create a RTF_CACHE clone which will not be
1666 * owned by the fib6 tree. It is for the special case where
1667 * the daddr in the skb during the neighbor look-up is different
1668 * from the fl6->daddr used to look-up route here.
1669 */
1670
1671 struct rt6_info *uncached_rt;
1672
d3843fe5
WW
1673 if (ip6_hold_safe(net, &rt, true)) {
1674 dst_use_noref(&rt->dst, jiffies);
1675 } else {
1676 read_unlock_bh(&table->tb6_lock);
1677 uncached_rt = rt;
1678 goto uncached_rt_out;
1679 }
d52d3997
MKL
1680 read_unlock_bh(&table->tb6_lock);
1681
3da59bd9
MKL
1682 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1683 dst_release(&rt->dst);
c71099ac 1684
1cfb71ee
WW
1685 if (uncached_rt) {
1686 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1687 * No need for another dst_hold()
1688 */
8d0b94af 1689 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1690 } else {
3da59bd9 1691 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1692 dst_hold(&uncached_rt->dst);
1693 }
b811580d 1694
d3843fe5 1695uncached_rt_out:
b811580d 1696 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1697 return uncached_rt;
3da59bd9 1698
d52d3997
MKL
1699 } else {
1700 /* Get a percpu copy */
1701
1702 struct rt6_info *pcpu_rt;
1703
d3843fe5 1704 dst_use_noref(&rt->dst, jiffies);
d52d3997 1705 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1706
9c7370a1
MKL
1707 if (pcpu_rt) {
1708 read_unlock_bh(&table->tb6_lock);
1709 } else {
a94b9367
WW
1710 /* atomic_inc_not_zero() is needed when using rcu */
1711 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1712 /* We have to do the read_unlock first
1713 * because rt6_make_pcpu_route() may trigger
1714 * ip6_dst_gc() which will take the write_lock.
1715 *
1716 * No dst_hold() on rt is needed because grabbing
1717 * rt->rt6i_ref makes sure rt can't be released.
1718 */
1719 read_unlock_bh(&table->tb6_lock);
1720 pcpu_rt = rt6_make_pcpu_route(rt);
1721 rt6_release(rt);
1722 } else {
1723 /* rt is already removed from tree */
1724 read_unlock_bh(&table->tb6_lock);
1725 pcpu_rt = net->ipv6.ip6_null_entry;
1726 dst_hold(&pcpu_rt->dst);
1727 }
9c7370a1 1728 }
d52d3997 1729
b811580d 1730 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997
MKL
1731 return pcpu_rt;
1732 }
1da177e4 1733}
9ff74384 1734EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1735
8ed67789 1736static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1737 struct flowi6 *fl6, int flags)
4acad72d 1738{
4c9483b2 1739 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1740}
1741
d409b847
MB
1742struct dst_entry *ip6_route_input_lookup(struct net *net,
1743 struct net_device *dev,
1744 struct flowi6 *fl6, int flags)
72331bc0
SL
1745{
1746 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1747 flags |= RT6_LOOKUP_F_IFACE;
1748
1749 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1750}
d409b847 1751EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1752
23aebdac
JS
1753static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1754 struct flow_keys *keys)
1755{
1756 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1757 const struct ipv6hdr *key_iph = outer_iph;
1758 const struct ipv6hdr *inner_iph;
1759 const struct icmp6hdr *icmph;
1760 struct ipv6hdr _inner_iph;
1761
1762 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1763 goto out;
1764
1765 icmph = icmp6_hdr(skb);
1766 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1767 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1768 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1769 icmph->icmp6_type != ICMPV6_PARAMPROB)
1770 goto out;
1771
1772 inner_iph = skb_header_pointer(skb,
1773 skb_transport_offset(skb) + sizeof(*icmph),
1774 sizeof(_inner_iph), &_inner_iph);
1775 if (!inner_iph)
1776 goto out;
1777
1778 key_iph = inner_iph;
1779out:
1780 memset(keys, 0, sizeof(*keys));
1781 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1782 keys->addrs.v6addrs.src = key_iph->saddr;
1783 keys->addrs.v6addrs.dst = key_iph->daddr;
1784 keys->tags.flow_label = ip6_flowinfo(key_iph);
1785 keys->basic.ip_proto = key_iph->nexthdr;
1786}
1787
1788/* if skb is set it will be used and fl6 can be NULL */
1789u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1790{
1791 struct flow_keys hash_keys;
1792
1793 if (skb) {
1794 ip6_multipath_l3_keys(skb, &hash_keys);
1795 return flow_hash_from_keys(&hash_keys);
1796 }
1797
1798 return get_hash_from_flowi6(fl6);
1799}
1800
c71099ac
TG
1801void ip6_route_input(struct sk_buff *skb)
1802{
b71d1d42 1803 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1804 struct net *net = dev_net(skb->dev);
adaa70bb 1805 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1806 struct ip_tunnel_info *tun_info;
4c9483b2 1807 struct flowi6 fl6 = {
e0d56fdd 1808 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1809 .daddr = iph->daddr,
1810 .saddr = iph->saddr,
6502ca52 1811 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1812 .flowi6_mark = skb->mark,
1813 .flowi6_proto = iph->nexthdr,
c71099ac 1814 };
adaa70bb 1815
904af04d 1816 tun_info = skb_tunnel_info(skb);
46fa062a 1817 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1818 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1819 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1820 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1821 skb_dst_drop(skb);
72331bc0 1822 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1823}
1824
8ed67789 1825static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1826 struct flowi6 *fl6, int flags)
1da177e4 1827{
4c9483b2 1828 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1829}
1830
6f21c96a
PA
1831struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1832 struct flowi6 *fl6, int flags)
c71099ac 1833{
d46a9d67 1834 bool any_src;
c71099ac 1835
4c1feac5
DA
1836 if (rt6_need_strict(&fl6->daddr)) {
1837 struct dst_entry *dst;
1838
1839 dst = l3mdev_link_scope_lookup(net, fl6);
1840 if (dst)
1841 return dst;
1842 }
ca254490 1843
1fb9489b 1844 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1845
d46a9d67 1846 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1847 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1848 (fl6->flowi6_oif && any_src))
77d16f45 1849 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1850
d46a9d67 1851 if (!any_src)
adaa70bb 1852 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1853 else if (sk)
1854 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1855
4c9483b2 1856 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1857}
6f21c96a 1858EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1859
2774c131 1860struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1861{
5c1e6aa3 1862 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1863 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1864 struct dst_entry *new = NULL;
1865
1dbe3252 1866 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
b2a9c0ed 1867 DST_OBSOLETE_NONE, 0);
14e50e57 1868 if (rt) {
0a1f5962 1869 rt6_info_init(rt);
8104891b 1870
0a1f5962 1871 new = &rt->dst;
14e50e57 1872 new->__use = 1;
352e512c 1873 new->input = dst_discard;
ede2059d 1874 new->output = dst_discard_out;
14e50e57 1875
0a1f5962 1876 dst_copy_metrics(new, &ort->dst);
14e50e57 1877
1dbe3252 1878 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1879 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1881 rt->rt6i_metric = 0;
1882
1883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1884#ifdef CONFIG_IPV6_SUBTREES
1885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1886#endif
14e50e57
DM
1887 }
1888
69ead7af
DM
1889 dst_release(dst_orig);
1890 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1891}
14e50e57 1892
1da177e4
LT
1893/*
1894 * Destination cache support functions
1895 */
1896
4b32b5ad
MKL
1897static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1898{
1899 if (rt->dst.from &&
1900 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1901 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1902}
1903
3da59bd9
MKL
1904static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1905{
36143645 1906 u32 rt_cookie = 0;
c5cff856
WW
1907
1908 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1909 return NULL;
1910
1911 if (rt6_check_expired(rt))
1912 return NULL;
1913
1914 return &rt->dst;
1915}
1916
1917static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1918{
5973fb1e
MKL
1919 if (!__rt6_check_expired(rt) &&
1920 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1921 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1922 return &rt->dst;
1923 else
1924 return NULL;
1925}
1926
1da177e4
LT
1927static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1928{
1929 struct rt6_info *rt;
1930
1931 rt = (struct rt6_info *) dst;
1932
6f3118b5
ND
1933 /* All IPV6 dsts are created with ->obsolete set to the value
1934 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1935 * into this function always.
1936 */
e3bc10bd 1937
4b32b5ad
MKL
1938 rt6_dst_from_metrics_check(rt);
1939
02bcf4e0 1940 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1941 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1942 return rt6_dst_from_check(rt, cookie);
1943 else
1944 return rt6_check(rt, cookie);
1da177e4
LT
1945}
1946
1947static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1948{
1949 struct rt6_info *rt = (struct rt6_info *) dst;
1950
1951 if (rt) {
54c1a859
YH
1952 if (rt->rt6i_flags & RTF_CACHE) {
1953 if (rt6_check_expired(rt)) {
1954 ip6_del_rt(rt);
1955 dst = NULL;
1956 }
1957 } else {
1da177e4 1958 dst_release(dst);
54c1a859
YH
1959 dst = NULL;
1960 }
1da177e4 1961 }
54c1a859 1962 return dst;
1da177e4
LT
1963}
1964
1965static void ip6_link_failure(struct sk_buff *skb)
1966{
1967 struct rt6_info *rt;
1968
3ffe533c 1969 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1970
adf30907 1971 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1972 if (rt) {
1eb4f758 1973 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1974 if (dst_hold_safe(&rt->dst))
1975 ip6_del_rt(rt);
c5cff856
WW
1976 } else {
1977 struct fib6_node *fn;
1978
1979 rcu_read_lock();
1980 fn = rcu_dereference(rt->rt6i_node);
1981 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1982 fn->fn_sernum = -1;
1983 rcu_read_unlock();
1eb4f758 1984 }
1da177e4
LT
1985 }
1986}
1987
45e4fd26
MKL
1988static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1989{
1990 struct net *net = dev_net(rt->dst.dev);
1991
1992 rt->rt6i_flags |= RTF_MODIFIED;
1993 rt->rt6i_pmtu = mtu;
1994 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1995}
1996
0d3f6d29
MKL
1997static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1998{
1999 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2000 (rt->rt6i_flags & RTF_PCPU ||
2001 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2002}
2003
45e4fd26
MKL
2004static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2005 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2006{
0dec879f 2007 const struct in6_addr *daddr, *saddr;
67ba4152 2008 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2009
45e4fd26
MKL
2010 if (rt6->rt6i_flags & RTF_LOCAL)
2011 return;
81aded24 2012
19bda36c
XL
2013 if (dst_metric_locked(dst, RTAX_MTU))
2014 return;
2015
0dec879f
JA
2016 if (iph) {
2017 daddr = &iph->daddr;
2018 saddr = &iph->saddr;
2019 } else if (sk) {
2020 daddr = &sk->sk_v6_daddr;
2021 saddr = &inet6_sk(sk)->saddr;
2022 } else {
2023 daddr = NULL;
2024 saddr = NULL;
2025 }
2026 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2027 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2028 if (mtu >= dst_mtu(dst))
2029 return;
9d289715 2030
0d3f6d29 2031 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2032 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2033 /* update rt6_ex->stamp for cache */
2034 if (rt6->rt6i_flags & RTF_CACHE)
2035 rt6_update_exception_stamp_rt(rt6);
0dec879f 2036 } else if (daddr) {
45e4fd26
MKL
2037 struct rt6_info *nrt6;
2038
45e4fd26
MKL
2039 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2040 if (nrt6) {
2041 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2042 if (rt6_insert_exception(nrt6, rt6))
2043 dst_release_immediate(&nrt6->dst);
45e4fd26 2044 }
1da177e4
LT
2045 }
2046}
2047
45e4fd26
MKL
2048static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2049 struct sk_buff *skb, u32 mtu)
2050{
2051 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2052}
2053
42ae66c8 2054void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2055 int oif, u32 mark, kuid_t uid)
81aded24
DM
2056{
2057 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2058 struct dst_entry *dst;
2059 struct flowi6 fl6;
2060
2061 memset(&fl6, 0, sizeof(fl6));
2062 fl6.flowi6_oif = oif;
1b3c61dc 2063 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2064 fl6.daddr = iph->daddr;
2065 fl6.saddr = iph->saddr;
6502ca52 2066 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2067 fl6.flowi6_uid = uid;
81aded24
DM
2068
2069 dst = ip6_route_output(net, NULL, &fl6);
2070 if (!dst->error)
45e4fd26 2071 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2072 dst_release(dst);
2073}
2074EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2075
2076void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2077{
33c162a9
MKL
2078 struct dst_entry *dst;
2079
81aded24 2080 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2081 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2082
2083 dst = __sk_dst_get(sk);
2084 if (!dst || !dst->obsolete ||
2085 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2086 return;
2087
2088 bh_lock_sock(sk);
2089 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2090 ip6_datagram_dst_update(sk, false);
2091 bh_unlock_sock(sk);
81aded24
DM
2092}
2093EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2094
b55b76b2
DJ
2095/* Handle redirects */
/*
 * Flow key used for redirect lookups: a regular flowi6 followed by the
 * address stored by ip6_route_redirect() as the gateway to match.
 *
 * fl6 must remain the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow passed to fib6_rule_lookup() */
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
2100
2101static struct rt6_info *__ip6_route_redirect(struct net *net,
2102 struct fib6_table *table,
2103 struct flowi6 *fl6,
2104 int flags)
2105{
2106 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2107 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2108 struct fib6_node *fn;
2109
2110 /* Get the "current" route for this destination and
67c408cf 2111 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2112 *
2113 * RFC 4861 specifies that redirects should only be
2114 * accepted if they come from the nexthop to the target.
2115 * Due to the way the routes are chosen, this notion
2116 * is a bit fuzzy and one might need to check all possible
2117 * routes.
2118 */
2119
2120 read_lock_bh(&table->tb6_lock);
2121 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2122restart:
2123 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2124 if (rt6_check_expired(rt))
2125 continue;
2126 if (rt->dst.error)
2127 break;
2128 if (!(rt->rt6i_flags & RTF_GATEWAY))
2129 continue;
2130 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2131 continue;
2b760fcf
WW
2132 /* rt_cache's gateway might be different from its 'parent'
2133 * in the case of an ip redirect.
2134 * So we keep searching in the exception table if the gateway
2135 * is different.
2136 */
2137 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2138 rt_cache = rt6_find_cached_rt(rt,
2139 &fl6->daddr,
2140 &fl6->saddr);
2141 if (rt_cache &&
2142 ipv6_addr_equal(&rdfl->gateway,
2143 &rt_cache->rt6i_gateway)) {
2144 rt = rt_cache;
2145 break;
2146 }
b55b76b2 2147 continue;
2b760fcf 2148 }
b55b76b2
DJ
2149 break;
2150 }
2151
2152 if (!rt)
2153 rt = net->ipv6.ip6_null_entry;
2154 else if (rt->dst.error) {
2155 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2156 goto out;
2157 }
2158
2159 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2160 fn = fib6_backtrack(fn, &fl6->saddr);
2161 if (fn)
2162 goto restart;
b55b76b2 2163 }
a3c00e46 2164
b0a1ba59 2165out:
d3843fe5 2166 ip6_hold_safe(net, &rt, true);
b55b76b2
DJ
2167
2168 read_unlock_bh(&table->tb6_lock);
2169
b811580d 2170 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
2171 return rt;
2172};
2173
2174static struct dst_entry *ip6_route_redirect(struct net *net,
2175 const struct flowi6 *fl6,
2176 const struct in6_addr *gateway)
2177{
2178 int flags = RT6_LOOKUP_F_HAS_SADDR;
2179 struct ip6rd_flowi rdfl;
2180
2181 rdfl.fl6 = *fl6;
2182 rdfl.gateway = *gateway;
2183
2184 return fib6_rule_lookup(net, &rdfl.fl6,
2185 flags, __ip6_route_redirect);
2186}
2187
e2d118a1
LC
2188void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2189 kuid_t uid)
3a5ad2ee
DM
2190{
2191 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2192 struct dst_entry *dst;
2193 struct flowi6 fl6;
2194
2195 memset(&fl6, 0, sizeof(fl6));
e374c618 2196 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2197 fl6.flowi6_oif = oif;
2198 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2199 fl6.daddr = iph->daddr;
2200 fl6.saddr = iph->saddr;
6502ca52 2201 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2202 fl6.flowi6_uid = uid;
3a5ad2ee 2203
b55b76b2
DJ
2204 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2205 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2206 dst_release(dst);
2207}
2208EXPORT_SYMBOL_GPL(ip6_redirect);
2209
c92a59ec
DJ
2210void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2211 u32 mark)
2212{
2213 const struct ipv6hdr *iph = ipv6_hdr(skb);
2214 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2215 struct dst_entry *dst;
2216 struct flowi6 fl6;
2217
2218 memset(&fl6, 0, sizeof(fl6));
e374c618 2219 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2220 fl6.flowi6_oif = oif;
2221 fl6.flowi6_mark = mark;
c92a59ec
DJ
2222 fl6.daddr = msg->dest;
2223 fl6.saddr = iph->daddr;
e2d118a1 2224 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2225
b55b76b2
DJ
2226 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2227 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2228 dst_release(dst);
2229}
2230
3a5ad2ee
DM
2231void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2232{
e2d118a1
LC
2233 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2234 sk->sk_uid);
3a5ad2ee
DM
2235}
2236EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2237
0dbaee3b 2238static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2239{
0dbaee3b
DM
2240 struct net_device *dev = dst->dev;
2241 unsigned int mtu = dst_mtu(dst);
2242 struct net *net = dev_net(dev);
2243
1da177e4
LT
2244 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2245
5578689a
DL
2246 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2247 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2248
2249 /*
1ab1457c
YH
2250 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2251 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2252 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2253 * rely only on pmtu discovery"
2254 */
2255 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2256 mtu = IPV6_MAXPLEN;
2257 return mtu;
2258}
2259
ebb762f2 2260static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2261{
4b32b5ad
MKL
2262 const struct rt6_info *rt = (const struct rt6_info *)dst;
2263 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2264 struct inet6_dev *idev;
618f9bc7 2265
4b32b5ad
MKL
2266 if (mtu)
2267 goto out;
2268
2269 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2270 if (mtu)
30f78d8e 2271 goto out;
618f9bc7
SK
2272
2273 mtu = IPV6_MIN_MTU;
d33e4553
DM
2274
2275 rcu_read_lock();
2276 idev = __in6_dev_get(dst->dev);
2277 if (idev)
2278 mtu = idev->cnf.mtu6;
2279 rcu_read_unlock();
2280
30f78d8e 2281out:
14972cbd
RP
2282 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2283
2284 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2285}
2286
3b00944c 2287struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2288 struct flowi6 *fl6)
1da177e4 2289{
87a11578 2290 struct dst_entry *dst;
1da177e4
LT
2291 struct rt6_info *rt;
2292 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2293 struct net *net = dev_net(dev);
1da177e4 2294
38308473 2295 if (unlikely(!idev))
122bdf67 2296 return ERR_PTR(-ENODEV);
1da177e4 2297
ad706862 2298 rt = ip6_dst_alloc(net, dev, 0);
38308473 2299 if (unlikely(!rt)) {
1da177e4 2300 in6_dev_put(idev);
87a11578 2301 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2302 goto out;
2303 }
2304
8e2ec639
YZ
2305 rt->dst.flags |= DST_HOST;
2306 rt->dst.output = ip6_output;
550bab42 2307 rt->rt6i_gateway = fl6->daddr;
87a11578 2308 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2309 rt->rt6i_dst.plen = 128;
2310 rt->rt6i_idev = idev;
14edd87d 2311 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2312
587fea74
WW
2313 /* Add this dst into uncached_list so that rt6_ifdown() can
2314 * do proper release of the net_device
2315 */
2316 rt6_uncached_list_add(rt);
1da177e4 2317
87a11578
DM
2318 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2319
1da177e4 2320out:
87a11578 2321 return dst;
1da177e4
LT
2322}
2323
569d3645 2324static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2325{
86393e52 2326 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2327 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2328 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2329 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2330 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2331 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2332 int entries;
7019b78e 2333
fc66f95c 2334 entries = dst_entries_get_fast(ops);
49a18d86 2335 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2336 entries <= rt_max_size)
1da177e4
LT
2337 goto out;
2338
6891a346 2339 net->ipv6.ip6_rt_gc_expire++;
14956643 2340 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2341 entries = dst_entries_get_slow(ops);
2342 if (entries < ops->gc_thresh)
7019b78e 2343 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2344out:
7019b78e 2345 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2346 return entries > rt_max_size;
1da177e4
LT
2347}
2348
e715b6d3
FW
2349static int ip6_convert_metrics(struct mx6_config *mxc,
2350 const struct fib6_config *cfg)
2351{
c3a8d947 2352 bool ecn_ca = false;
e715b6d3
FW
2353 struct nlattr *nla;
2354 int remaining;
2355 u32 *mp;
2356
63159f29 2357 if (!cfg->fc_mx)
e715b6d3
FW
2358 return 0;
2359
2360 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2361 if (unlikely(!mp))
2362 return -ENOMEM;
2363
2364 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2365 int type = nla_type(nla);
1bb14807 2366 u32 val;
e715b6d3 2367
1bb14807
DB
2368 if (!type)
2369 continue;
2370 if (unlikely(type > RTAX_MAX))
2371 goto err;
ea697639 2372
1bb14807
DB
2373 if (type == RTAX_CC_ALGO) {
2374 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2375
1bb14807 2376 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 2377 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
2378 if (val == TCP_CA_UNSPEC)
2379 goto err;
2380 } else {
2381 val = nla_get_u32(nla);
e715b6d3 2382 }
626abd59
PA
2383 if (type == RTAX_HOPLIMIT && val > 255)
2384 val = 255;
b8d3e416
DB
2385 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2386 goto err;
1bb14807
DB
2387
2388 mp[type - 1] = val;
2389 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2390 }
2391
c3a8d947
DB
2392 if (ecn_ca) {
2393 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2394 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2395 }
e715b6d3 2396
c3a8d947 2397 mxc->mx = mp;
e715b6d3
FW
2398 return 0;
2399 err:
2400 kfree(mp);
2401 return -EINVAL;
2402}
1da177e4 2403
8c14586f
DA
2404static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2405 struct fib6_config *cfg,
2406 const struct in6_addr *gw_addr)
2407{
2408 struct flowi6 fl6 = {
2409 .flowi6_oif = cfg->fc_ifindex,
2410 .daddr = *gw_addr,
2411 .saddr = cfg->fc_prefsrc,
2412 };
2413 struct fib6_table *table;
2414 struct rt6_info *rt;
d5d32e4b 2415 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2416
2417 table = fib6_get_table(net, cfg->fc_table);
2418 if (!table)
2419 return NULL;
2420
2421 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2422 flags |= RT6_LOOKUP_F_HAS_SADDR;
2423
2424 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2425
2426 /* if table lookup failed, fall back to full lookup */
2427 if (rt == net->ipv6.ip6_null_entry) {
2428 ip6_rt_put(rt);
2429 rt = NULL;
2430 }
2431
2432 return rt;
2433}
2434
333c4301
DA
/*
 * Build (but do not insert) a route from the configuration in @cfg.
 *
 * Validates the request, resolves the egress device, table and nexthop,
 * allocates an rt6_info and fills it in.  @cfg is modified along the way:
 * fc_metric and fc_protocol receive defaults when unset, and
 * fc_nlinfo.nl_net is replaced by the namespace of the chosen device.
 *
 * Returns the new route on success or an ERR_PTR() on failure; for some
 * failures a message is also stored in @extack.  On failure every reference
 * taken here (dev, idev, rt) is dropped at the out label.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	/* Take references on the requested device and its inet6_dev. */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* Without NLM_F_CREATE an existing table is preferred; a missing
	 * one is still created, after a warning.
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* DST_NOCOUNT is passed for everything except RTF_ADDRCONF routes. */
	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type and flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	/* Lightweight tunnel encap: the tunnel may take over input and/or
	 * output, in which case the original handlers are saved in the
	 * tunnel state.
	 */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here: they would result
	 * in kernel looping. Promote them to reject routes.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error and the handlers. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			/* THROW -> -EAGAIN, UNREACHABLE -> -EHOSTUNREACH,
			 * anything else -> -ENETUNREACH.
			 */
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}
		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using non-link-local
			 * addresses as the nexthop address; otherwise the
			 * router will not be able to send redirects.
			 * It is very good, but in some (rare!) circumstances
			 * (SIT, PtP, NBMA NOARP links) it is handy to allow
			 * some exceptions. --ANK
			 * We allow IPv4-mapped nexthops to support
			 * RFC4798-type addressing.
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED))) {
				NL_SET_ERR_MSG(extack,
					       "Invalid gateway address");
				goto out;
			}

			/* Try the requested table first; discard the result
			 * if it is itself a gateway route or uses another
			 * device than the one requested.
			 */
			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device requested: adopt the one the
				 * gateway route uses and take references.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway is accepted only if the route to it
			 * is not a gateway route itself.
			 */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Egress device not specified");
			goto out;
		} else if (dev->flags & IFF_LOOPBACK) {
			NL_SET_ERR_MSG(extack,
				       "Egress device can not be loopback device for this route");
			goto out;
		}
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A preferred source address must pass ipv6_chk_addr() on dev. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* The dev/idev references taken above are handed to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
2724
333c4301
DA
2725int ip6_route_add(struct fib6_config *cfg,
2726 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2727{
2728 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2729 struct rt6_info *rt;
6b9ea5a6
RP
2730 int err;
2731
333c4301 2732 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2733 if (IS_ERR(rt)) {
2734 err = PTR_ERR(rt);
2735 rt = NULL;
6b9ea5a6 2736 goto out;
8c5b83f0 2737 }
6b9ea5a6 2738
e715b6d3
FW
2739 err = ip6_convert_metrics(&mxc, cfg);
2740 if (err)
2741 goto out;
1da177e4 2742
333c4301 2743 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2744
2745 kfree(mxc.mx);
6b9ea5a6 2746
e715b6d3 2747 return err;
1da177e4 2748out:
587fea74
WW
2749 if (rt)
2750 dst_release_immediate(&rt->dst);
6b9ea5a6 2751
1da177e4
LT
2752 return err;
2753}
2754
86872cb5 2755static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2756{
2757 int err;
c71099ac 2758 struct fib6_table *table;
d1918542 2759 struct net *net = dev_net(rt->dst.dev);
1da177e4 2760
a4c2fd7f 2761 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2762 err = -ENOENT;
2763 goto out;
2764 }
6c813a72 2765
c71099ac
TG
2766 table = rt->rt6i_table;
2767 write_lock_bh(&table->tb6_lock);
86872cb5 2768 err = fib6_del(rt, info);
c71099ac 2769 write_unlock_bh(&table->tb6_lock);
1da177e4 2770
6825a26c 2771out:
94e187c0 2772 ip6_rt_put(rt);
1da177e4
LT
2773 return err;
2774}
2775
e0a1ad73
TG
2776int ip6_del_rt(struct rt6_info *rt)
2777{
4d1169c1 2778 struct nl_info info = {
d1918542 2779 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2780 };
528c4ceb 2781 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2782}
2783
0ae81335
DA
2784static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2785{
2786 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2787 struct net *net = info->nl_net;
16a16cd3 2788 struct sk_buff *skb = NULL;
0ae81335 2789 struct fib6_table *table;
e3330039 2790 int err = -ENOENT;
0ae81335 2791
e3330039
WC
2792 if (rt == net->ipv6.ip6_null_entry)
2793 goto out_put;
0ae81335
DA
2794 table = rt->rt6i_table;
2795 write_lock_bh(&table->tb6_lock);
2796
2797 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2798 struct rt6_info *sibling, *next_sibling;
2799
16a16cd3
DA
2800 /* prefer to send a single notification with all hops */
2801 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2802 if (skb) {
2803 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2804
e3330039 2805 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2806 NULL, NULL, 0, RTM_DELROUTE,
2807 info->portid, seq, 0) < 0) {
2808 kfree_skb(skb);
2809 skb = NULL;
2810 } else
2811 info->skip_notify = 1;
2812 }
2813
0ae81335
DA
2814 list_for_each_entry_safe(sibling, next_sibling,
2815 &rt->rt6i_siblings,
2816 rt6i_siblings) {
2817 err = fib6_del(sibling, info);
2818 if (err)
e3330039 2819 goto out_unlock;
0ae81335
DA
2820 }
2821 }
2822
2823 err = fib6_del(rt, info);
e3330039 2824out_unlock:
0ae81335 2825 write_unlock_bh(&table->tb6_lock);
e3330039 2826out_put:
0ae81335 2827 ip6_rt_put(rt);
16a16cd3
DA
2828
2829 if (skb) {
e3330039 2830 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2831 info->nlh, gfp_any());
2832 }
0ae81335
DA
2833 return err;
2834}
2835
333c4301
DA
2836static int ip6_route_del(struct fib6_config *cfg,
2837 struct netlink_ext_ack *extack)
1da177e4 2838{
2b760fcf 2839 struct rt6_info *rt, *rt_cache;
c71099ac 2840 struct fib6_table *table;
1da177e4 2841 struct fib6_node *fn;
1da177e4
LT
2842 int err = -ESRCH;
2843
5578689a 2844 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2845 if (!table) {
2846 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2847 return err;
d5d531cb 2848 }
c71099ac
TG
2849
2850 read_lock_bh(&table->tb6_lock);
1da177e4 2851
c71099ac 2852 fn = fib6_locate(&table->tb6_root,
86872cb5 2853 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 2854 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 2855 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 2856
1da177e4 2857 if (fn) {
d8d1f30b 2858 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2b760fcf
WW
2859 if (cfg->fc_flags & RTF_CACHE) {
2860 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2861 &cfg->fc_src);
2862 if (!rt_cache)
2863 continue;
2864 rt = rt_cache;
2865 }
86872cb5 2866 if (cfg->fc_ifindex &&
d1918542
DM
2867 (!rt->dst.dev ||
2868 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2869 continue;
86872cb5
TG
2870 if (cfg->fc_flags & RTF_GATEWAY &&
2871 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2872 continue;
86872cb5 2873 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2874 continue;
c2ed1880
M
2875 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2876 continue;
d3843fe5
WW
2877 if (!dst_hold_safe(&rt->dst))
2878 break;
c71099ac 2879 read_unlock_bh(&table->tb6_lock);
1da177e4 2880
0ae81335
DA
2881 /* if gateway was specified only delete the one hop */
2882 if (cfg->fc_flags & RTF_GATEWAY)
2883 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2884
2885 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2886 }
2887 }
c71099ac 2888 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2889
2890 return err;
2891}
2892
/*
 * Handle a received redirect message in @skb against the route @dst.
 *
 * The message is validated step by step; any failure returns silently or
 * with a rate-limited debug message.  Once accepted, the neighbour entry
 * for the redirect target is updated and a cached clone of the route, with
 * the target as gateway, is inserted as an exception of @dst.  Listeners
 * are then told through a NETEVENT_REDIRECT notification.  @sk is not used
 * by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes left for ND options after the fixed redirect header. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise the target must be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Ignored when forwarding is on or accept_redirects is off. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	/* The router flags are only passed for an off-link target. */
	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	/* No need to remove rt from the exception table if rt is
	 * a cached route because rt6_insert_exception() will
	 * take care of it
	 */
	if (rt6_insert_exception(nrt, rt)) {
		dst_release_immediate(&nrt->dst);
		goto out;
	}

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

out:
	/* Drop the reference obtained from __neigh_lookup(). */
	neigh_release(neigh);
}
3010
1da177e4
LT
3011/*
3012 * Misc support functions
3013 */
3014
4b32b5ad
MKL
3015static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3016{
3017 BUG_ON(from->dst.from);
3018
3019 rt->rt6i_flags &= ~RTF_EXPIRES;
3020 dst_hold(&from->dst);
3021 rt->dst.from = &from->dst;
3022 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3023}
3024
83a09abd
MKL
3025static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3026{
3027 rt->dst.input = ort->dst.input;
3028 rt->dst.output = ort->dst.output;
3029 rt->rt6i_dst = ort->rt6i_dst;
3030 rt->dst.error = ort->dst.error;
3031 rt->rt6i_idev = ort->rt6i_idev;
3032 if (rt->rt6i_idev)
3033 in6_dev_hold(rt->rt6i_idev);
3034 rt->dst.lastuse = jiffies;
3035 rt->rt6i_gateway = ort->rt6i_gateway;
3036 rt->rt6i_flags = ort->rt6i_flags;
3037 rt6_set_from(rt, ort);
3038 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3039#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3040 rt->rt6i_src = ort->rt6i_src;
1da177e4 3041#endif
83a09abd
MKL
3042 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3043 rt->rt6i_table = ort->rt6i_table;
61adedf3 3044 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3045}
3046
70ceb4f5 3047#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3048static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3049 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3050 const struct in6_addr *gwaddr,
3051 struct net_device *dev)
70ceb4f5 3052{
830218c1
DA
3053 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3054 int ifindex = dev->ifindex;
70ceb4f5
YH
3055 struct fib6_node *fn;
3056 struct rt6_info *rt = NULL;
c71099ac
TG
3057 struct fib6_table *table;
3058
830218c1 3059 table = fib6_get_table(net, tb_id);
38308473 3060 if (!table)
c71099ac 3061 return NULL;
70ceb4f5 3062
5744dd9b 3063 read_lock_bh(&table->tb6_lock);
38fbeeee 3064 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3065 if (!fn)
3066 goto out;
3067
d8d1f30b 3068 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 3069 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3070 continue;
3071 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3072 continue;
3073 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3074 continue;
d3843fe5 3075 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3076 break;
3077 }
3078out:
5744dd9b 3079 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
3080 return rt;
3081}
3082
efa2cea0 3083static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3084 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3085 const struct in6_addr *gwaddr,
3086 struct net_device *dev,
95c96174 3087 unsigned int pref)
70ceb4f5 3088{
86872cb5 3089 struct fib6_config cfg = {
238fc7ea 3090 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3091 .fc_ifindex = dev->ifindex,
86872cb5
TG
3092 .fc_dst_len = prefixlen,
3093 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3094 RTF_UP | RTF_PREF(pref),
b91d5329 3095 .fc_protocol = RTPROT_RA,
15e47304 3096 .fc_nlinfo.portid = 0,
efa2cea0
DL
3097 .fc_nlinfo.nlh = NULL,
3098 .fc_nlinfo.nl_net = net,
86872cb5
TG
3099 };
3100
830218c1 3101 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3102 cfg.fc_dst = *prefix;
3103 cfg.fc_gateway = *gwaddr;
70ceb4f5 3104
e317da96
YH
3105 /* We should treat it as a default route if prefix length is 0. */
3106 if (!prefixlen)
86872cb5 3107 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3108
333c4301 3109 ip6_route_add(&cfg, NULL);
70ceb4f5 3110
830218c1 3111 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3112}
3113#endif
3114
b71d1d42 3115struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3116{
830218c1 3117 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3118 struct rt6_info *rt;
c71099ac 3119 struct fib6_table *table;
1da177e4 3120
830218c1 3121 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3122 if (!table)
c71099ac 3123 return NULL;
1da177e4 3124
5744dd9b 3125 read_lock_bh(&table->tb6_lock);
67ba4152 3126 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 3127 if (dev == rt->dst.dev &&
045927ff 3128 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3129 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3130 break;
3131 }
3132 if (rt)
d3843fe5 3133 ip6_hold_safe(NULL, &rt, false);
5744dd9b 3134 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
3135 return rt;
3136}
3137
b71d1d42 3138struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3139 struct net_device *dev,
3140 unsigned int pref)
1da177e4 3141{
86872cb5 3142 struct fib6_config cfg = {
ca254490 3143 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3144 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3145 .fc_ifindex = dev->ifindex,
3146 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3147 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3148 .fc_protocol = RTPROT_RA,
15e47304 3149 .fc_nlinfo.portid = 0,
5578689a 3150 .fc_nlinfo.nlh = NULL,
c346dca1 3151 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3152 };
1da177e4 3153
4e3fd7a0 3154 cfg.fc_gateway = *gwaddr;
1da177e4 3155
333c4301 3156 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3157 struct fib6_table *table;
3158
3159 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3160 if (table)
3161 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3162 }
1da177e4 3163
1da177e4
LT
3164 return rt6_get_dflt_router(gwaddr, dev);
3165}
3166
830218c1 3167static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3168{
3169 struct rt6_info *rt;
3170
3171restart:
c71099ac 3172 read_lock_bh(&table->tb6_lock);
d8d1f30b 3173 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
3174 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3175 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5
WW
3176 if (dst_hold_safe(&rt->dst)) {
3177 read_unlock_bh(&table->tb6_lock);
3178 ip6_del_rt(rt);
3179 } else {
3180 read_unlock_bh(&table->tb6_lock);
3181 }
1da177e4
LT
3182 goto restart;
3183 }
3184 }
c71099ac 3185 read_unlock_bh(&table->tb6_lock);
830218c1
DA
3186
3187 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3188}
3189
3190void rt6_purge_dflt_routers(struct net *net)
3191{
3192 struct fib6_table *table;
3193 struct hlist_head *head;
3194 unsigned int h;
3195
3196 rcu_read_lock();
3197
3198 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3199 head = &net->ipv6.fib_table_hash[h];
3200 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3201 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3202 __rt6_purge_dflt_routers(table);
3203 }
3204 }
3205
3206 rcu_read_unlock();
1da177e4
LT
3207}
3208
5578689a
DL
3209static void rtmsg_to_fib6_config(struct net *net,
3210 struct in6_rtmsg *rtmsg,
86872cb5
TG
3211 struct fib6_config *cfg)
3212{
3213 memset(cfg, 0, sizeof(*cfg));
3214
ca254490
DA
3215 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3216 : RT6_TABLE_MAIN;
86872cb5
TG
3217 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3218 cfg->fc_metric = rtmsg->rtmsg_metric;
3219 cfg->fc_expires = rtmsg->rtmsg_info;
3220 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3221 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3222 cfg->fc_flags = rtmsg->rtmsg_flags;
3223
5578689a 3224 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3225
4e3fd7a0
AD
3226 cfg->fc_dst = rtmsg->rtmsg_dst;
3227 cfg->fc_src = rtmsg->rtmsg_src;
3228 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3229}
3230
5578689a 3231int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3232{
86872cb5 3233 struct fib6_config cfg;
1da177e4
LT
3234 struct in6_rtmsg rtmsg;
3235 int err;
3236
67ba4152 3237 switch (cmd) {
1da177e4
LT
3238 case SIOCADDRT: /* Add a route */
3239 case SIOCDELRT: /* Delete a route */
af31f412 3240 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3241 return -EPERM;
3242 err = copy_from_user(&rtmsg, arg,
3243 sizeof(struct in6_rtmsg));
3244 if (err)
3245 return -EFAULT;
86872cb5 3246
5578689a 3247 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3248
1da177e4
LT
3249 rtnl_lock();
3250 switch (cmd) {
3251 case SIOCADDRT:
333c4301 3252 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3253 break;
3254 case SIOCDELRT:
333c4301 3255 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3256 break;
3257 default:
3258 err = -EINVAL;
3259 }
3260 rtnl_unlock();
3261
3262 return err;
3ff50b79 3263 }
1da177e4
LT
3264
3265 return -EINVAL;
3266}
3267
3268/*
3269 * Drop the packet on the floor
3270 */
3271
d5fdd6ba 3272static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3273{
612f09e8 3274 int type;
adf30907 3275 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3276 switch (ipstats_mib_noroutes) {
3277 case IPSTATS_MIB_INNOROUTES:
0660e03f 3278 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3279 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3280 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3281 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3282 break;
3283 }
3284 /* FALLTHROUGH */
3285 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3286 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3287 ipstats_mib_noroutes);
612f09e8
YH
3288 break;
3289 }
3ffe533c 3290 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3291 kfree_skb(skb);
3292 return 0;
3293}
3294
9ce8ade0
TG
3295static int ip6_pkt_discard(struct sk_buff *skb)
3296{
612f09e8 3297 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3298}
3299
ede2059d 3300static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3301{
adf30907 3302 skb->dev = skb_dst(skb)->dev;
612f09e8 3303 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3304}
3305
9ce8ade0
TG
3306static int ip6_pkt_prohibit(struct sk_buff *skb)
3307{
612f09e8 3308 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3309}
3310
ede2059d 3311static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3312{
adf30907 3313 skb->dev = skb_dst(skb)->dev;
612f09e8 3314 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3315}
3316
1da177e4
LT
3317/*
3318 * Allocate a dst for local (unicast / anycast) address.
3319 */
3320
3321struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3322 const struct in6_addr *addr,
8f031519 3323 bool anycast)
1da177e4 3324{
ca254490 3325 u32 tb_id;
c346dca1 3326 struct net *net = dev_net(idev->dev);
4832c30d 3327 struct net_device *dev = idev->dev;
5f02ce24
DA
3328 struct rt6_info *rt;
3329
5f02ce24 3330 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3331 if (!rt)
1da177e4
LT
3332 return ERR_PTR(-ENOMEM);
3333
1da177e4
LT
3334 in6_dev_hold(idev);
3335
11d53b49 3336 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3337 rt->dst.input = ip6_input;
3338 rt->dst.output = ip6_output;
1da177e4 3339 rt->rt6i_idev = idev;
1da177e4 3340
94b5e0f9 3341 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3342 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3343 if (anycast)
3344 rt->rt6i_flags |= RTF_ANYCAST;
3345 else
1da177e4 3346 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3347
550bab42 3348 rt->rt6i_gateway = *addr;
4e3fd7a0 3349 rt->rt6i_dst.addr = *addr;
1da177e4 3350 rt->rt6i_dst.plen = 128;
ca254490
DA
3351 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3352 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3353
1da177e4
LT
3354 return rt;
3355}
3356
c3968a85
DW
3357/* remove deleted ip from prefsrc entries */
/* Argument bundle passed through fib6_clean_all() to fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace, used to skip ip6_null_entry */
	struct in6_addr *addr;		/* preferred source address being removed */
};
3363
3364static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3365{
3366 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3367 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3368 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3369
d1918542 3370 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3371 rt != net->ipv6.ip6_null_entry &&
3372 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3373 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3374 /* remove prefsrc entry */
3375 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3376 /* need to update cache as well */
3377 rt6_exceptions_remove_prefsrc(rt);
3378 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3379 }
3380 return 0;
3381}
3382
3383void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3384{
3385 struct net *net = dev_net(ifp->idev->dev);
3386 struct arg_dev_net_ip adni = {
3387 .dev = ifp->idev->dev,
3388 .net = net,
3389 .addr = &ifp->addr,
3390 };
0c3584d5 3391 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3392}
3393
be7a010d 3394#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3395
3396/* Remove routers and update dst entries when gateway turn into host. */
3397static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3398{
3399 struct in6_addr *gateway = (struct in6_addr *)arg;
3400
2b760fcf
WW
3401 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3402 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3403 return -1;
3404 }
b16cb459
WW
3405
3406 /* Further clean up cached routes in exception table.
3407 * This is needed because cached route may have a different
3408 * gateway than its 'parent' in the case of an ip redirect.
3409 */
3410 rt6_exceptions_clean_tohost(rt, gateway);
3411
be7a010d
DJ
3412 return 0;
3413}
3414
3415void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3416{
3417 fib6_clean_all(net, fib6_clean_tohost, gateway);
3418}
3419
8ed67789
DL
/* Argument bundle passed through fib6_clean_all() to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace, used to skip ip6_null_entry */
};
3424
a1a22c12 3425/* called with write lock held for table with rt */
1da177e4
LT
3426static int fib6_ifdown(struct rt6_info *rt, void *arg)
3427{
bc3ef660 3428 const struct arg_dev_net *adn = arg;
3429 const struct net_device *dev = adn->dev;
8ed67789 3430
d1918542 3431 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
3432 rt != adn->net->ipv6.ip6_null_entry &&
3433 (rt->rt6i_nsiblings == 0 ||
8397ed36 3434 (dev && netdev_unregistering(dev)) ||
a1a22c12 3435 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 3436 return -1;
c159d30c 3437
1da177e4
LT
3438 return 0;
3439}
3440
f3db4851 3441void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 3442{
8ed67789
DL
3443 struct arg_dev_net adn = {
3444 .dev = dev,
3445 .net = net,
3446 };
3447
0c3584d5 3448 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
3449 if (dev)
3450 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
3451}
3452
/* Argument bundle passed through fib6_clean_all() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3457
3458static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3459{
3460 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3461 struct inet6_dev *idev;
3462
3463 /* In IPv6 pmtu discovery is not optional,
3464 so that RTAX_MTU lock cannot disable it.
3465 We still use this lock to block changes
3466 caused by addrconf/ndisc.
3467 */
3468
3469 idev = __in6_dev_get(arg->dev);
38308473 3470 if (!idev)
1da177e4
LT
3471 return 0;
3472
3473 /* For administrative MTU increase, there is no way to discover
3474 IPv6 PMTU increase, so PMTU increase should be updated here.
3475 Since RFC 1981 doesn't include administrative MTU increase
3476 update PMTU increase is a MUST. (i.e. jumbo frame)
3477 */
3478 /*
3479 If new MTU is less than route PMTU, this new MTU will be the
3480 lowest MTU in the path, update the route PMTU to reflect PMTU
3481 decreases; if new MTU is greater than route PMTU, and the
3482 old MTU is the lowest MTU in the path, update the route PMTU
3483 to reflect the increase. In this case if the other nodes' MTU
3484 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3485 PMTU discovery.
1da177e4 3486 */
d1918542 3487 if (rt->dst.dev == arg->dev &&
fb56be83 3488 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad 3489 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3490 spin_lock_bh(&rt6_exception_lock);
2b760fcf
WW
3491 if (dst_mtu(&rt->dst) >= arg->mtu ||
3492 (dst_mtu(&rt->dst) < arg->mtu &&
3493 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
4b32b5ad
MKL
3494 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3495 }
f5bbe7ee
WW
3496 rt6_exceptions_update_pmtu(rt, arg->mtu);
3497 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3498 }
1da177e4
LT
3499 return 0;
3500}
3501
95c96174 3502void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3503{
c71099ac
TG
3504 struct rt6_mtu_change_arg arg = {
3505 .dev = dev,
3506 .mtu = mtu,
3507 };
1da177e4 3508
0c3584d5 3509 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3510}
3511
ef7c79ed 3512static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3513 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3514 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3515 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3516 [RTA_PRIORITY] = { .type = NLA_U32 },
3517 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3518 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3519 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3520 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3521 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3522 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3523 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3524 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3525};
3526
3527static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3528 struct fib6_config *cfg,
3529 struct netlink_ext_ack *extack)
1da177e4 3530{
86872cb5
TG
3531 struct rtmsg *rtm;
3532 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3533 unsigned int pref;
86872cb5 3534 int err;
1da177e4 3535
fceb6435
JB
3536 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3537 NULL);
86872cb5
TG
3538 if (err < 0)
3539 goto errout;
1da177e4 3540
86872cb5
TG
3541 err = -EINVAL;
3542 rtm = nlmsg_data(nlh);
3543 memset(cfg, 0, sizeof(*cfg));
3544
3545 cfg->fc_table = rtm->rtm_table;
3546 cfg->fc_dst_len = rtm->rtm_dst_len;
3547 cfg->fc_src_len = rtm->rtm_src_len;
3548 cfg->fc_flags = RTF_UP;
3549 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3550 cfg->fc_type = rtm->rtm_type;
86872cb5 3551
ef2c7d7b
ND
3552 if (rtm->rtm_type == RTN_UNREACHABLE ||
3553 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3554 rtm->rtm_type == RTN_PROHIBIT ||
3555 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3556 cfg->fc_flags |= RTF_REJECT;
3557
ab79ad14
3558 if (rtm->rtm_type == RTN_LOCAL)
3559 cfg->fc_flags |= RTF_LOCAL;
3560
1f56a01f
MKL
3561 if (rtm->rtm_flags & RTM_F_CLONED)
3562 cfg->fc_flags |= RTF_CACHE;
3563
15e47304 3564 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3565 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3566 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3567
3568 if (tb[RTA_GATEWAY]) {
67b61f6c 3569 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3570 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3571 }
86872cb5
TG
3572
3573 if (tb[RTA_DST]) {
3574 int plen = (rtm->rtm_dst_len + 7) >> 3;
3575
3576 if (nla_len(tb[RTA_DST]) < plen)
3577 goto errout;
3578
3579 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3580 }
86872cb5
TG
3581
3582 if (tb[RTA_SRC]) {
3583 int plen = (rtm->rtm_src_len + 7) >> 3;
3584
3585 if (nla_len(tb[RTA_SRC]) < plen)
3586 goto errout;
3587
3588 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3589 }
86872cb5 3590
c3968a85 3591 if (tb[RTA_PREFSRC])
67b61f6c 3592 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3593
86872cb5
TG
3594 if (tb[RTA_OIF])
3595 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3596
3597 if (tb[RTA_PRIORITY])
3598 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3599
3600 if (tb[RTA_METRICS]) {
3601 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3602 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3603 }
86872cb5
TG
3604
3605 if (tb[RTA_TABLE])
3606 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3607
51ebd318
ND
3608 if (tb[RTA_MULTIPATH]) {
3609 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3610 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3611
3612 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3613 cfg->fc_mp_len, extack);
9ed59592
DA
3614 if (err < 0)
3615 goto errout;
51ebd318
ND
3616 }
3617
c78ba6d6
LR
3618 if (tb[RTA_PREF]) {
3619 pref = nla_get_u8(tb[RTA_PREF]);
3620 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3621 pref != ICMPV6_ROUTER_PREF_HIGH)
3622 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3623 cfg->fc_flags |= RTF_PREF(pref);
3624 }
3625
19e42e45
RP
3626 if (tb[RTA_ENCAP])
3627 cfg->fc_encap = tb[RTA_ENCAP];
3628
9ed59592 3629 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3630 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3631
c255bd68 3632 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3633 if (err < 0)
3634 goto errout;
3635 }
3636
32bc201e
XL
3637 if (tb[RTA_EXPIRES]) {
3638 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3639
3640 if (addrconf_finite_timeout(timeout)) {
3641 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3642 cfg->fc_flags |= RTF_EXPIRES;
3643 }
3644 }
3645
86872cb5
TG
3646 err = 0;
3647errout:
3648 return err;
1da177e4
LT
3649}
3650
6b9ea5a6
RP
3651struct rt6_nh {
3652 struct rt6_info *rt6_info;
3653 struct fib6_config r_cfg;
3654 struct mx6_config mxc;
3655 struct list_head next;
3656};
3657
3658static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3659{
3660 struct rt6_nh *nh;
3661
3662 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3663 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3664 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3665 nh->r_cfg.fc_ifindex);
3666 }
3667}
3668
3669static int ip6_route_info_append(struct list_head *rt6_nh_list,
3670 struct rt6_info *rt, struct fib6_config *r_cfg)
3671{
3672 struct rt6_nh *nh;
6b9ea5a6
RP
3673 int err = -EEXIST;
3674
3675 list_for_each_entry(nh, rt6_nh_list, next) {
3676 /* check if rt6_info already exists */
f06b7549 3677 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3678 return err;
3679 }
3680
3681 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3682 if (!nh)
3683 return -ENOMEM;
3684 nh->rt6_info = rt;
3685 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3686 if (err) {
3687 kfree(nh);
3688 return err;
3689 }
3690 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3691 list_add_tail(&nh->next, rt6_nh_list);
3692
3693 return 0;
3694}
3695
3b1137fe
DA
3696static void ip6_route_mpath_notify(struct rt6_info *rt,
3697 struct rt6_info *rt_last,
3698 struct nl_info *info,
3699 __u16 nlflags)
3700{
3701 /* if this is an APPEND route, then rt points to the first route
3702 * inserted and rt_last points to last route inserted. Userspace
3703 * wants a consistent dump of the route which starts at the first
3704 * nexthop. Since sibling routes are always added at the end of
3705 * the list, find the first sibling of the last route appended
3706 */
3707 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3708 rt = list_first_entry(&rt_last->rt6i_siblings,
3709 struct rt6_info,
3710 rt6i_siblings);
3711 }
3712
3713 if (rt)
3714 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3715}
3716
333c4301
DA
3717static int ip6_route_multipath_add(struct fib6_config *cfg,
3718 struct netlink_ext_ack *extack)
51ebd318 3719{
3b1137fe
DA
3720 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3721 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3722 struct fib6_config r_cfg;
3723 struct rtnexthop *rtnh;
6b9ea5a6
RP
3724 struct rt6_info *rt;
3725 struct rt6_nh *err_nh;
3726 struct rt6_nh *nh, *nh_safe;
3b1137fe 3727 __u16 nlflags;
51ebd318
ND
3728 int remaining;
3729 int attrlen;
6b9ea5a6
RP
3730 int err = 1;
3731 int nhn = 0;
3732 int replace = (cfg->fc_nlinfo.nlh &&
3733 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3734 LIST_HEAD(rt6_nh_list);
51ebd318 3735
3b1137fe
DA
3736 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3737 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3738 nlflags |= NLM_F_APPEND;
3739
35f1b4e9 3740 remaining = cfg->fc_mp_len;
51ebd318 3741 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3742
6b9ea5a6
RP
3743 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3744 * rt6_info structs per nexthop
3745 */
51ebd318
ND
3746 while (rtnh_ok(rtnh, remaining)) {
3747 memcpy(&r_cfg, cfg, sizeof(*cfg));
3748 if (rtnh->rtnh_ifindex)
3749 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3750
3751 attrlen = rtnh_attrlen(rtnh);
3752 if (attrlen > 0) {
3753 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3754
3755 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3756 if (nla) {
67b61f6c 3757 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3758 r_cfg.fc_flags |= RTF_GATEWAY;
3759 }
19e42e45
RP
3760 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3761 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3762 if (nla)
3763 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3764 }
6b9ea5a6 3765
333c4301 3766 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3767 if (IS_ERR(rt)) {
3768 err = PTR_ERR(rt);
3769 rt = NULL;
6b9ea5a6 3770 goto cleanup;
8c5b83f0 3771 }
6b9ea5a6
RP
3772
3773 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3774 if (err) {
587fea74 3775 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3776 goto cleanup;
3777 }
3778
3779 rtnh = rtnh_next(rtnh, &remaining);
3780 }
3781
3b1137fe
DA
3782 /* for add and replace send one notification with all nexthops.
3783 * Skip the notification in fib6_add_rt2node and send one with
3784 * the full route when done
3785 */
3786 info->skip_notify = 1;
3787
6b9ea5a6
RP
3788 err_nh = NULL;
3789 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3790 rt_last = nh->rt6_info;
333c4301 3791 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3792 /* save reference to first route for notification */
3793 if (!rt_notif && !err)
3794 rt_notif = nh->rt6_info;
3795
6b9ea5a6
RP
3796 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3797 nh->rt6_info = NULL;
3798 if (err) {
3799 if (replace && nhn)
3800 ip6_print_replace_route_err(&rt6_nh_list);
3801 err_nh = nh;
3802 goto add_errout;
51ebd318 3803 }
6b9ea5a6 3804
1a72418b 3805 /* Because each route is added like a single route we remove
27596472
MK
3806 * these flags after the first nexthop: if there is a collision,
3807 * we have already failed to add the first nexthop:
3808 * fib6_add_rt2node() has rejected it; when replacing, old
3809 * nexthops have been replaced by first new, the rest should
3810 * be added to it.
1a72418b 3811 */
27596472
MK
3812 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3813 NLM_F_REPLACE);
6b9ea5a6
RP
3814 nhn++;
3815 }
3816
3b1137fe
DA
3817 /* success ... tell user about new route */
3818 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3819 goto cleanup;
3820
3821add_errout:
3b1137fe
DA
3822 /* send notification for routes that were added so that
3823 * the delete notifications sent by ip6_route_del are
3824 * coherent
3825 */
3826 if (rt_notif)
3827 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3828
6b9ea5a6
RP
3829 /* Delete routes that were already added */
3830 list_for_each_entry(nh, &rt6_nh_list, next) {
3831 if (err_nh == nh)
3832 break;
333c4301 3833 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3834 }
3835
3836cleanup:
3837 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3838 if (nh->rt6_info)
3839 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3840 kfree(nh->mxc.mx);
6b9ea5a6
RP
3841 list_del(&nh->next);
3842 kfree(nh);
3843 }
3844
3845 return err;
3846}
3847
333c4301
DA
3848static int ip6_route_multipath_del(struct fib6_config *cfg,
3849 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3850{
3851 struct fib6_config r_cfg;
3852 struct rtnexthop *rtnh;
3853 int remaining;
3854 int attrlen;
3855 int err = 1, last_err = 0;
3856
3857 remaining = cfg->fc_mp_len;
3858 rtnh = (struct rtnexthop *)cfg->fc_mp;
3859
3860 /* Parse a Multipath Entry */
3861 while (rtnh_ok(rtnh, remaining)) {
3862 memcpy(&r_cfg, cfg, sizeof(*cfg));
3863 if (rtnh->rtnh_ifindex)
3864 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3865
3866 attrlen = rtnh_attrlen(rtnh);
3867 if (attrlen > 0) {
3868 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3869
3870 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3871 if (nla) {
3872 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3873 r_cfg.fc_flags |= RTF_GATEWAY;
3874 }
3875 }
333c4301 3876 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3877 if (err)
3878 last_err = err;
3879
51ebd318
ND
3880 rtnh = rtnh_next(rtnh, &remaining);
3881 }
3882
3883 return last_err;
3884}
3885
c21ef3e3
DA
3886static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3887 struct netlink_ext_ack *extack)
1da177e4 3888{
86872cb5
TG
3889 struct fib6_config cfg;
3890 int err;
1da177e4 3891
333c4301 3892 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3893 if (err < 0)
3894 return err;
3895
51ebd318 3896 if (cfg.fc_mp)
333c4301 3897 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3898 else {
3899 cfg.fc_delete_all_nh = 1;
333c4301 3900 return ip6_route_del(&cfg, extack);
0ae81335 3901 }
1da177e4
LT
3902}
3903
c21ef3e3
DA
3904static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3905 struct netlink_ext_ack *extack)
1da177e4 3906{
86872cb5
TG
3907 struct fib6_config cfg;
3908 int err;
1da177e4 3909
333c4301 3910 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3911 if (err < 0)
3912 return err;
3913
51ebd318 3914 if (cfg.fc_mp)
333c4301 3915 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3916 else
333c4301 3917 return ip6_route_add(&cfg, extack);
1da177e4
LT
3918}
3919
beb1afac 3920static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3921{
beb1afac
DA
3922 int nexthop_len = 0;
3923
3924 if (rt->rt6i_nsiblings) {
3925 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3926 + NLA_ALIGN(sizeof(struct rtnexthop))
3927 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3928 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3929
3930 nexthop_len *= rt->rt6i_nsiblings;
3931 }
3932
339bf98f
TG
3933 return NLMSG_ALIGN(sizeof(struct rtmsg))
3934 + nla_total_size(16) /* RTA_SRC */
3935 + nla_total_size(16) /* RTA_DST */
3936 + nla_total_size(16) /* RTA_GATEWAY */
3937 + nla_total_size(16) /* RTA_PREFSRC */
3938 + nla_total_size(4) /* RTA_TABLE */
3939 + nla_total_size(4) /* RTA_IIF */
3940 + nla_total_size(4) /* RTA_OIF */
3941 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3942 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3943 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3944 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3945 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3946 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3947 + nexthop_len;
3948}
3949
3950static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3951 unsigned int *flags, bool skip_oif)
beb1afac
DA
3952{
3953 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3954 *flags |= RTNH_F_LINKDOWN;
3955 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3956 *flags |= RTNH_F_DEAD;
3957 }
3958
3959 if (rt->rt6i_flags & RTF_GATEWAY) {
3960 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3961 goto nla_put_failure;
3962 }
3963
fe400799 3964 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3965 *flags |= RTNH_F_OFFLOAD;
3966
5be083ce
DA
3967 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3968 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3969 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3970 goto nla_put_failure;
3971
3972 if (rt->dst.lwtstate &&
3973 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3974 goto nla_put_failure;
3975
3976 return 0;
3977
3978nla_put_failure:
3979 return -EMSGSIZE;
3980}
3981
5be083ce 3982/* add multipath next hop */
beb1afac
DA
3983static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3984{
3985 struct rtnexthop *rtnh;
3986 unsigned int flags = 0;
3987
3988 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3989 if (!rtnh)
3990 goto nla_put_failure;
3991
3992 rtnh->rtnh_hops = 0;
3993 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3994
5be083ce 3995 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3996 goto nla_put_failure;
3997
3998 rtnh->rtnh_flags = flags;
3999
4000 /* length of rtnetlink header + attributes */
4001 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4002
4003 return 0;
4004
4005nla_put_failure:
4006 return -EMSGSIZE;
339bf98f
TG
4007}
4008
191cd582
BH
4009static int rt6_fill_node(struct net *net,
4010 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4011 struct in6_addr *dst, struct in6_addr *src,
15e47304 4012 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4013 unsigned int flags)
1da177e4 4014{
4b32b5ad 4015 u32 metrics[RTAX_MAX];
1da177e4 4016 struct rtmsg *rtm;
2d7202bf 4017 struct nlmsghdr *nlh;
e3703b3d 4018 long expires;
9e762a4a 4019 u32 table;
1da177e4 4020
15e47304 4021 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4022 if (!nlh)
26932566 4023 return -EMSGSIZE;
2d7202bf
TG
4024
4025 rtm = nlmsg_data(nlh);
1da177e4
LT
4026 rtm->rtm_family = AF_INET6;
4027 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4028 rtm->rtm_src_len = rt->rt6i_src.plen;
4029 rtm->rtm_tos = 0;
c71099ac 4030 if (rt->rt6i_table)
9e762a4a 4031 table = rt->rt6i_table->tb6_id;
c71099ac 4032 else
9e762a4a
PM
4033 table = RT6_TABLE_UNSPEC;
4034 rtm->rtm_table = table;
c78679e8
DM
4035 if (nla_put_u32(skb, RTA_TABLE, table))
4036 goto nla_put_failure;
ef2c7d7b
ND
4037 if (rt->rt6i_flags & RTF_REJECT) {
4038 switch (rt->dst.error) {
4039 case -EINVAL:
4040 rtm->rtm_type = RTN_BLACKHOLE;
4041 break;
4042 case -EACCES:
4043 rtm->rtm_type = RTN_PROHIBIT;
4044 break;
b4949ab2
ND
4045 case -EAGAIN:
4046 rtm->rtm_type = RTN_THROW;
4047 break;
ef2c7d7b
ND
4048 default:
4049 rtm->rtm_type = RTN_UNREACHABLE;
4050 break;
4051 }
4052 }
38308473 4053 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4054 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4055 else if (rt->rt6i_flags & RTF_ANYCAST)
4056 rtm->rtm_type = RTN_ANYCAST;
d1918542 4057 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4058 rtm->rtm_type = RTN_LOCAL;
4059 else
4060 rtm->rtm_type = RTN_UNICAST;
4061 rtm->rtm_flags = 0;
4062 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4063 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4064
38308473 4065 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4066 rtm->rtm_flags |= RTM_F_CLONED;
4067
4068 if (dst) {
930345ea 4069 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4070 goto nla_put_failure;
1ab1457c 4071 rtm->rtm_dst_len = 128;
1da177e4 4072 } else if (rtm->rtm_dst_len)
930345ea 4073 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4074 goto nla_put_failure;
1da177e4
LT
4075#ifdef CONFIG_IPV6_SUBTREES
4076 if (src) {
930345ea 4077 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4078 goto nla_put_failure;
1ab1457c 4079 rtm->rtm_src_len = 128;
c78679e8 4080 } else if (rtm->rtm_src_len &&
930345ea 4081 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4082 goto nla_put_failure;
1da177e4 4083#endif
7bc570c8
YH
4084 if (iif) {
4085#ifdef CONFIG_IPV6_MROUTE
4086 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4087 int err = ip6mr_get_route(net, skb, rtm, portid);
4088
4089 if (err == 0)
4090 return 0;
4091 if (err < 0)
4092 goto nla_put_failure;
7bc570c8
YH
4093 } else
4094#endif
c78679e8
DM
4095 if (nla_put_u32(skb, RTA_IIF, iif))
4096 goto nla_put_failure;
7bc570c8 4097 } else if (dst) {
1da177e4 4098 struct in6_addr saddr_buf;
c78679e8 4099 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4100 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4101 goto nla_put_failure;
1da177e4 4102 }
2d7202bf 4103
c3968a85
DW
4104 if (rt->rt6i_prefsrc.plen) {
4105 struct in6_addr saddr_buf;
4e3fd7a0 4106 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4107 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4108 goto nla_put_failure;
c3968a85
DW
4109 }
4110
4b32b5ad
MKL
4111 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4112 if (rt->rt6i_pmtu)
4113 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4114 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4115 goto nla_put_failure;
4116
c78679e8
DM
4117 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4118 goto nla_put_failure;
8253947e 4119
beb1afac
DA
4120 /* For multipath routes, walk the siblings list and add
4121 * each as a nexthop within RTA_MULTIPATH.
4122 */
4123 if (rt->rt6i_nsiblings) {
4124 struct rt6_info *sibling, *next_sibling;
4125 struct nlattr *mp;
4126
4127 mp = nla_nest_start(skb, RTA_MULTIPATH);
4128 if (!mp)
4129 goto nla_put_failure;
4130
4131 if (rt6_add_nexthop(skb, rt) < 0)
4132 goto nla_put_failure;
4133
4134 list_for_each_entry_safe(sibling, next_sibling,
4135 &rt->rt6i_siblings, rt6i_siblings) {
4136 if (rt6_add_nexthop(skb, sibling) < 0)
4137 goto nla_put_failure;
4138 }
4139
4140 nla_nest_end(skb, mp);
4141 } else {
5be083ce 4142 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4143 goto nla_put_failure;
4144 }
4145
8253947e 4146 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4147
87a50699 4148 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4149 goto nla_put_failure;
2d7202bf 4150
c78ba6d6
LR
4151 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4152 goto nla_put_failure;
4153
19e42e45 4154
053c095a
JB
4155 nlmsg_end(skb, nlh);
4156 return 0;
2d7202bf
TG
4157
4158nla_put_failure:
26932566
PM
4159 nlmsg_cancel(skb, nlh);
4160 return -EMSGSIZE;
1da177e4
LT
4161}
4162
1b43af54 4163int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4164{
4165 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4166 struct net *net = arg->net;
4167
4168 if (rt == net->ipv6.ip6_null_entry)
4169 return 0;
1da177e4 4170
2d7202bf
TG
4171 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4172 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4173
4174 /* user wants prefix routes only */
4175 if (rtm->rtm_flags & RTM_F_PREFIX &&
4176 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4177 /* success since this is not a prefix route */
4178 return 1;
4179 }
4180 }
1da177e4 4181
1f17e2f2 4182 return rt6_fill_node(net,
191cd582 4183 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4184 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4185 NLM_F_MULTI);
1da177e4
LT
4186}
4187
c21ef3e3
DA
4188static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4189 struct netlink_ext_ack *extack)
1da177e4 4190{
3b1e0a65 4191 struct net *net = sock_net(in_skb->sk);
ab364a6f 4192 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4193 int err, iif = 0, oif = 0;
4194 struct dst_entry *dst;
ab364a6f 4195 struct rt6_info *rt;
1da177e4 4196 struct sk_buff *skb;
ab364a6f 4197 struct rtmsg *rtm;
4c9483b2 4198 struct flowi6 fl6;
18c3a61c 4199 bool fibmatch;
1da177e4 4200
fceb6435 4201 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4202 extack);
ab364a6f
TG
4203 if (err < 0)
4204 goto errout;
1da177e4 4205
ab364a6f 4206 err = -EINVAL;
4c9483b2 4207 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4208 rtm = nlmsg_data(nlh);
4209 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4210 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4211
ab364a6f
TG
4212 if (tb[RTA_SRC]) {
4213 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4214 goto errout;
4215
4e3fd7a0 4216 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4217 }
4218
4219 if (tb[RTA_DST]) {
4220 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4221 goto errout;
4222
4e3fd7a0 4223 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4224 }
4225
4226 if (tb[RTA_IIF])
4227 iif = nla_get_u32(tb[RTA_IIF]);
4228
4229 if (tb[RTA_OIF])
72331bc0 4230 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4231
2e47b291
LC
4232 if (tb[RTA_MARK])
4233 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4234
622ec2c9
LC
4235 if (tb[RTA_UID])
4236 fl6.flowi6_uid = make_kuid(current_user_ns(),
4237 nla_get_u32(tb[RTA_UID]));
4238 else
4239 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4240
1da177e4
LT
4241 if (iif) {
4242 struct net_device *dev;
72331bc0
SL
4243 int flags = 0;
4244
121622db
FW
4245 rcu_read_lock();
4246
4247 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4248 if (!dev) {
121622db 4249 rcu_read_unlock();
1da177e4 4250 err = -ENODEV;
ab364a6f 4251 goto errout;
1da177e4 4252 }
72331bc0
SL
4253
4254 fl6.flowi6_iif = iif;
4255
4256 if (!ipv6_addr_any(&fl6.saddr))
4257 flags |= RT6_LOOKUP_F_HAS_SADDR;
4258
18c3a61c
RP
4259 if (!fibmatch)
4260 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
4261 else
4262 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
4263
4264 rcu_read_unlock();
72331bc0
SL
4265 } else {
4266 fl6.flowi6_oif = oif;
4267
18c3a61c
RP
4268 if (!fibmatch)
4269 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
4270 else
4271 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
4272 }
4273
18c3a61c
RP
4274
4275 rt = container_of(dst, struct rt6_info, dst);
4276 if (rt->dst.error) {
4277 err = rt->dst.error;
4278 ip6_rt_put(rt);
4279 goto errout;
1da177e4
LT
4280 }
4281
9d6acb3b
WC
4282 if (rt == net->ipv6.ip6_null_entry) {
4283 err = rt->dst.error;
4284 ip6_rt_put(rt);
4285 goto errout;
4286 }
4287
ab364a6f 4288 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4289 if (!skb) {
94e187c0 4290 ip6_rt_put(rt);
ab364a6f
TG
4291 err = -ENOBUFS;
4292 goto errout;
4293 }
1da177e4 4294
d8d1f30b 4295 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4296 if (fibmatch)
4297 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4298 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4299 nlh->nlmsg_seq, 0);
4300 else
4301 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4302 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4303 nlh->nlmsg_seq, 0);
1da177e4 4304 if (err < 0) {
ab364a6f
TG
4305 kfree_skb(skb);
4306 goto errout;
1da177e4
LT
4307 }
4308
15e47304 4309 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4310errout:
1da177e4 4311 return err;
1da177e4
LT
4312}
4313
37a1d361
RP
4314void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4315 unsigned int nlm_flags)
1da177e4
LT
4316{
4317 struct sk_buff *skb;
5578689a 4318 struct net *net = info->nl_net;
528c4ceb
DL
4319 u32 seq;
4320 int err;
4321
4322 err = -ENOBUFS;
38308473 4323 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4324
19e42e45 4325 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4326 if (!skb)
21713ebc
TG
4327 goto errout;
4328
191cd582 4329 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4330 event, info->portid, seq, nlm_flags);
26932566
PM
4331 if (err < 0) {
4332 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4333 WARN_ON(err == -EMSGSIZE);
4334 kfree_skb(skb);
4335 goto errout;
4336 }
15e47304 4337 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4338 info->nlh, gfp_any());
4339 return;
21713ebc
TG
4340errout:
4341 if (err < 0)
5578689a 4342 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4343}
4344
8ed67789 4345static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4346 unsigned long event, void *ptr)
8ed67789 4347{
351638e7 4348 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4349 struct net *net = dev_net(dev);
8ed67789 4350
242d3a49
WC
4351 if (!(dev->flags & IFF_LOOPBACK))
4352 return NOTIFY_OK;
4353
4354 if (event == NETDEV_REGISTER) {
d8d1f30b 4355 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4356 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4357#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4358 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4359 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4360 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4361 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4362#endif
76da0704
WC
4363 } else if (event == NETDEV_UNREGISTER &&
4364 dev->reg_state != NETREG_UNREGISTERED) {
4365 /* NETDEV_UNREGISTER could be fired for multiple times by
4366 * netdev_wait_allrefs(). Make sure we only call this once.
4367 */
12d94a80 4368 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4369#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4370 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4371 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4372#endif
4373 }
4374
4375 return NOTIFY_OK;
4376}
4377
1da177e4
LT
4378/*
4379 * /proc
4380 */
4381
4382#ifdef CONFIG_PROC_FS
4383
33120b30
AD
4384static const struct file_operations ipv6_route_proc_fops = {
4385 .owner = THIS_MODULE,
4386 .open = ipv6_route_open,
4387 .read = seq_read,
4388 .llseek = seq_lseek,
8d2ca1d7 4389 .release = seq_release_net,
33120b30
AD
4390};
4391
1da177e4
LT
4392static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4393{
69ddb805 4394 struct net *net = (struct net *)seq->private;
1da177e4 4395 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4396 net->ipv6.rt6_stats->fib_nodes,
4397 net->ipv6.rt6_stats->fib_route_nodes,
4398 net->ipv6.rt6_stats->fib_rt_alloc,
4399 net->ipv6.rt6_stats->fib_rt_entries,
4400 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4401 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4402 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4403
4404 return 0;
4405}
4406
4407static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4408{
de05c557 4409 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4410}
4411
9a32144e 4412static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4413 .owner = THIS_MODULE,
4414 .open = rt6_stats_seq_open,
4415 .read = seq_read,
4416 .llseek = seq_lseek,
b6fcbdb4 4417 .release = single_release_net,
1da177e4
LT
4418};
4419#endif /* CONFIG_PROC_FS */
4420
4421#ifdef CONFIG_SYSCTL
4422
1da177e4 4423static
fe2c6338 4424int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4425 void __user *buffer, size_t *lenp, loff_t *ppos)
4426{
c486da34
LAG
4427 struct net *net;
4428 int delay;
4429 if (!write)
1da177e4 4430 return -EINVAL;
c486da34
LAG
4431
4432 net = (struct net *)ctl->extra1;
4433 delay = net->ipv6.sysctl.flush_delay;
4434 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4435 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4436 return 0;
1da177e4
LT
4437}
4438
fe2c6338 4439struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4440 {
1da177e4 4441 .procname = "flush",
4990509f 4442 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4443 .maxlen = sizeof(int),
89c8b3a1 4444 .mode = 0200,
6d9f239a 4445 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4446 },
4447 {
1da177e4 4448 .procname = "gc_thresh",
9a7ec3a9 4449 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4450 .maxlen = sizeof(int),
4451 .mode = 0644,
6d9f239a 4452 .proc_handler = proc_dointvec,
1da177e4
LT
4453 },
4454 {
1da177e4 4455 .procname = "max_size",
4990509f 4456 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4457 .maxlen = sizeof(int),
4458 .mode = 0644,
6d9f239a 4459 .proc_handler = proc_dointvec,
1da177e4
LT
4460 },
4461 {
1da177e4 4462 .procname = "gc_min_interval",
4990509f 4463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4464 .maxlen = sizeof(int),
4465 .mode = 0644,
6d9f239a 4466 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4467 },
4468 {
1da177e4 4469 .procname = "gc_timeout",
4990509f 4470 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4471 .maxlen = sizeof(int),
4472 .mode = 0644,
6d9f239a 4473 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4474 },
4475 {
1da177e4 4476 .procname = "gc_interval",
4990509f 4477 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4478 .maxlen = sizeof(int),
4479 .mode = 0644,
6d9f239a 4480 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4481 },
4482 {
1da177e4 4483 .procname = "gc_elasticity",
4990509f 4484 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4485 .maxlen = sizeof(int),
4486 .mode = 0644,
f3d3f616 4487 .proc_handler = proc_dointvec,
1da177e4
LT
4488 },
4489 {
1da177e4 4490 .procname = "mtu_expires",
4990509f 4491 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4492 .maxlen = sizeof(int),
4493 .mode = 0644,
6d9f239a 4494 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4495 },
4496 {
1da177e4 4497 .procname = "min_adv_mss",
4990509f 4498 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4499 .maxlen = sizeof(int),
4500 .mode = 0644,
f3d3f616 4501 .proc_handler = proc_dointvec,
1da177e4
LT
4502 },
4503 {
1da177e4 4504 .procname = "gc_min_interval_ms",
4990509f 4505 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4506 .maxlen = sizeof(int),
4507 .mode = 0644,
6d9f239a 4508 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4509 },
f8572d8f 4510 { }
1da177e4
LT
4511};
4512
2c8c1e72 4513struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4514{
4515 struct ctl_table *table;
4516
4517 table = kmemdup(ipv6_route_table_template,
4518 sizeof(ipv6_route_table_template),
4519 GFP_KERNEL);
5ee09105
YH
4520
4521 if (table) {
4522 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4523 table[0].extra1 = net;
86393e52 4524 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4525 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4526 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4527 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4528 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4529 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4530 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4531 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4532 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4533
4534 /* Don't export sysctls to unprivileged users */
4535 if (net->user_ns != &init_user_ns)
4536 table[0].procname = NULL;
5ee09105
YH
4537 }
4538
760f2d01
DL
4539 return table;
4540}
1da177e4
LT
4541#endif
4542
2c8c1e72 4543static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4544{
633d424b 4545 int ret = -ENOMEM;
8ed67789 4546
86393e52
AD
4547 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4548 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4549
fc66f95c
ED
4550 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4551 goto out_ip6_dst_ops;
4552
8ed67789
DL
4553 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4554 sizeof(*net->ipv6.ip6_null_entry),
4555 GFP_KERNEL);
4556 if (!net->ipv6.ip6_null_entry)
fc66f95c 4557 goto out_ip6_dst_entries;
d8d1f30b 4558 net->ipv6.ip6_null_entry->dst.path =
8ed67789 4559 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 4560 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4561 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4562 ip6_template_metrics, true);
8ed67789
DL
4563
4564#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4565 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4566 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4567 sizeof(*net->ipv6.ip6_prohibit_entry),
4568 GFP_KERNEL);
68fffc67
PZ
4569 if (!net->ipv6.ip6_prohibit_entry)
4570 goto out_ip6_null_entry;
d8d1f30b 4571 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4572 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4573 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4574 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4575 ip6_template_metrics, true);
8ed67789
DL
4576
4577 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4578 sizeof(*net->ipv6.ip6_blk_hole_entry),
4579 GFP_KERNEL);
68fffc67
PZ
4580 if (!net->ipv6.ip6_blk_hole_entry)
4581 goto out_ip6_prohibit_entry;
d8d1f30b 4582 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4583 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4584 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4585 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4586 ip6_template_metrics, true);
8ed67789
DL
4587#endif
4588
b339a47c
PZ
4589 net->ipv6.sysctl.flush_delay = 0;
4590 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4591 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4592 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4593 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4594 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4595 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4596 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4597
6891a346
BT
4598 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4599
8ed67789
DL
4600 ret = 0;
4601out:
4602 return ret;
f2fc6a54 4603
68fffc67
PZ
4604#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4605out_ip6_prohibit_entry:
4606 kfree(net->ipv6.ip6_prohibit_entry);
4607out_ip6_null_entry:
4608 kfree(net->ipv6.ip6_null_entry);
4609#endif
fc66f95c
ED
4610out_ip6_dst_entries:
4611 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4612out_ip6_dst_ops:
f2fc6a54 4613 goto out;
cdb18761
DL
4614}
4615
2c8c1e72 4616static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4617{
8ed67789
DL
4618 kfree(net->ipv6.ip6_null_entry);
4619#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4620 kfree(net->ipv6.ip6_prohibit_entry);
4621 kfree(net->ipv6.ip6_blk_hole_entry);
4622#endif
41bb78b4 4623 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4624}
4625
d189634e
TG
4626static int __net_init ip6_route_net_init_late(struct net *net)
4627{
4628#ifdef CONFIG_PROC_FS
d4beaa66
G
4629 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4630 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4631#endif
4632 return 0;
4633}
4634
4635static void __net_exit ip6_route_net_exit_late(struct net *net)
4636{
4637#ifdef CONFIG_PROC_FS
ece31ffd
G
4638 remove_proc_entry("ipv6_route", net->proc_net);
4639 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4640#endif
4641}
4642
cdb18761
DL
4643static struct pernet_operations ip6_route_net_ops = {
4644 .init = ip6_route_net_init,
4645 .exit = ip6_route_net_exit,
4646};
4647
c3426b47
DM
4648static int __net_init ipv6_inetpeer_init(struct net *net)
4649{
4650 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4651
4652 if (!bp)
4653 return -ENOMEM;
4654 inet_peer_base_init(bp);
4655 net->ipv6.peers = bp;
4656 return 0;
4657}
4658
4659static void __net_exit ipv6_inetpeer_exit(struct net *net)
4660{
4661 struct inet_peer_base *bp = net->ipv6.peers;
4662
4663 net->ipv6.peers = NULL;
56a6b248 4664 inetpeer_invalidate_tree(bp);
c3426b47
DM
4665 kfree(bp);
4666}
4667
2b823f72 4668static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4669 .init = ipv6_inetpeer_init,
4670 .exit = ipv6_inetpeer_exit,
4671};
4672
d189634e
TG
4673static struct pernet_operations ip6_route_net_late_ops = {
4674 .init = ip6_route_net_init_late,
4675 .exit = ip6_route_net_exit_late,
4676};
4677
8ed67789
DL
4678static struct notifier_block ip6_route_dev_notifier = {
4679 .notifier_call = ip6_route_dev_notify,
242d3a49 4680 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4681};
4682
2f460933
WC
4683void __init ip6_route_init_special_entries(void)
4684{
4685 /* Registering of the loopback is done before this portion of code,
4686 * the loopback reference in rt6_info will not be taken, do it
4687 * manually for init_net */
4688 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4689 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4690 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4691 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4692 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4693 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4694 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4695 #endif
4696}
4697
433d49c3 4698int __init ip6_route_init(void)
1da177e4 4699{
433d49c3 4700 int ret;
8d0b94af 4701 int cpu;
433d49c3 4702
9a7ec3a9
DL
4703 ret = -ENOMEM;
4704 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4705 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4706 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4707 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4708 goto out;
14e50e57 4709
fc66f95c 4710 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4711 if (ret)
bdb3289f 4712 goto out_kmem_cache;
bdb3289f 4713
c3426b47
DM
4714 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4715 if (ret)
e8803b6c 4716 goto out_dst_entries;
2a0c451a 4717
7e52b33b
DM
4718 ret = register_pernet_subsys(&ip6_route_net_ops);
4719 if (ret)
4720 goto out_register_inetpeer;
c3426b47 4721
5dc121e9
AE
4722 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4723
e8803b6c 4724 ret = fib6_init();
433d49c3 4725 if (ret)
8ed67789 4726 goto out_register_subsys;
433d49c3 4727
433d49c3
DL
4728 ret = xfrm6_init();
4729 if (ret)
e8803b6c 4730 goto out_fib6_init;
c35b7e72 4731
433d49c3
DL
4732 ret = fib6_rules_init();
4733 if (ret)
4734 goto xfrm6_init;
7e5449c2 4735
d189634e
TG
4736 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4737 if (ret)
4738 goto fib6_rules_init;
4739
433d49c3 4740 ret = -ENOBUFS;
b97bac64
FW
4741 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4742 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4743 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4744 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4745 goto out_register_late_subsys;
c127ea2c 4746
8ed67789 4747 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4748 if (ret)
d189634e 4749 goto out_register_late_subsys;
8ed67789 4750
8d0b94af
MKL
4751 for_each_possible_cpu(cpu) {
4752 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4753
4754 INIT_LIST_HEAD(&ul->head);
4755 spin_lock_init(&ul->lock);
4756 }
4757
433d49c3
DL
4758out:
4759 return ret;
4760
d189634e
TG
4761out_register_late_subsys:
4762 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4763fib6_rules_init:
433d49c3
DL
4764 fib6_rules_cleanup();
4765xfrm6_init:
433d49c3 4766 xfrm6_fini();
2a0c451a
TG
4767out_fib6_init:
4768 fib6_gc_cleanup();
8ed67789
DL
4769out_register_subsys:
4770 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4771out_register_inetpeer:
4772 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4773out_dst_entries:
4774 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4775out_kmem_cache:
f2fc6a54 4776 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4777 goto out;
1da177e4
LT
4778}
4779
4780void ip6_route_cleanup(void)
4781{
8ed67789 4782 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4783 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4784 fib6_rules_cleanup();
1da177e4 4785 xfrm6_fini();
1da177e4 4786 fib6_gc_cleanup();
c3426b47 4787 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4788 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4789 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4791}