net/ipv6: Create a neigh_lookup for FIB entries
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result of checking the neighbour behind a route's nexthop.
 * Negative values are failures; rt6_score_route() returns them to
 * find_match() in place of a score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is NUD_FAILED (ROUTER_PREF builds) */
	RT6_NUD_FAIL_DO_RR = -1,	/* scored as 0, requests round-robin */
	RT6_NUD_SUCCEED = 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
52bd4c0c 99static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3 100static size_t rt6_nlmsg_size(struct rt6_info *rt);
d4ead6b3
DA
101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 struct rt6_info *rt, struct dst_entry *dst,
103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
35732d01
WW
106static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 111static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
efa2cea0 116static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* redirect handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
421842ed
DA
286static const struct rt6_info fib6_null_entry_template = {
287 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .rt6i_protocol = RTPROT_KERNEL,
289 .rt6i_metric = ~(u32)0,
290 .rt6i_ref = ATOMIC_INIT(1),
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 305 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
306 .rt6i_metric = ~(u32) 0,
307 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 308 .fib6_type = RTN_UNREACHABLE,
1da177e4
LT
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 326 .fib6_type = RTN_PROHIBIT,
101367c2
TG
327};
328
fb0af4c7 329static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
330 .dst = {
331 .__refcnt = ATOMIC_INIT(1),
332 .__use = 1,
2c20cbd7 333 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 334 .error = -EINVAL,
d8d1f30b 335 .input = dst_discard,
ede2059d 336 .output = dst_discard_out,
101367c2
TG
337 },
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 339 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
340 .rt6i_metric = ~(u32) 0,
341 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 342 .fib6_type = RTN_BLACKHOLE,
101367c2
TG
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
d4ead6b3 354 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
ebfa45f0
MKL
355}
356
1da177e4 357/* allocate dst with ip6_dst_ops */
d52d3997
MKL
358static struct rt6_info *__ip6_dst_alloc(struct net *net,
359 struct net_device *dev,
ad706862 360 int flags)
1da177e4 361{
97bab73f 362 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 363 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 364
81eb8447 365 if (rt) {
ebfa45f0 366 rt6_info_init(rt);
81eb8447
WW
367 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
368 }
8104891b 369
cf911662 370 return rt;
1da177e4
LT
371}
372
9ab179d8
DA
373struct rt6_info *ip6_dst_alloc(struct net *net,
374 struct net_device *dev,
375 int flags)
d52d3997 376{
ad706862 377 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
378
379 if (rt) {
380 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 381 if (!rt->rt6i_pcpu) {
587fea74 382 dst_release_immediate(&rt->dst);
d52d3997
MKL
383 return NULL;
384 }
385 }
386
387 return rt;
388}
9ab179d8 389EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 390
1da177e4
LT
391static void ip6_dst_destroy(struct dst_entry *dst)
392{
393 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 394 struct rt6_exception_bucket *bucket;
3a2232e9 395 struct rt6_info *from = rt->from;
8d0b94af 396 struct inet6_dev *idev;
d4ead6b3 397 struct dst_metrics *m;
1da177e4 398
4b32b5ad 399 dst_destroy_metrics_generic(dst);
87775312 400 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
401 rt6_uncached_list_del(rt);
402
403 idev = rt->rt6i_idev;
38308473 404 if (idev) {
1da177e4
LT
405 rt->rt6i_idev = NULL;
406 in6_dev_put(idev);
1ab1457c 407 }
35732d01
WW
408 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
409 if (bucket) {
410 rt->rt6i_exception_bucket = NULL;
411 kfree(bucket);
412 }
1716a961 413
d4ead6b3
DA
414 m = rt->fib6_metrics;
415 if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
416 kfree(m);
417
3a2232e9
DM
418 rt->from = NULL;
419 dst_release(&from->dst);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
3a2232e9 452 } else if (rt->from) {
1e2ea8ad 453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
14895687 454 fib6_check_expired(rt->from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
b4bac172
DA
459static struct rt6_info *rt6_multipath_select(const struct net *net,
460 struct rt6_info *match,
52bd4c0c 461 struct flowi6 *fl6, int oif,
b75cc8f9 462 const struct sk_buff *skb,
52bd4c0c 463 int strict)
51ebd318
ND
464{
465 struct rt6_info *sibling, *next_sibling;
51ebd318 466
b673d6cc
JS
467 /* We might have already computed the hash for ICMPv6 errors. In such
468 * case it will always be non-zero. Otherwise now is the time to do it.
469 */
470 if (!fl6->mp_hash)
b4bac172 471 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 472
5e670d84 473 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
474 return match;
475
476 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
477 rt6i_siblings) {
5e670d84
DA
478 int nh_upper_bound;
479
480 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
481 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
482 continue;
483 if (rt6_score_route(sibling, oif, strict) < 0)
484 break;
485 match = sibling;
486 break;
487 }
488
51ebd318
ND
489 return match;
490}
491
1da177e4 492/*
66f5d6ce 493 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
494 */
495
8ed67789
DL
496static inline struct rt6_info *rt6_device_match(struct net *net,
497 struct rt6_info *rt,
b71d1d42 498 const struct in6_addr *saddr,
1da177e4 499 int oif,
d420895e 500 int flags)
1da177e4
LT
501{
502 struct rt6_info *local = NULL;
503 struct rt6_info *sprt;
504
5e670d84
DA
505 if (!oif && ipv6_addr_any(saddr) &&
506 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 507 return rt;
dd3abc4e 508
071fb37e 509 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 510 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 511
5e670d84 512 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
513 continue;
514
dd3abc4e 515 if (oif) {
1da177e4
LT
516 if (dev->ifindex == oif)
517 return sprt;
518 if (dev->flags & IFF_LOOPBACK) {
38308473 519 if (!sprt->rt6i_idev ||
1da177e4 520 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 521 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 522 continue;
17fb0b2b
DA
523 if (local &&
524 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
525 continue;
526 }
527 local = sprt;
528 }
dd3abc4e
YH
529 } else {
530 if (ipv6_chk_addr(net, saddr, dev,
531 flags & RT6_LOOKUP_F_IFACE))
532 return sprt;
1da177e4 533 }
dd3abc4e 534 }
1da177e4 535
dd3abc4e 536 if (oif) {
1da177e4
LT
537 if (local)
538 return local;
539
d420895e 540 if (flags & RT6_LOOKUP_F_IFACE)
421842ed 541 return net->ipv6.fib6_null_entry;
1da177e4 542 }
8067bb8c 543
421842ed 544 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
545}
546
27097255 547#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
548struct __rt6_probe_work {
549 struct work_struct work;
550 struct in6_addr target;
551 struct net_device *dev;
552};
553
554static void rt6_probe_deferred(struct work_struct *w)
555{
556 struct in6_addr mcaddr;
557 struct __rt6_probe_work *work =
558 container_of(w, struct __rt6_probe_work, work);
559
560 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 561 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 562 dev_put(work->dev);
662f5533 563 kfree(work);
c2f17e82
HFS
564}
565
27097255
YH
566static void rt6_probe(struct rt6_info *rt)
567{
990edb42 568 struct __rt6_probe_work *work;
5e670d84 569 const struct in6_addr *nh_gw;
f2c31e32 570 struct neighbour *neigh;
5e670d84
DA
571 struct net_device *dev;
572
27097255
YH
573 /*
574 * Okay, this does not seem to be appropriate
575 * for now, however, we need to check if it
576 * is really so; aka Router Reachability Probing.
577 *
578 * Router Reachability Probe MUST be rate-limited
579 * to no more than one per minute.
580 */
2152caea 581 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 582 return;
5e670d84
DA
583
584 nh_gw = &rt->fib6_nh.nh_gw;
585 dev = rt->fib6_nh.nh_dev;
2152caea 586 rcu_read_lock_bh();
5e670d84 587 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 588 if (neigh) {
8d6c31bf
MKL
589 if (neigh->nud_state & NUD_VALID)
590 goto out;
591
990edb42 592 work = NULL;
2152caea 593 write_lock(&neigh->lock);
990edb42
MKL
594 if (!(neigh->nud_state & NUD_VALID) &&
595 time_after(jiffies,
596 neigh->updated +
597 rt->rt6i_idev->cnf.rtr_probe_interval)) {
598 work = kmalloc(sizeof(*work), GFP_ATOMIC);
599 if (work)
600 __neigh_set_probe_once(neigh);
c2f17e82 601 }
2152caea 602 write_unlock(&neigh->lock);
990edb42
MKL
603 } else {
604 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 605 }
990edb42
MKL
606
607 if (work) {
608 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
609 work->target = *nh_gw;
610 dev_hold(dev);
611 work->dev = dev;
990edb42
MKL
612 schedule_work(&work->work);
613 }
614
8d6c31bf 615out:
2152caea 616 rcu_read_unlock_bh();
27097255
YH
617}
618#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
622#endif
623
1da177e4 624/*
554cfb7e 625 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 626 */
b6f99a21 627static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 628{
5e670d84
DA
629 const struct net_device *dev = rt->fib6_nh.nh_dev;
630
161980f4 631 if (!oif || dev->ifindex == oif)
554cfb7e 632 return 2;
161980f4
DM
633 if ((dev->flags & IFF_LOOPBACK) &&
634 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
635 return 1;
636 return 0;
554cfb7e 637}
1da177e4 638
afc154e9 639static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 640{
afc154e9 641 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 642 struct neighbour *neigh;
f2c31e32 643
4d0c5911
YH
644 if (rt->rt6i_flags & RTF_NONEXTHOP ||
645 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 646 return RT6_NUD_SUCCEED;
145a3621
YH
647
648 rcu_read_lock_bh();
5e670d84
DA
649 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
650 &rt->fib6_nh.nh_gw);
145a3621
YH
651 if (neigh) {
652 read_lock(&neigh->lock);
554cfb7e 653 if (neigh->nud_state & NUD_VALID)
afc154e9 654 ret = RT6_NUD_SUCCEED;
398bcbeb 655#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 656 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 657 ret = RT6_NUD_SUCCEED;
7e980569
JB
658 else
659 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 660#endif
145a3621 661 read_unlock(&neigh->lock);
afc154e9
HFS
662 } else {
663 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 664 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 665 }
145a3621
YH
666 rcu_read_unlock_bh();
667
a5a81f0b 668 return ret;
1da177e4
LT
669}
670
554cfb7e
YH
671static int rt6_score_route(struct rt6_info *rt, int oif,
672 int strict)
1da177e4 673{
a5a81f0b 674 int m;
1ab1457c 675
4d0c5911 676 m = rt6_check_dev(rt, oif);
77d16f45 677 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 678 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
679#ifdef CONFIG_IPV6_ROUTER_PREF
680 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
681#endif
afc154e9
HFS
682 if (strict & RT6_LOOKUP_F_REACHABLE) {
683 int n = rt6_check_neigh(rt);
684 if (n < 0)
685 return n;
686 }
554cfb7e
YH
687 return m;
688}
689
f11e6659 690static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
691 int *mpri, struct rt6_info *match,
692 bool *do_rr)
554cfb7e 693{
f11e6659 694 int m;
afc154e9 695 bool match_do_rr = false;
35103d11 696 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 697
5e670d84 698 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
699 goto out;
700
14c5206c 701 if (idev->cnf.ignore_routes_with_linkdown &&
5e670d84 702 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 703 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 704 goto out;
f11e6659 705
14895687 706 if (fib6_check_expired(rt))
f11e6659
DM
707 goto out;
708
709 m = rt6_score_route(rt, oif, strict);
7e980569 710 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
711 match_do_rr = true;
712 m = 0; /* lowest valid score */
7e980569 713 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 714 goto out;
afc154e9
HFS
715 }
716
717 if (strict & RT6_LOOKUP_F_REACHABLE)
718 rt6_probe(rt);
f11e6659 719
7e980569 720 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 721 if (m > *mpri) {
afc154e9 722 *do_rr = match_do_rr;
f11e6659
DM
723 *mpri = m;
724 match = rt;
f11e6659 725 }
f11e6659
DM
726out:
727 return match;
728}
729
730static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 731 struct rt6_info *leaf,
f11e6659 732 struct rt6_info *rr_head,
afc154e9
HFS
733 u32 metric, int oif, int strict,
734 bool *do_rr)
f11e6659 735{
9fbdcfaf 736 struct rt6_info *rt, *match, *cont;
554cfb7e 737 int mpri = -1;
1da177e4 738
f11e6659 739 match = NULL;
9fbdcfaf 740 cont = NULL;
071fb37e 741 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
742 if (rt->rt6i_metric != metric) {
743 cont = rt;
744 break;
745 }
746
747 match = find_match(rt, oif, strict, &mpri, match, do_rr);
748 }
749
66f5d6ce 750 for (rt = leaf; rt && rt != rr_head;
071fb37e 751 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
752 if (rt->rt6i_metric != metric) {
753 cont = rt;
754 break;
755 }
756
afc154e9 757 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
758 }
759
760 if (match || !cont)
761 return match;
762
071fb37e 763 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 764 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 765
f11e6659
DM
766 return match;
767}
1da177e4 768
8d1040e8
WW
769static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
770 int oif, int strict)
f11e6659 771{
66f5d6ce 772 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 773 struct rt6_info *match, *rt0;
afc154e9 774 bool do_rr = false;
17ecf590 775 int key_plen;
1da177e4 776
421842ed
DA
777 if (!leaf || leaf == net->ipv6.fib6_null_entry)
778 return net->ipv6.fib6_null_entry;
8d1040e8 779
66f5d6ce 780 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 781 if (!rt0)
66f5d6ce 782 rt0 = leaf;
1da177e4 783
17ecf590
WW
784 /* Double check to make sure fn is not an intermediate node
785 * and fn->leaf does not points to its child's leaf
786 * (This might happen if all routes under fn are deleted from
787 * the tree and fib6_repair_tree() is called on the node.)
788 */
789 key_plen = rt0->rt6i_dst.plen;
790#ifdef CONFIG_IPV6_SUBTREES
791 if (rt0->rt6i_src.plen)
792 key_plen = rt0->rt6i_src.plen;
793#endif
794 if (fn->fn_bit != key_plen)
421842ed 795 return net->ipv6.fib6_null_entry;
17ecf590 796
8d1040e8 797 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 798 &do_rr);
1da177e4 799
afc154e9 800 if (do_rr) {
071fb37e 801 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 802
554cfb7e 803 /* no entries matched; do round-robin */
f11e6659 804 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 805 next = leaf;
f11e6659 806
66f5d6ce
WW
807 if (next != rt0) {
808 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
809 /* make sure next is not being deleted from the tree */
810 if (next->rt6i_node)
811 rcu_assign_pointer(fn->rr_ptr, next);
812 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
813 }
1da177e4 814 }
1da177e4 815
421842ed 816 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
817}
818
8b9df265
MKL
819static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
820{
821 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
822}
823
70ceb4f5
YH
824#ifdef CONFIG_IPV6_ROUTE_INFO
825int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 826 const struct in6_addr *gwaddr)
70ceb4f5 827{
c346dca1 828 struct net *net = dev_net(dev);
70ceb4f5
YH
829 struct route_info *rinfo = (struct route_info *) opt;
830 struct in6_addr prefix_buf, *prefix;
831 unsigned int pref;
4bed72e4 832 unsigned long lifetime;
70ceb4f5
YH
833 struct rt6_info *rt;
834
835 if (len < sizeof(struct route_info)) {
836 return -EINVAL;
837 }
838
839 /* Sanity check for prefix_len and length */
840 if (rinfo->length > 3) {
841 return -EINVAL;
842 } else if (rinfo->prefix_len > 128) {
843 return -EINVAL;
844 } else if (rinfo->prefix_len > 64) {
845 if (rinfo->length < 2) {
846 return -EINVAL;
847 }
848 } else if (rinfo->prefix_len > 0) {
849 if (rinfo->length < 1) {
850 return -EINVAL;
851 }
852 }
853
854 pref = rinfo->route_pref;
855 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 856 return -EINVAL;
70ceb4f5 857
4bed72e4 858 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
859
860 if (rinfo->length == 3)
861 prefix = (struct in6_addr *)rinfo->prefix;
862 else {
863 /* this function is safe */
864 ipv6_addr_prefix(&prefix_buf,
865 (struct in6_addr *)rinfo->prefix,
866 rinfo->prefix_len);
867 prefix = &prefix_buf;
868 }
869
f104a567 870 if (rinfo->prefix_len == 0)
afb1d4b5 871 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
872 else
873 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 874 gwaddr, dev);
70ceb4f5
YH
875
876 if (rt && !lifetime) {
afb1d4b5 877 ip6_del_rt(net, rt);
70ceb4f5
YH
878 rt = NULL;
879 }
880
881 if (!rt && lifetime)
830218c1
DA
882 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
883 dev, pref);
70ceb4f5
YH
884 else if (rt)
885 rt->rt6i_flags = RTF_ROUTEINFO |
886 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
887
888 if (rt) {
1716a961 889 if (!addrconf_finite_timeout(lifetime))
14895687 890 fib6_clean_expires(rt);
1716a961 891 else
14895687 892 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 893
94e187c0 894 ip6_rt_put(rt);
70ceb4f5
YH
895 }
896 return 0;
897}
898#endif
899
ae90d867
DA
900/*
901 * Misc support functions
902 */
903
904/* called with rcu_lock held */
905static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
906{
5e670d84 907 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867
DA
908
909 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
910 /* for copies of local routes, dst->dev needs to be the
911 * device if it is a master device, the master device if
912 * device is enslaved, and the loopback as the default
913 */
914 if (netif_is_l3_slave(dev) &&
915 !rt6_need_strict(&rt->rt6i_dst.addr))
916 dev = l3mdev_master_dev_rcu(dev);
917 else if (!netif_is_l3_master(dev))
918 dev = dev_net(dev)->loopback_dev;
919 /* last case is netif_is_l3_master(dev) is true in which
920 * case we want dev returned to be dev
921 */
922 }
923
924 return dev;
925}
926
6edb3c96
DA
927static const int fib6_prop[RTN_MAX + 1] = {
928 [RTN_UNSPEC] = 0,
929 [RTN_UNICAST] = 0,
930 [RTN_LOCAL] = 0,
931 [RTN_BROADCAST] = 0,
932 [RTN_ANYCAST] = 0,
933 [RTN_MULTICAST] = 0,
934 [RTN_BLACKHOLE] = -EINVAL,
935 [RTN_UNREACHABLE] = -EHOSTUNREACH,
936 [RTN_PROHIBIT] = -EACCES,
937 [RTN_THROW] = -EAGAIN,
938 [RTN_NAT] = -EINVAL,
939 [RTN_XRESOLVE] = -EINVAL,
940};
941
942static int ip6_rt_type_to_error(u8 fib6_type)
943{
944 return fib6_prop[fib6_type];
945}
946
3b6761d1
DA
947static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
948{
949 unsigned short flags = 0;
950
951 if (rt->dst_nocount)
952 flags |= DST_NOCOUNT;
953 if (rt->dst_nopolicy)
954 flags |= DST_NOPOLICY;
955 if (rt->dst_host)
956 flags |= DST_HOST;
957
958 return flags;
959}
960
6edb3c96
DA
961static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
962{
963 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
964
965 switch (ort->fib6_type) {
966 case RTN_BLACKHOLE:
967 rt->dst.output = dst_discard_out;
968 rt->dst.input = dst_discard;
969 break;
970 case RTN_PROHIBIT:
971 rt->dst.output = ip6_pkt_prohibit_out;
972 rt->dst.input = ip6_pkt_prohibit;
973 break;
974 case RTN_THROW:
975 case RTN_UNREACHABLE:
976 default:
977 rt->dst.output = ip6_pkt_discard_out;
978 rt->dst.input = ip6_pkt_discard;
979 break;
980 }
981}
982
983static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
984{
3b6761d1
DA
985 rt->dst.flags |= fib6_info_dst_flags(ort);
986
6edb3c96
DA
987 if (ort->rt6i_flags & RTF_REJECT) {
988 ip6_rt_init_dst_reject(rt, ort);
989 return;
990 }
991
992 rt->dst.error = 0;
993 rt->dst.output = ip6_output;
994
995 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96
DA
996 rt->dst.input = ip6_input;
997 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
998 rt->dst.input = ip6_mc_input;
999 } else {
1000 rt->dst.input = ip6_forward;
1001 }
1002
1003 if (ort->fib6_nh.nh_lwtstate) {
1004 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
1005 lwtunnel_set_redirect(&rt->dst);
1006 }
1007
1008 rt->dst.lastuse = jiffies;
1009}
1010
ae90d867
DA
1011static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1012{
1013 BUG_ON(from->from);
1014
1015 rt->rt6i_flags &= ~RTF_EXPIRES;
1016 dst_hold(&from->dst);
1017 rt->from = from;
d4ead6b3
DA
1018 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
1019 if (from->fib6_metrics != &dst_default_metrics) {
1020 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1021 refcount_inc(&from->fib6_metrics->refcnt);
1022 }
ae90d867
DA
1023}
1024
1025static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
1026{
6edb3c96
DA
1027 ip6_rt_init_dst(rt, ort);
1028
ae90d867 1029 rt->rt6i_dst = ort->rt6i_dst;
ae90d867
DA
1030 rt->rt6i_idev = ort->rt6i_idev;
1031 if (rt->rt6i_idev)
1032 in6_dev_hold(rt->rt6i_idev);
5e670d84 1033 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
ae90d867
DA
1034 rt->rt6i_flags = ort->rt6i_flags;
1035 rt6_set_from(rt, ort);
1036 rt->rt6i_metric = ort->rt6i_metric;
1037#ifdef CONFIG_IPV6_SUBTREES
1038 rt->rt6i_src = ort->rt6i_src;
1039#endif
1040 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1041 rt->rt6i_table = ort->rt6i_table;
5e670d84 1042 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1043}
1044
a3c00e46
MKL
1045static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1046 struct in6_addr *saddr)
1047{
66f5d6ce 1048 struct fib6_node *pn, *sn;
a3c00e46
MKL
1049 while (1) {
1050 if (fn->fn_flags & RTN_TL_ROOT)
1051 return NULL;
66f5d6ce
WW
1052 pn = rcu_dereference(fn->parent);
1053 sn = FIB6_SUBTREE(pn);
1054 if (sn && sn != fn)
1055 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1056 else
1057 fn = pn;
1058 if (fn->fn_flags & RTN_RTINFO)
1059 return fn;
1060 }
1061}
c71099ac 1062
d3843fe5
WW
1063static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1064 bool null_fallback)
1065{
1066 struct rt6_info *rt = *prt;
1067
1068 if (dst_hold_safe(&rt->dst))
1069 return true;
1070 if (null_fallback) {
1071 rt = net->ipv6.ip6_null_entry;
1072 dst_hold(&rt->dst);
1073 } else {
1074 rt = NULL;
1075 }
1076 *prt = rt;
1077 return false;
1078}
1079
dec9b0e2
DA
1080/* called with rcu_lock held */
1081static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
1082{
3b6761d1 1083 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1084 struct net_device *dev = rt->fib6_nh.nh_dev;
1085 struct rt6_info *nrt;
1086
3b6761d1 1087 nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1088 if (nrt)
1089 ip6_rt_copy_init(nrt, rt);
1090
1091 return nrt;
1092}
1093
8ed67789
DL
1094static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1095 struct fib6_table *table,
b75cc8f9
DA
1096 struct flowi6 *fl6,
1097 const struct sk_buff *skb,
1098 int flags)
1da177e4 1099{
2b760fcf 1100 struct rt6_info *rt, *rt_cache;
1da177e4 1101 struct fib6_node *fn;
1da177e4 1102
b6cdbc85
DA
1103 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1104 flags &= ~RT6_LOOKUP_F_IFACE;
1105
66f5d6ce 1106 rcu_read_lock();
4c9483b2 1107 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1108restart:
66f5d6ce
WW
1109 rt = rcu_dereference(fn->leaf);
1110 if (!rt) {
421842ed 1111 rt = net->ipv6.fib6_null_entry;
66f5d6ce
WW
1112 } else {
1113 rt = rt6_device_match(net, rt, &fl6->saddr,
1114 fl6->flowi6_oif, flags);
1115 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 1116 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 1117 skb, flags);
66f5d6ce 1118 }
421842ed 1119 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1120 fn = fib6_backtrack(fn, &fl6->saddr);
1121 if (fn)
1122 goto restart;
1123 }
2b760fcf
WW
1124 /* Search through exception table */
1125 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
dec9b0e2 1126 if (rt_cache) {
2b760fcf 1127 rt = rt_cache;
dec9b0e2
DA
1128 if (ip6_hold_safe(net, &rt, true))
1129 dst_use_noref(&rt->dst, jiffies);
1130 } else if (dst_hold_safe(&rt->dst)) {
1131 struct rt6_info *nrt;
1132
1133 nrt = ip6_create_rt_rcu(rt);
1134 dst_release(&rt->dst);
1135 rt = nrt;
1136 } else {
1137 rt = net->ipv6.ip6_null_entry;
1138 dst_hold(&rt->dst);
1139 }
d3843fe5 1140
66f5d6ce 1141 rcu_read_unlock();
b811580d 1142
b65f164d 1143 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1144
c71099ac 1145 return rt;
c71099ac
TG
1146}
1147
67ba4152 1148struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1149 const struct sk_buff *skb, int flags)
ea6e574e 1150{
b75cc8f9 1151 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1152}
1153EXPORT_SYMBOL_GPL(ip6_route_lookup);
1154
9acd9f3a 1155struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1156 const struct in6_addr *saddr, int oif,
1157 const struct sk_buff *skb, int strict)
c71099ac 1158{
4c9483b2
DM
1159 struct flowi6 fl6 = {
1160 .flowi6_oif = oif,
1161 .daddr = *daddr,
c71099ac
TG
1162 };
1163 struct dst_entry *dst;
77d16f45 1164 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1165
adaa70bb 1166 if (saddr) {
4c9483b2 1167 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1168 flags |= RT6_LOOKUP_F_HAS_SADDR;
1169 }
1170
b75cc8f9 1171 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1172 if (dst->error == 0)
1173 return (struct rt6_info *) dst;
1174
1175 dst_release(dst);
1176
1da177e4
LT
1177 return NULL;
1178}
7159039a
YH
1179EXPORT_SYMBOL(rt6_lookup);
1180
c71099ac 1181/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1182 * It takes new route entry, the addition fails by any reason the
1183 * route is released.
1184 * Caller must hold dst before calling it.
1da177e4
LT
1185 */
1186
e5fd387a 1187static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301 1188 struct netlink_ext_ack *extack)
1da177e4
LT
1189{
1190 int err;
c71099ac 1191 struct fib6_table *table;
1da177e4 1192
c71099ac 1193 table = rt->rt6i_table;
66f5d6ce 1194 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1195 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1196 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1197
1198 return err;
1199}
1200
afb1d4b5 1201int ip6_ins_rt(struct net *net, struct rt6_info *rt)
40e22e8f 1202{
afb1d4b5 1203 struct nl_info info = { .nl_net = net, };
e715b6d3 1204
1cfb71ee
WW
1205 /* Hold dst to account for the reference from the fib6 tree */
1206 dst_hold(&rt->dst);
d4ead6b3 1207 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1208}
1209
8b9df265
MKL
1210static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1211 const struct in6_addr *daddr,
1212 const struct in6_addr *saddr)
1da177e4 1213{
4832c30d 1214 struct net_device *dev;
1da177e4
LT
1215 struct rt6_info *rt;
1216
1217 /*
1218 * Clone the route.
1219 */
1220
d52d3997 1221 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1222 ort = ort->from;
1da177e4 1223
4832c30d
DA
1224 rcu_read_lock();
1225 dev = ip6_rt_get_dev_rcu(ort);
1226 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1227 rcu_read_unlock();
83a09abd
MKL
1228 if (!rt)
1229 return NULL;
1230
1231 ip6_rt_copy_init(rt, ort);
1232 rt->rt6i_flags |= RTF_CACHE;
1233 rt->rt6i_metric = 0;
1234 rt->dst.flags |= DST_HOST;
1235 rt->rt6i_dst.addr = *daddr;
1236 rt->rt6i_dst.plen = 128;
1da177e4 1237
83a09abd
MKL
1238 if (!rt6_is_gw_or_nonexthop(ort)) {
1239 if (ort->rt6i_dst.plen != 128 &&
1240 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1241 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1242#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1243 if (rt->rt6i_src.plen && saddr) {
1244 rt->rt6i_src.addr = *saddr;
1245 rt->rt6i_src.plen = 128;
8b9df265 1246 }
83a09abd 1247#endif
95a9a5ba 1248 }
1da177e4 1249
95a9a5ba
YH
1250 return rt;
1251}
1da177e4 1252
d52d3997
MKL
1253static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1254{
3b6761d1 1255 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1256 struct net_device *dev;
d52d3997
MKL
1257 struct rt6_info *pcpu_rt;
1258
4832c30d
DA
1259 rcu_read_lock();
1260 dev = ip6_rt_get_dev_rcu(rt);
3b6761d1 1261 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1262 rcu_read_unlock();
d52d3997
MKL
1263 if (!pcpu_rt)
1264 return NULL;
1265 ip6_rt_copy_init(pcpu_rt, rt);
1266 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1267 pcpu_rt->rt6i_flags |= RTF_PCPU;
1268 return pcpu_rt;
1269}
1270
66f5d6ce 1271/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1272static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1273{
a73e4195 1274 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1275
1276 p = this_cpu_ptr(rt->rt6i_pcpu);
1277 pcpu_rt = *p;
1278
d4ead6b3
DA
1279 if (pcpu_rt)
1280 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1281
a73e4195
MKL
1282 return pcpu_rt;
1283}
1284
afb1d4b5
DA
1285static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1286 struct rt6_info *rt)
a73e4195
MKL
1287{
1288 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1289
1290 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1291 if (!pcpu_rt) {
9c7370a1
MKL
1292 dst_hold(&net->ipv6.ip6_null_entry->dst);
1293 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1294 }
1295
a94b9367
WW
1296 dst_hold(&pcpu_rt->dst);
1297 p = this_cpu_ptr(rt->rt6i_pcpu);
1298 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1299 BUG_ON(prev);
a94b9367 1300
d52d3997
MKL
1301 return pcpu_rt;
1302}
1303
35732d01
WW
1304/* exception hash table implementation
1305 */
1306static DEFINE_SPINLOCK(rt6_exception_lock);
1307
1308/* Remove rt6_ex from hash table and free the memory
1309 * Caller must hold rt6_exception_lock
1310 */
1311static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1312 struct rt6_exception *rt6_ex)
1313{
b2427e67 1314 struct net *net;
81eb8447 1315
35732d01
WW
1316 if (!bucket || !rt6_ex)
1317 return;
b2427e67
CIK
1318
1319 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1320 rt6_ex->rt6i->rt6i_node = NULL;
1321 hlist_del_rcu(&rt6_ex->hlist);
1322 rt6_release(rt6_ex->rt6i);
1323 kfree_rcu(rt6_ex, rcu);
1324 WARN_ON_ONCE(!bucket->depth);
1325 bucket->depth--;
81eb8447 1326 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1327}
1328
1329/* Remove oldest rt6_ex in bucket and free the memory
1330 * Caller must hold rt6_exception_lock
1331 */
1332static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1333{
1334 struct rt6_exception *rt6_ex, *oldest = NULL;
1335
1336 if (!bucket)
1337 return;
1338
1339 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1340 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1341 oldest = rt6_ex;
1342 }
1343 rt6_remove_exception(bucket, oldest);
1344}
1345
1346static u32 rt6_exception_hash(const struct in6_addr *dst,
1347 const struct in6_addr *src)
1348{
1349 static u32 seed __read_mostly;
1350 u32 val;
1351
1352 net_get_random_once(&seed, sizeof(seed));
1353 val = jhash(dst, sizeof(*dst), seed);
1354
1355#ifdef CONFIG_IPV6_SUBTREES
1356 if (src)
1357 val = jhash(src, sizeof(*src), val);
1358#endif
1359 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1360}
1361
1362/* Helper function to find the cached rt in the hash table
1363 * and update bucket pointer to point to the bucket for this
1364 * (daddr, saddr) pair
1365 * Caller must hold rt6_exception_lock
1366 */
1367static struct rt6_exception *
1368__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1369 const struct in6_addr *daddr,
1370 const struct in6_addr *saddr)
1371{
1372 struct rt6_exception *rt6_ex;
1373 u32 hval;
1374
1375 if (!(*bucket) || !daddr)
1376 return NULL;
1377
1378 hval = rt6_exception_hash(daddr, saddr);
1379 *bucket += hval;
1380
1381 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1382 struct rt6_info *rt6 = rt6_ex->rt6i;
1383 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1384
1385#ifdef CONFIG_IPV6_SUBTREES
1386 if (matched && saddr)
1387 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1388#endif
1389 if (matched)
1390 return rt6_ex;
1391 }
1392 return NULL;
1393}
1394
1395/* Helper function to find the cached rt in the hash table
1396 * and update bucket pointer to point to the bucket for this
1397 * (daddr, saddr) pair
1398 * Caller must hold rcu_read_lock()
1399 */
1400static struct rt6_exception *
1401__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1402 const struct in6_addr *daddr,
1403 const struct in6_addr *saddr)
1404{
1405 struct rt6_exception *rt6_ex;
1406 u32 hval;
1407
1408 WARN_ON_ONCE(!rcu_read_lock_held());
1409
1410 if (!(*bucket) || !daddr)
1411 return NULL;
1412
1413 hval = rt6_exception_hash(daddr, saddr);
1414 *bucket += hval;
1415
1416 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1417 struct rt6_info *rt6 = rt6_ex->rt6i;
1418 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1419
1420#ifdef CONFIG_IPV6_SUBTREES
1421 if (matched && saddr)
1422 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1423#endif
1424 if (matched)
1425 return rt6_ex;
1426 }
1427 return NULL;
1428}
1429
d4ead6b3
DA
1430static unsigned int fib6_mtu(const struct rt6_info *rt)
1431{
1432 unsigned int mtu;
1433
1434 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1435 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1436
1437 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1438}
1439
35732d01
WW
1440static int rt6_insert_exception(struct rt6_info *nrt,
1441 struct rt6_info *ort)
1442{
5e670d84 1443 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1444 struct rt6_exception_bucket *bucket;
1445 struct in6_addr *src_key = NULL;
1446 struct rt6_exception *rt6_ex;
1447 int err = 0;
1448
1449 /* ort can't be a cache or pcpu route */
1450 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1451 ort = ort->from;
35732d01
WW
1452 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1453
1454 spin_lock_bh(&rt6_exception_lock);
1455
1456 if (ort->exception_bucket_flushed) {
1457 err = -EINVAL;
1458 goto out;
1459 }
1460
1461 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1462 lockdep_is_held(&rt6_exception_lock));
1463 if (!bucket) {
1464 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1465 GFP_ATOMIC);
1466 if (!bucket) {
1467 err = -ENOMEM;
1468 goto out;
1469 }
1470 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1471 }
1472
1473#ifdef CONFIG_IPV6_SUBTREES
1474 /* rt6i_src.plen != 0 indicates ort is in subtree
1475 * and exception table is indexed by a hash of
1476 * both rt6i_dst and rt6i_src.
1477 * Otherwise, the exception table is indexed by
1478 * a hash of only rt6i_dst.
1479 */
1480 if (ort->rt6i_src.plen)
1481 src_key = &nrt->rt6i_src.addr;
1482#endif
60006a48
WW
1483
1484 /* Update rt6i_prefsrc as it could be changed
1485 * in rt6_remove_prefsrc()
1486 */
1487 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1488 /* rt6_mtu_change() might lower mtu on ort.
1489 * Only insert this exception route if its mtu
1490 * is less than ort's mtu value.
1491 */
d4ead6b3 1492 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1493 err = -EINVAL;
1494 goto out;
1495 }
60006a48 1496
35732d01
WW
1497 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1498 src_key);
1499 if (rt6_ex)
1500 rt6_remove_exception(bucket, rt6_ex);
1501
1502 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1503 if (!rt6_ex) {
1504 err = -ENOMEM;
1505 goto out;
1506 }
1507 rt6_ex->rt6i = nrt;
1508 rt6_ex->stamp = jiffies;
1509 atomic_inc(&nrt->rt6i_ref);
1510 nrt->rt6i_node = ort->rt6i_node;
1511 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1512 bucket->depth++;
81eb8447 1513 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1514
1515 if (bucket->depth > FIB6_MAX_DEPTH)
1516 rt6_exception_remove_oldest(bucket);
1517
1518out:
1519 spin_unlock_bh(&rt6_exception_lock);
1520
1521 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1522 if (!err) {
922c2ac8 1523 spin_lock_bh(&ort->rt6i_table->tb6_lock);
7aef6859 1524 fib6_update_sernum(net, ort);
922c2ac8 1525 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1526 fib6_force_start_gc(net);
1527 }
35732d01
WW
1528
1529 return err;
1530}
1531
1532void rt6_flush_exceptions(struct rt6_info *rt)
1533{
1534 struct rt6_exception_bucket *bucket;
1535 struct rt6_exception *rt6_ex;
1536 struct hlist_node *tmp;
1537 int i;
1538
1539 spin_lock_bh(&rt6_exception_lock);
1540 /* Prevent rt6_insert_exception() to recreate the bucket list */
1541 rt->exception_bucket_flushed = 1;
1542
1543 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1544 lockdep_is_held(&rt6_exception_lock));
1545 if (!bucket)
1546 goto out;
1547
1548 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1549 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1550 rt6_remove_exception(bucket, rt6_ex);
1551 WARN_ON_ONCE(bucket->depth);
1552 bucket++;
1553 }
1554
1555out:
1556 spin_unlock_bh(&rt6_exception_lock);
1557}
1558
1559/* Find cached rt in the hash table inside passed in rt
1560 * Caller has to hold rcu_read_lock()
1561 */
1562static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1563 struct in6_addr *daddr,
1564 struct in6_addr *saddr)
1565{
1566 struct rt6_exception_bucket *bucket;
1567 struct in6_addr *src_key = NULL;
1568 struct rt6_exception *rt6_ex;
1569 struct rt6_info *res = NULL;
1570
1571 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1572
1573#ifdef CONFIG_IPV6_SUBTREES
1574 /* rt6i_src.plen != 0 indicates rt is in subtree
1575 * and exception table is indexed by a hash of
1576 * both rt6i_dst and rt6i_src.
1577 * Otherwise, the exception table is indexed by
1578 * a hash of only rt6i_dst.
1579 */
1580 if (rt->rt6i_src.plen)
1581 src_key = saddr;
1582#endif
1583 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1584
1585 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1586 res = rt6_ex->rt6i;
1587
1588 return res;
1589}
1590
1591/* Remove the passed in cached rt from the hash table that contains it */
1592int rt6_remove_exception_rt(struct rt6_info *rt)
1593{
35732d01 1594 struct rt6_exception_bucket *bucket;
3a2232e9 1595 struct rt6_info *from = rt->from;
35732d01
WW
1596 struct in6_addr *src_key = NULL;
1597 struct rt6_exception *rt6_ex;
1598 int err;
1599
1600 if (!from ||
442d713b 1601 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1602 return -EINVAL;
1603
1604 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1605 return -ENOENT;
1606
1607 spin_lock_bh(&rt6_exception_lock);
1608 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1609 lockdep_is_held(&rt6_exception_lock));
1610#ifdef CONFIG_IPV6_SUBTREES
1611 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1612 * and exception table is indexed by a hash of
1613 * both rt6i_dst and rt6i_src.
1614 * Otherwise, the exception table is indexed by
1615 * a hash of only rt6i_dst.
1616 */
1617 if (from->rt6i_src.plen)
1618 src_key = &rt->rt6i_src.addr;
1619#endif
1620 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1621 &rt->rt6i_dst.addr,
1622 src_key);
1623 if (rt6_ex) {
1624 rt6_remove_exception(bucket, rt6_ex);
1625 err = 0;
1626 } else {
1627 err = -ENOENT;
1628 }
1629
1630 spin_unlock_bh(&rt6_exception_lock);
1631 return err;
1632}
1633
1634/* Find rt6_ex which contains the passed in rt cache and
1635 * refresh its stamp
1636 */
1637static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1638{
35732d01 1639 struct rt6_exception_bucket *bucket;
3a2232e9 1640 struct rt6_info *from = rt->from;
35732d01
WW
1641 struct in6_addr *src_key = NULL;
1642 struct rt6_exception *rt6_ex;
1643
1644 if (!from ||
442d713b 1645 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1646 return;
1647
1648 rcu_read_lock();
1649 bucket = rcu_dereference(from->rt6i_exception_bucket);
1650
1651#ifdef CONFIG_IPV6_SUBTREES
1652 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1653 * and exception table is indexed by a hash of
1654 * both rt6i_dst and rt6i_src.
1655 * Otherwise, the exception table is indexed by
1656 * a hash of only rt6i_dst.
1657 */
1658 if (from->rt6i_src.plen)
1659 src_key = &rt->rt6i_src.addr;
1660#endif
1661 rt6_ex = __rt6_find_exception_rcu(&bucket,
1662 &rt->rt6i_dst.addr,
1663 src_key);
1664 if (rt6_ex)
1665 rt6_ex->stamp = jiffies;
1666
1667 rcu_read_unlock();
1668}
1669
60006a48
WW
1670static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1671{
1672 struct rt6_exception_bucket *bucket;
1673 struct rt6_exception *rt6_ex;
1674 int i;
1675
1676 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1677 lockdep_is_held(&rt6_exception_lock));
1678
1679 if (bucket) {
1680 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1681 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1682 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1683 }
1684 bucket++;
1685 }
1686 }
1687}
1688
e9fa1495
SB
1689static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1690 struct rt6_info *rt, int mtu)
1691{
1692 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1693 * lowest MTU in the path: always allow updating the route PMTU to
1694 * reflect PMTU decreases.
1695 *
1696 * If the new MTU is higher, and the route PMTU is equal to the local
1697 * MTU, this means the old MTU is the lowest in the path, so allow
1698 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1699 * handle this.
1700 */
1701
1702 if (dst_mtu(&rt->dst) >= mtu)
1703 return true;
1704
1705 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1706 return true;
1707
1708 return false;
1709}
1710
1711static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1712 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1713{
1714 struct rt6_exception_bucket *bucket;
1715 struct rt6_exception *rt6_ex;
1716 int i;
1717
1718 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1719 lockdep_is_held(&rt6_exception_lock));
1720
e9fa1495
SB
1721 if (!bucket)
1722 return;
1723
1724 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1725 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1726 struct rt6_info *entry = rt6_ex->rt6i;
1727
1728 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1729 * route), the metrics of its rt->from have already
e9fa1495
SB
1730 * been updated.
1731 */
d4ead6b3 1732 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1733 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1734 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1735 }
e9fa1495 1736 bucket++;
f5bbe7ee
WW
1737 }
1738}
1739
b16cb459
WW
1740#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1741
1742static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1743 struct in6_addr *gateway)
1744{
1745 struct rt6_exception_bucket *bucket;
1746 struct rt6_exception *rt6_ex;
1747 struct hlist_node *tmp;
1748 int i;
1749
1750 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1751 return;
1752
1753 spin_lock_bh(&rt6_exception_lock);
1754 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1755 lockdep_is_held(&rt6_exception_lock));
1756
1757 if (bucket) {
1758 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1759 hlist_for_each_entry_safe(rt6_ex, tmp,
1760 &bucket->chain, hlist) {
1761 struct rt6_info *entry = rt6_ex->rt6i;
1762
1763 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1764 RTF_CACHE_GATEWAY &&
1765 ipv6_addr_equal(gateway,
1766 &entry->rt6i_gateway)) {
1767 rt6_remove_exception(bucket, rt6_ex);
1768 }
1769 }
1770 bucket++;
1771 }
1772 }
1773
1774 spin_unlock_bh(&rt6_exception_lock);
1775}
1776
c757faa8
WW
1777static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1778 struct rt6_exception *rt6_ex,
1779 struct fib6_gc_args *gc_args,
1780 unsigned long now)
1781{
1782 struct rt6_info *rt = rt6_ex->rt6i;
1783
1859bac0
PA
1784 /* we are pruning and obsoleting aged-out and non gateway exceptions
1785 * even if others have still references to them, so that on next
1786 * dst_check() such references can be dropped.
1787 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1788 * expired, independently from their aging, as per RFC 8201 section 4
1789 */
31afeb42
WW
1790 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1791 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1792 RT6_TRACE("aging clone %p\n", rt);
1793 rt6_remove_exception(bucket, rt6_ex);
1794 return;
1795 }
1796 } else if (time_after(jiffies, rt->dst.expires)) {
1797 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1798 rt6_remove_exception(bucket, rt6_ex);
1799 return;
31afeb42
WW
1800 }
1801
1802 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1803 struct neighbour *neigh;
1804 __u8 neigh_flags = 0;
1805
1bfa26ff
ED
1806 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1807 if (neigh)
c757faa8 1808 neigh_flags = neigh->flags;
1bfa26ff 1809
c757faa8
WW
1810 if (!(neigh_flags & NTF_ROUTER)) {
1811 RT6_TRACE("purging route %p via non-router but gateway\n",
1812 rt);
1813 rt6_remove_exception(bucket, rt6_ex);
1814 return;
1815 }
1816 }
31afeb42 1817
c757faa8
WW
1818 gc_args->more++;
1819}
1820
1821void rt6_age_exceptions(struct rt6_info *rt,
1822 struct fib6_gc_args *gc_args,
1823 unsigned long now)
1824{
1825 struct rt6_exception_bucket *bucket;
1826 struct rt6_exception *rt6_ex;
1827 struct hlist_node *tmp;
1828 int i;
1829
1830 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1831 return;
1832
1bfa26ff
ED
1833 rcu_read_lock_bh();
1834 spin_lock(&rt6_exception_lock);
c757faa8
WW
1835 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1836 lockdep_is_held(&rt6_exception_lock));
1837
1838 if (bucket) {
1839 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1840 hlist_for_each_entry_safe(rt6_ex, tmp,
1841 &bucket->chain, hlist) {
1842 rt6_age_examine_exception(bucket, rt6_ex,
1843 gc_args, now);
1844 }
1845 bucket++;
1846 }
1847 }
1bfa26ff
ED
1848 spin_unlock(&rt6_exception_lock);
1849 rcu_read_unlock_bh();
c757faa8
WW
1850}
1851
9ff74384 1852struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1853 int oif, struct flowi6 *fl6,
1854 const struct sk_buff *skb, int flags)
1da177e4 1855{
367efcb9 1856 struct fib6_node *fn, *saved_fn;
2b760fcf 1857 struct rt6_info *rt, *rt_cache;
c71099ac 1858 int strict = 0;
1da177e4 1859
77d16f45 1860 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1861 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1862 if (net->ipv6.devconf_all->forwarding == 0)
1863 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1864
66f5d6ce 1865 rcu_read_lock();
1da177e4 1866
4c9483b2 1867 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1868 saved_fn = fn;
1da177e4 1869
ca254490
DA
1870 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1871 oif = 0;
1872
a3c00e46 1873redo_rt6_select:
8d1040e8 1874 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1875 if (rt->rt6i_nsiblings)
b4bac172 1876 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
421842ed 1877 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1878 fn = fib6_backtrack(fn, &fl6->saddr);
1879 if (fn)
1880 goto redo_rt6_select;
367efcb9
MKL
1881 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1882 /* also consider unreachable route */
1883 strict &= ~RT6_LOOKUP_F_REACHABLE;
1884 fn = saved_fn;
1885 goto redo_rt6_select;
367efcb9 1886 }
a3c00e46
MKL
1887 }
1888
2b760fcf
WW
1889 /*Search through exception table */
1890 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1891 if (rt_cache)
1892 rt = rt_cache;
fb9de91e 1893
421842ed
DA
1894 if (rt == net->ipv6.fib6_null_entry) {
1895 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1896 rcu_read_unlock();
d3843fe5 1897 dst_hold(&rt->dst);
b65f164d 1898 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1899 return rt;
1900 } else if (rt->rt6i_flags & RTF_CACHE) {
d4ead6b3 1901 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1902 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1903
66f5d6ce 1904 rcu_read_unlock();
b65f164d 1905 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1906 return rt;
3da59bd9
MKL
1907 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1908 !(rt->rt6i_flags & RTF_GATEWAY))) {
1909 /* Create a RTF_CACHE clone which will not be
1910 * owned by the fib6 tree. It is for the special case where
1911 * the daddr in the skb during the neighbor look-up is different
1912 * from the fl6->daddr used to look-up route here.
1913 */
1914
1915 struct rt6_info *uncached_rt;
1916
d3843fe5
WW
1917 if (ip6_hold_safe(net, &rt, true)) {
1918 dst_use_noref(&rt->dst, jiffies);
1919 } else {
66f5d6ce 1920 rcu_read_unlock();
d3843fe5
WW
1921 uncached_rt = rt;
1922 goto uncached_rt_out;
1923 }
66f5d6ce 1924 rcu_read_unlock();
d52d3997 1925
3da59bd9
MKL
1926 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1927 dst_release(&rt->dst);
c71099ac 1928
1cfb71ee
WW
1929 if (uncached_rt) {
1930 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1931 * No need for another dst_hold()
1932 */
8d0b94af 1933 rt6_uncached_list_add(uncached_rt);
81eb8447 1934 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1935 } else {
3da59bd9 1936 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1937 dst_hold(&uncached_rt->dst);
1938 }
b811580d 1939
d3843fe5 1940uncached_rt_out:
b65f164d 1941 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1942 return uncached_rt;
3da59bd9 1943
d52d3997
MKL
1944 } else {
1945 /* Get a percpu copy */
1946
1947 struct rt6_info *pcpu_rt;
1948
d3843fe5 1949 dst_use_noref(&rt->dst, jiffies);
951f788a 1950 local_bh_disable();
d52d3997 1951 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1952
951f788a 1953 if (!pcpu_rt) {
a94b9367
WW
1954 /* atomic_inc_not_zero() is needed when using rcu */
1955 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1956 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1957 * rt->rt6i_ref makes sure rt can't be released.
1958 */
afb1d4b5 1959 pcpu_rt = rt6_make_pcpu_route(net, rt);
a94b9367
WW
1960 rt6_release(rt);
1961 } else {
1962 /* rt is already removed from tree */
a94b9367
WW
1963 pcpu_rt = net->ipv6.ip6_null_entry;
1964 dst_hold(&pcpu_rt->dst);
1965 }
9c7370a1 1966 }
951f788a
ED
1967 local_bh_enable();
1968 rcu_read_unlock();
b65f164d 1969 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1970 return pcpu_rt;
1971 }
1da177e4 1972}
9ff74384 1973EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1974
b75cc8f9
DA
1975static struct rt6_info *ip6_pol_route_input(struct net *net,
1976 struct fib6_table *table,
1977 struct flowi6 *fl6,
1978 const struct sk_buff *skb,
1979 int flags)
4acad72d 1980{
b75cc8f9 1981 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1982}
1983
d409b847
MB
1984struct dst_entry *ip6_route_input_lookup(struct net *net,
1985 struct net_device *dev,
b75cc8f9
DA
1986 struct flowi6 *fl6,
1987 const struct sk_buff *skb,
1988 int flags)
72331bc0
SL
1989{
1990 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1991 flags |= RT6_LOOKUP_F_IFACE;
1992
b75cc8f9 1993 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1994}
d409b847 1995EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1996
23aebdac 1997static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1998 struct flow_keys *keys,
1999 struct flow_keys *flkeys)
23aebdac
JS
2000{
2001 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2002 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 2003 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
2004 const struct ipv6hdr *inner_iph;
2005 const struct icmp6hdr *icmph;
2006 struct ipv6hdr _inner_iph;
2007
2008 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2009 goto out;
2010
2011 icmph = icmp6_hdr(skb);
2012 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2013 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2014 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2015 icmph->icmp6_type != ICMPV6_PARAMPROB)
2016 goto out;
2017
2018 inner_iph = skb_header_pointer(skb,
2019 skb_transport_offset(skb) + sizeof(*icmph),
2020 sizeof(_inner_iph), &_inner_iph);
2021 if (!inner_iph)
2022 goto out;
2023
2024 key_iph = inner_iph;
5e5d6fed 2025 _flkeys = NULL;
23aebdac 2026out:
5e5d6fed
RP
2027 if (_flkeys) {
2028 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2029 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2030 keys->tags.flow_label = _flkeys->tags.flow_label;
2031 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2032 } else {
2033 keys->addrs.v6addrs.src = key_iph->saddr;
2034 keys->addrs.v6addrs.dst = key_iph->daddr;
2035 keys->tags.flow_label = ip6_flowinfo(key_iph);
2036 keys->basic.ip_proto = key_iph->nexthdr;
2037 }
23aebdac
JS
2038}
2039
2040/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2041u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2042 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2043{
2044 struct flow_keys hash_keys;
9a2a537a 2045 u32 mhash;
23aebdac 2046
bbfa047a 2047 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2048 case 0:
2049 memset(&hash_keys, 0, sizeof(hash_keys));
2050 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2051 if (skb) {
2052 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2053 } else {
2054 hash_keys.addrs.v6addrs.src = fl6->saddr;
2055 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2056 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2057 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2058 }
2059 break;
2060 case 1:
2061 if (skb) {
2062 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2063 struct flow_keys keys;
2064
2065 /* short-circuit if we already have L4 hash present */
2066 if (skb->l4_hash)
2067 return skb_get_hash_raw(skb) >> 1;
2068
2069 memset(&hash_keys, 0, sizeof(hash_keys));
2070
2071 if (!flkeys) {
2072 skb_flow_dissect_flow_keys(skb, &keys, flag);
2073 flkeys = &keys;
2074 }
2075 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2076 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2077 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2078 hash_keys.ports.src = flkeys->ports.src;
2079 hash_keys.ports.dst = flkeys->ports.dst;
2080 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2081 } else {
2082 memset(&hash_keys, 0, sizeof(hash_keys));
2083 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2084 hash_keys.addrs.v6addrs.src = fl6->saddr;
2085 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2086 hash_keys.ports.src = fl6->fl6_sport;
2087 hash_keys.ports.dst = fl6->fl6_dport;
2088 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2089 }
2090 break;
23aebdac 2091 }
9a2a537a 2092 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2093
9a2a537a 2094 return mhash >> 1;
23aebdac
JS
2095}
2096
c71099ac
TG
2097void ip6_route_input(struct sk_buff *skb)
2098{
b71d1d42 2099 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2100 struct net *net = dev_net(skb->dev);
adaa70bb 2101 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2102 struct ip_tunnel_info *tun_info;
4c9483b2 2103 struct flowi6 fl6 = {
e0d56fdd 2104 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2105 .daddr = iph->daddr,
2106 .saddr = iph->saddr,
6502ca52 2107 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2108 .flowi6_mark = skb->mark,
2109 .flowi6_proto = iph->nexthdr,
c71099ac 2110 };
5e5d6fed 2111 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2112
904af04d 2113 tun_info = skb_tunnel_info(skb);
46fa062a 2114 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2115 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2116
2117 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2118 flkeys = &_flkeys;
2119
23aebdac 2120 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2121 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2122 skb_dst_drop(skb);
b75cc8f9
DA
2123 skb_dst_set(skb,
2124 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2125}
2126
b75cc8f9
DA
2127static struct rt6_info *ip6_pol_route_output(struct net *net,
2128 struct fib6_table *table,
2129 struct flowi6 *fl6,
2130 const struct sk_buff *skb,
2131 int flags)
1da177e4 2132{
b75cc8f9 2133 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2134}
2135
6f21c96a
PA
2136struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2137 struct flowi6 *fl6, int flags)
c71099ac 2138{
d46a9d67 2139 bool any_src;
c71099ac 2140
4c1feac5
DA
2141 if (rt6_need_strict(&fl6->daddr)) {
2142 struct dst_entry *dst;
2143
2144 dst = l3mdev_link_scope_lookup(net, fl6);
2145 if (dst)
2146 return dst;
2147 }
ca254490 2148
1fb9489b 2149 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2150
d46a9d67 2151 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2152 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2153 (fl6->flowi6_oif && any_src))
77d16f45 2154 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2155
d46a9d67 2156 if (!any_src)
adaa70bb 2157 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2158 else if (sk)
2159 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2160
b75cc8f9 2161 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2162}
6f21c96a 2163EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2164
2774c131 2165struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2166{
5c1e6aa3 2167 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2168 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2169 struct dst_entry *new = NULL;
2170
1dbe3252 2171 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2172 DST_OBSOLETE_DEAD, 0);
14e50e57 2173 if (rt) {
0a1f5962 2174 rt6_info_init(rt);
81eb8447 2175 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2176
0a1f5962 2177 new = &rt->dst;
14e50e57 2178 new->__use = 1;
352e512c 2179 new->input = dst_discard;
ede2059d 2180 new->output = dst_discard_out;
14e50e57 2181
0a1f5962 2182 dst_copy_metrics(new, &ort->dst);
14e50e57 2183
1dbe3252 2184 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2185 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2186 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2187 rt->rt6i_metric = 0;
2188
2189 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2190#ifdef CONFIG_IPV6_SUBTREES
2191 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2192#endif
14e50e57
DM
2193 }
2194
69ead7af
DM
2195 dst_release(dst_orig);
2196 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2197}
14e50e57 2198
1da177e4
LT
2199/*
2200 * Destination cache support functions
2201 */
2202
3da59bd9
MKL
2203static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2204{
36143645 2205 u32 rt_cookie = 0;
c5cff856
WW
2206
2207 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2208 return NULL;
2209
2210 if (rt6_check_expired(rt))
2211 return NULL;
2212
2213 return &rt->dst;
2214}
2215
2216static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2217{
5973fb1e
MKL
2218 if (!__rt6_check_expired(rt) &&
2219 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2220 rt6_check(rt->from, cookie))
3da59bd9
MKL
2221 return &rt->dst;
2222 else
2223 return NULL;
2224}
2225
1da177e4
LT
2226static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2227{
2228 struct rt6_info *rt;
2229
2230 rt = (struct rt6_info *) dst;
2231
6f3118b5
ND
2232 /* All IPV6 dsts are created with ->obsolete set to the value
2233 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2234 * into this function always.
2235 */
e3bc10bd 2236
02bcf4e0 2237 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2238 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2239 return rt6_dst_from_check(rt, cookie);
2240 else
2241 return rt6_check(rt, cookie);
1da177e4
LT
2242}
2243
2244static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2245{
2246 struct rt6_info *rt = (struct rt6_info *) dst;
2247
2248 if (rt) {
54c1a859
YH
2249 if (rt->rt6i_flags & RTF_CACHE) {
2250 if (rt6_check_expired(rt)) {
afb1d4b5 2251 ip6_del_rt(dev_net(dst->dev), rt);
54c1a859
YH
2252 dst = NULL;
2253 }
2254 } else {
1da177e4 2255 dst_release(dst);
54c1a859
YH
2256 dst = NULL;
2257 }
1da177e4 2258 }
54c1a859 2259 return dst;
1da177e4
LT
2260}
2261
2262static void ip6_link_failure(struct sk_buff *skb)
2263{
2264 struct rt6_info *rt;
2265
3ffe533c 2266 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2267
adf30907 2268 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2269 if (rt) {
1eb4f758 2270 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2271 if (dst_hold_safe(&rt->dst))
afb1d4b5 2272 ip6_del_rt(dev_net(rt->dst.dev), rt);
c5cff856
WW
2273 } else {
2274 struct fib6_node *fn;
2275
2276 rcu_read_lock();
2277 fn = rcu_dereference(rt->rt6i_node);
2278 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2279 fn->fn_sernum = -1;
2280 rcu_read_unlock();
1eb4f758 2281 }
1da177e4
LT
2282 }
2283}
2284
45e4fd26
MKL
2285static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2286{
2287 struct net *net = dev_net(rt->dst.dev);
2288
d4ead6b3 2289 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2290 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2291 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2292}
2293
0d3f6d29
MKL
2294static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2295{
2296 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2297 (rt->rt6i_flags & RTF_PCPU ||
2298 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2299}
2300
45e4fd26
MKL
2301static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2302 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2303{
0dec879f 2304 const struct in6_addr *daddr, *saddr;
67ba4152 2305 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2306
45e4fd26
MKL
2307 if (rt6->rt6i_flags & RTF_LOCAL)
2308 return;
81aded24 2309
19bda36c
XL
2310 if (dst_metric_locked(dst, RTAX_MTU))
2311 return;
2312
0dec879f
JA
2313 if (iph) {
2314 daddr = &iph->daddr;
2315 saddr = &iph->saddr;
2316 } else if (sk) {
2317 daddr = &sk->sk_v6_daddr;
2318 saddr = &inet6_sk(sk)->saddr;
2319 } else {
2320 daddr = NULL;
2321 saddr = NULL;
2322 }
2323 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2324 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2325 if (mtu >= dst_mtu(dst))
2326 return;
9d289715 2327
0d3f6d29 2328 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2329 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2330 /* update rt6_ex->stamp for cache */
2331 if (rt6->rt6i_flags & RTF_CACHE)
2332 rt6_update_exception_stamp_rt(rt6);
0dec879f 2333 } else if (daddr) {
45e4fd26
MKL
2334 struct rt6_info *nrt6;
2335
d4ead6b3 2336 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
45e4fd26
MKL
2337 if (nrt6) {
2338 rt6_do_update_pmtu(nrt6, mtu);
d4ead6b3 2339 if (rt6_insert_exception(nrt6, rt6->from))
2b760fcf 2340 dst_release_immediate(&nrt6->dst);
45e4fd26 2341 }
1da177e4
LT
2342 }
2343}
2344
45e4fd26
MKL
2345static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2346 struct sk_buff *skb, u32 mtu)
2347{
2348 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2349}
2350
42ae66c8 2351void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2352 int oif, u32 mark, kuid_t uid)
81aded24
DM
2353{
2354 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2355 struct dst_entry *dst;
2356 struct flowi6 fl6;
2357
2358 memset(&fl6, 0, sizeof(fl6));
2359 fl6.flowi6_oif = oif;
1b3c61dc 2360 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2361 fl6.daddr = iph->daddr;
2362 fl6.saddr = iph->saddr;
6502ca52 2363 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2364 fl6.flowi6_uid = uid;
81aded24
DM
2365
2366 dst = ip6_route_output(net, NULL, &fl6);
2367 if (!dst->error)
45e4fd26 2368 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2369 dst_release(dst);
2370}
2371EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2372
2373void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2374{
33c162a9
MKL
2375 struct dst_entry *dst;
2376
81aded24 2377 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2378 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2379
2380 dst = __sk_dst_get(sk);
2381 if (!dst || !dst->obsolete ||
2382 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2383 return;
2384
2385 bh_lock_sock(sk);
2386 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2387 ip6_datagram_dst_update(sk, false);
2388 bh_unlock_sock(sk);
81aded24
DM
2389}
2390EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2391
7d6850f7
AK
2392void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2393 const struct flowi6 *fl6)
2394{
2395#ifdef CONFIG_IPV6_SUBTREES
2396 struct ipv6_pinfo *np = inet6_sk(sk);
2397#endif
2398
2399 ip6_dst_store(sk, dst,
2400 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2401 &sk->sk_v6_daddr : NULL,
2402#ifdef CONFIG_IPV6_SUBTREES
2403 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2404 &np->saddr :
2405#endif
2406 NULL);
2407}
2408
b55b76b2
DJ
2409/* Handle redirects */
2410struct ip6rd_flowi {
2411 struct flowi6 fl6;
2412 struct in6_addr gateway;
2413};
2414
2415static struct rt6_info *__ip6_route_redirect(struct net *net,
2416 struct fib6_table *table,
2417 struct flowi6 *fl6,
b75cc8f9 2418 const struct sk_buff *skb,
b55b76b2
DJ
2419 int flags)
2420{
2421 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2422 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2423 struct fib6_node *fn;
2424
2425 /* Get the "current" route for this destination and
67c408cf 2426 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2427 *
2428 * RFC 4861 specifies that redirects should only be
2429 * accepted if they come from the nexthop to the target.
2430 * Due to the way the routes are chosen, this notion
2431 * is a bit fuzzy and one might need to check all possible
2432 * routes.
2433 */
2434
66f5d6ce 2435 rcu_read_lock();
b55b76b2
DJ
2436 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2437restart:
66f5d6ce 2438 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2439 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2440 continue;
14895687 2441 if (fib6_check_expired(rt))
b55b76b2 2442 continue;
6edb3c96 2443 if (rt->rt6i_flags & RTF_REJECT)
b55b76b2
DJ
2444 break;
2445 if (!(rt->rt6i_flags & RTF_GATEWAY))
2446 continue;
5e670d84 2447 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2448 continue;
2b760fcf
WW
2449 /* rt_cache's gateway might be different from its 'parent'
2450 * in the case of an ip redirect.
2451 * So we keep searching in the exception table if the gateway
2452 * is different.
2453 */
5e670d84 2454 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2455 rt_cache = rt6_find_cached_rt(rt,
2456 &fl6->daddr,
2457 &fl6->saddr);
2458 if (rt_cache &&
2459 ipv6_addr_equal(&rdfl->gateway,
2460 &rt_cache->rt6i_gateway)) {
2461 rt = rt_cache;
2462 break;
2463 }
b55b76b2 2464 continue;
2b760fcf 2465 }
b55b76b2
DJ
2466 break;
2467 }
2468
2469 if (!rt)
421842ed 2470 rt = net->ipv6.fib6_null_entry;
6edb3c96 2471 else if (rt->rt6i_flags & RTF_REJECT) {
b55b76b2 2472 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2473 goto out;
2474 }
2475
421842ed 2476 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2477 fn = fib6_backtrack(fn, &fl6->saddr);
2478 if (fn)
2479 goto restart;
b55b76b2 2480 }
a3c00e46 2481
b0a1ba59 2482out:
d3843fe5 2483 ip6_hold_safe(net, &rt, true);
b55b76b2 2484
66f5d6ce 2485 rcu_read_unlock();
b55b76b2 2486
b65f164d 2487 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2488 return rt;
2489};
2490
2491static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2492 const struct flowi6 *fl6,
2493 const struct sk_buff *skb,
2494 const struct in6_addr *gateway)
b55b76b2
DJ
2495{
2496 int flags = RT6_LOOKUP_F_HAS_SADDR;
2497 struct ip6rd_flowi rdfl;
2498
2499 rdfl.fl6 = *fl6;
2500 rdfl.gateway = *gateway;
2501
b75cc8f9 2502 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2503 flags, __ip6_route_redirect);
2504}
2505
e2d118a1
LC
2506void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2507 kuid_t uid)
3a5ad2ee
DM
2508{
2509 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2510 struct dst_entry *dst;
2511 struct flowi6 fl6;
2512
2513 memset(&fl6, 0, sizeof(fl6));
e374c618 2514 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2515 fl6.flowi6_oif = oif;
2516 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2517 fl6.daddr = iph->daddr;
2518 fl6.saddr = iph->saddr;
6502ca52 2519 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2520 fl6.flowi6_uid = uid;
3a5ad2ee 2521
b75cc8f9 2522 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2523 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2524 dst_release(dst);
2525}
2526EXPORT_SYMBOL_GPL(ip6_redirect);
2527
c92a59ec
DJ
2528void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2529 u32 mark)
2530{
2531 const struct ipv6hdr *iph = ipv6_hdr(skb);
2532 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2533 struct dst_entry *dst;
2534 struct flowi6 fl6;
2535
2536 memset(&fl6, 0, sizeof(fl6));
e374c618 2537 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2538 fl6.flowi6_oif = oif;
2539 fl6.flowi6_mark = mark;
c92a59ec
DJ
2540 fl6.daddr = msg->dest;
2541 fl6.saddr = iph->daddr;
e2d118a1 2542 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2543
b75cc8f9 2544 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2545 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2546 dst_release(dst);
2547}
2548
3a5ad2ee
DM
2549void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2550{
e2d118a1
LC
2551 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2552 sk->sk_uid);
3a5ad2ee
DM
2553}
2554EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2555
0dbaee3b 2556static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2557{
0dbaee3b
DM
2558 struct net_device *dev = dst->dev;
2559 unsigned int mtu = dst_mtu(dst);
2560 struct net *net = dev_net(dev);
2561
1da177e4
LT
2562 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2563
5578689a
DL
2564 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2565 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2566
2567 /*
1ab1457c
YH
2568 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2569 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2570 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2571 * rely only on pmtu discovery"
2572 */
2573 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2574 mtu = IPV6_MAXPLEN;
2575 return mtu;
2576}
2577
ebb762f2 2578static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2579{
d33e4553 2580 struct inet6_dev *idev;
d4ead6b3 2581 unsigned int mtu;
4b32b5ad
MKL
2582
2583 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2584 if (mtu)
30f78d8e 2585 goto out;
618f9bc7
SK
2586
2587 mtu = IPV6_MIN_MTU;
d33e4553
DM
2588
2589 rcu_read_lock();
2590 idev = __in6_dev_get(dst->dev);
2591 if (idev)
2592 mtu = idev->cnf.mtu6;
2593 rcu_read_unlock();
2594
30f78d8e 2595out:
14972cbd
RP
2596 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2597
2598 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2599}
2600
3b00944c 2601struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2602 struct flowi6 *fl6)
1da177e4 2603{
87a11578 2604 struct dst_entry *dst;
1da177e4
LT
2605 struct rt6_info *rt;
2606 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2607 struct net *net = dev_net(dev);
1da177e4 2608
38308473 2609 if (unlikely(!idev))
122bdf67 2610 return ERR_PTR(-ENODEV);
1da177e4 2611
ad706862 2612 rt = ip6_dst_alloc(net, dev, 0);
38308473 2613 if (unlikely(!rt)) {
1da177e4 2614 in6_dev_put(idev);
87a11578 2615 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2616 goto out;
2617 }
2618
8e2ec639 2619 rt->dst.flags |= DST_HOST;
588753f1 2620 rt->dst.input = ip6_input;
8e2ec639 2621 rt->dst.output = ip6_output;
550bab42 2622 rt->rt6i_gateway = fl6->daddr;
87a11578 2623 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2624 rt->rt6i_dst.plen = 128;
2625 rt->rt6i_idev = idev;
14edd87d 2626 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2627
4c981e28 2628 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2629 * do proper release of the net_device
2630 */
2631 rt6_uncached_list_add(rt);
81eb8447 2632 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2633
87a11578
DM
2634 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2635
1da177e4 2636out:
87a11578 2637 return dst;
1da177e4
LT
2638}
2639
569d3645 2640static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2641{
86393e52 2642 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2643 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2644 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2645 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2647 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2648 int entries;
7019b78e 2649
fc66f95c 2650 entries = dst_entries_get_fast(ops);
49a18d86 2651 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2652 entries <= rt_max_size)
1da177e4
LT
2653 goto out;
2654
6891a346 2655 net->ipv6.ip6_rt_gc_expire++;
14956643 2656 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2657 entries = dst_entries_get_slow(ops);
2658 if (entries < ops->gc_thresh)
7019b78e 2659 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2660out:
7019b78e 2661 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2662 return entries > rt_max_size;
1da177e4
LT
2663}
2664
d4ead6b3
DA
2665static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2666 struct fib6_config *cfg)
e715b6d3 2667{
d4ead6b3 2668 int err = 0;
e715b6d3 2669
d4ead6b3
DA
2670 if (cfg->fc_mx) {
2671 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2672 GFP_KERNEL);
2673 if (unlikely(!rt->fib6_metrics))
2674 return -ENOMEM;
ea697639 2675
d4ead6b3 2676 refcount_set(&rt->fib6_metrics->refcnt, 1);
e715b6d3 2677
d4ead6b3
DA
2678 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2679 rt->fib6_metrics->metrics);
c3a8d947 2680 }
e715b6d3 2681
d4ead6b3 2682 return err;
e715b6d3 2683}
1da177e4 2684
8c14586f
DA
2685static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2686 struct fib6_config *cfg,
f4797b33
DA
2687 const struct in6_addr *gw_addr,
2688 u32 tbid, int flags)
8c14586f
DA
2689{
2690 struct flowi6 fl6 = {
2691 .flowi6_oif = cfg->fc_ifindex,
2692 .daddr = *gw_addr,
2693 .saddr = cfg->fc_prefsrc,
2694 };
2695 struct fib6_table *table;
2696 struct rt6_info *rt;
8c14586f 2697
f4797b33 2698 table = fib6_get_table(net, tbid);
8c14586f
DA
2699 if (!table)
2700 return NULL;
2701
2702 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2703 flags |= RT6_LOOKUP_F_HAS_SADDR;
2704
f4797b33 2705 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2706 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2707
2708 /* if table lookup failed, fall back to full lookup */
2709 if (rt == net->ipv6.ip6_null_entry) {
2710 ip6_rt_put(rt);
2711 rt = NULL;
2712 }
2713
2714 return rt;
2715}
2716
fc1e64e1
DA
2717static int ip6_route_check_nh_onlink(struct net *net,
2718 struct fib6_config *cfg,
9fbb704c 2719 const struct net_device *dev,
fc1e64e1
DA
2720 struct netlink_ext_ack *extack)
2721{
44750f84 2722 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2723 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2724 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2725 struct rt6_info *grt;
2726 int err;
2727
2728 err = 0;
2729 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2730 if (grt) {
58e354c0
DA
2731 if (!grt->dst.error &&
2732 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2733 NL_SET_ERR_MSG(extack,
2734 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2735 err = -EINVAL;
2736 }
2737
2738 ip6_rt_put(grt);
2739 }
2740
2741 return err;
2742}
2743
1edce99f
DA
2744static int ip6_route_check_nh(struct net *net,
2745 struct fib6_config *cfg,
2746 struct net_device **_dev,
2747 struct inet6_dev **idev)
2748{
2749 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2750 struct net_device *dev = _dev ? *_dev : NULL;
2751 struct rt6_info *grt = NULL;
2752 int err = -EHOSTUNREACH;
2753
2754 if (cfg->fc_table) {
f4797b33
DA
2755 int flags = RT6_LOOKUP_F_IFACE;
2756
2757 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2758 cfg->fc_table, flags);
1edce99f
DA
2759 if (grt) {
2760 if (grt->rt6i_flags & RTF_GATEWAY ||
2761 (dev && dev != grt->dst.dev)) {
2762 ip6_rt_put(grt);
2763 grt = NULL;
2764 }
2765 }
2766 }
2767
2768 if (!grt)
b75cc8f9 2769 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2770
2771 if (!grt)
2772 goto out;
2773
2774 if (dev) {
2775 if (dev != grt->dst.dev) {
2776 ip6_rt_put(grt);
2777 goto out;
2778 }
2779 } else {
2780 *_dev = dev = grt->dst.dev;
2781 *idev = grt->rt6i_idev;
2782 dev_hold(dev);
2783 in6_dev_hold(grt->rt6i_idev);
2784 }
2785
2786 if (!(grt->rt6i_flags & RTF_GATEWAY))
2787 err = 0;
2788
2789 ip6_rt_put(grt);
2790
2791out:
2792 return err;
2793}
2794
9fbb704c
DA
2795static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2796 struct net_device **_dev, struct inet6_dev **idev,
2797 struct netlink_ext_ack *extack)
2798{
2799 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2800 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2801 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2802 const struct net_device *dev = *_dev;
232378e8 2803 bool need_addr_check = !dev;
9fbb704c
DA
2804 int err = -EINVAL;
2805
2806 /* if gw_addr is local we will fail to detect this in case
2807 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2808 * will return already-added prefix route via interface that
2809 * prefix route was assigned to, which might be non-loopback.
2810 */
232378e8
DA
2811 if (dev &&
2812 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2813 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2814 goto out;
2815 }
2816
2817 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2818 /* IPv6 strictly inhibits using not link-local
2819 * addresses as nexthop address.
2820 * Otherwise, router will not able to send redirects.
2821 * It is very good, but in some (rare!) circumstances
2822 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2823 * some exceptions. --ANK
2824 * We allow IPv4-mapped nexthops to support RFC4798-type
2825 * addressing
2826 */
2827 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2828 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2829 goto out;
2830 }
2831
2832 if (cfg->fc_flags & RTNH_F_ONLINK)
2833 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2834 else
2835 err = ip6_route_check_nh(net, cfg, _dev, idev);
2836
2837 if (err)
2838 goto out;
2839 }
2840
2841 /* reload in case device was changed */
2842 dev = *_dev;
2843
2844 err = -EINVAL;
2845 if (!dev) {
2846 NL_SET_ERR_MSG(extack, "Egress device not specified");
2847 goto out;
2848 } else if (dev->flags & IFF_LOOPBACK) {
2849 NL_SET_ERR_MSG(extack,
2850 "Egress device can not be loopback device for this route");
2851 goto out;
2852 }
232378e8
DA
2853
2854 /* if we did not check gw_addr above, do so now that the
2855 * egress device has been resolved.
2856 */
2857 if (need_addr_check &&
2858 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2859 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2860 goto out;
2861 }
2862
9fbb704c
DA
2863 err = 0;
2864out:
2865 return err;
2866}
2867
333c4301
DA
2868static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2869 struct netlink_ext_ack *extack)
1da177e4 2870{
5578689a 2871 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2872 struct rt6_info *rt = NULL;
2873 struct net_device *dev = NULL;
2874 struct inet6_dev *idev = NULL;
c71099ac 2875 struct fib6_table *table;
1da177e4 2876 int addr_type;
8c5b83f0 2877 int err = -EINVAL;
1da177e4 2878
557c44be 2879 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2880 if (cfg->fc_flags & RTF_PCPU) {
2881 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2882 goto out;
d5d531cb 2883 }
557c44be 2884
2ea2352e
WW
2885 /* RTF_CACHE is an internal flag; can not be set by userspace */
2886 if (cfg->fc_flags & RTF_CACHE) {
2887 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2888 goto out;
2889 }
2890
e8478e80
DA
2891 if (cfg->fc_type > RTN_MAX) {
2892 NL_SET_ERR_MSG(extack, "Invalid route type");
2893 goto out;
2894 }
2895
d5d531cb
DA
2896 if (cfg->fc_dst_len > 128) {
2897 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2898 goto out;
2899 }
2900 if (cfg->fc_src_len > 128) {
2901 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2902 goto out;
d5d531cb 2903 }
1da177e4 2904#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2905 if (cfg->fc_src_len) {
2906 NL_SET_ERR_MSG(extack,
2907 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2908 goto out;
d5d531cb 2909 }
1da177e4 2910#endif
86872cb5 2911 if (cfg->fc_ifindex) {
1da177e4 2912 err = -ENODEV;
5578689a 2913 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2914 if (!dev)
2915 goto out;
2916 idev = in6_dev_get(dev);
2917 if (!idev)
2918 goto out;
2919 }
2920
86872cb5
TG
2921 if (cfg->fc_metric == 0)
2922 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2923
fc1e64e1
DA
2924 if (cfg->fc_flags & RTNH_F_ONLINK) {
2925 if (!dev) {
2926 NL_SET_ERR_MSG(extack,
2927 "Nexthop device required for onlink");
2928 err = -ENODEV;
2929 goto out;
2930 }
2931
2932 if (!(dev->flags & IFF_UP)) {
2933 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2934 err = -ENETDOWN;
2935 goto out;
2936 }
2937 }
2938
d71314b4 2939 err = -ENOBUFS;
38308473
DM
2940 if (cfg->fc_nlinfo.nlh &&
2941 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2942 table = fib6_get_table(net, cfg->fc_table);
38308473 2943 if (!table) {
f3213831 2944 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2945 table = fib6_new_table(net, cfg->fc_table);
2946 }
2947 } else {
2948 table = fib6_new_table(net, cfg->fc_table);
2949 }
38308473
DM
2950
2951 if (!table)
c71099ac 2952 goto out;
c71099ac 2953
ad706862
MKL
2954 rt = ip6_dst_alloc(net, NULL,
2955 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2956
38308473 2957 if (!rt) {
1da177e4
LT
2958 err = -ENOMEM;
2959 goto out;
2960 }
2961
d4ead6b3
DA
2962 err = ip6_convert_metrics(net, rt, cfg);
2963 if (err < 0)
2964 goto out;
2965
1716a961 2966 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2967 fib6_set_expires(rt, jiffies +
1716a961
G
2968 clock_t_to_jiffies(cfg->fc_expires));
2969 else
14895687 2970 fib6_clean_expires(rt);
1da177e4 2971
86872cb5
TG
2972 if (cfg->fc_protocol == RTPROT_UNSPEC)
2973 cfg->fc_protocol = RTPROT_BOOT;
2974 rt->rt6i_protocol = cfg->fc_protocol;
2975
2976 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2977
19e42e45
RP
2978 if (cfg->fc_encap) {
2979 struct lwtunnel_state *lwtstate;
2980
30357d7d 2981 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2982 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2983 &lwtstate, extack);
19e42e45
RP
2984 if (err)
2985 goto out;
5e670d84 2986 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2987 }
2988
86872cb5
TG
2989 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2990 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2991 if (rt->rt6i_dst.plen == 128)
3b6761d1 2992 rt->dst_host = true;
e5fd387a 2993
1da177e4 2994#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2995 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2996 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2997#endif
2998
86872cb5 2999 rt->rt6i_metric = cfg->fc_metric;
5e670d84 3000 rt->fib6_nh.nh_weight = 1;
1da177e4 3001
e8478e80
DA
3002 rt->fib6_type = cfg->fc_type;
3003
1da177e4
LT
3004 /* We cannot add true routes via loopback here,
3005 they would result in kernel looping; promote them to reject routes
3006 */
86872cb5 3007 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3008 (dev && (dev->flags & IFF_LOOPBACK) &&
3009 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3010 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3011 /* hold loopback dev/idev if we haven't done so. */
5578689a 3012 if (dev != net->loopback_dev) {
1da177e4
LT
3013 if (dev) {
3014 dev_put(dev);
3015 in6_dev_put(idev);
3016 }
5578689a 3017 dev = net->loopback_dev;
1da177e4
LT
3018 dev_hold(dev);
3019 idev = in6_dev_get(dev);
3020 if (!idev) {
3021 err = -ENODEV;
3022 goto out;
3023 }
3024 }
1da177e4
LT
3025 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
3026 goto install_route;
3027 }
3028
86872cb5 3029 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3030 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3031 if (err)
48ed7b26 3032 goto out;
1da177e4 3033
5e670d84 3034 rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
3035 }
3036
3037 err = -ENODEV;
38308473 3038 if (!dev)
1da177e4
LT
3039 goto out;
3040
428604fb
LB
3041 if (idev->cnf.disable_ipv6) {
3042 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3043 err = -EACCES;
3044 goto out;
3045 }
3046
955ec4cb
DA
3047 if (!(dev->flags & IFF_UP)) {
3048 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3049 err = -ENETDOWN;
3050 goto out;
3051 }
3052
c3968a85
DW
3053 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3054 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3055 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3056 err = -EINVAL;
3057 goto out;
3058 }
4e3fd7a0 3059 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
3060 rt->rt6i_prefsrc.plen = 128;
3061 } else
3062 rt->rt6i_prefsrc.plen = 0;
3063
86872cb5 3064 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
3065
3066install_route:
5609b80a
IS
3067 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3068 !netif_carrier_ok(dev))
5e670d84
DA
3069 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3070 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3071 rt->fib6_nh.nh_dev = rt->dst.dev = dev;
1da177e4 3072 rt->rt6i_idev = idev;
c71099ac 3073 rt->rt6i_table = table;
63152fc0 3074
c346dca1 3075 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3076
8c5b83f0 3077 return rt;
6b9ea5a6
RP
3078out:
3079 if (dev)
3080 dev_put(dev);
3081 if (idev)
3082 in6_dev_put(idev);
587fea74
WW
3083 if (rt)
3084 dst_release_immediate(&rt->dst);
6b9ea5a6 3085
8c5b83f0 3086 return ERR_PTR(err);
6b9ea5a6
RP
3087}
3088
d4ead6b3 3089int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack)
6b9ea5a6 3090{
8c5b83f0 3091 struct rt6_info *rt;
6b9ea5a6
RP
3092 int err;
3093
333c4301 3094 rt = ip6_route_info_create(cfg, extack);
d4ead6b3
DA
3095 if (IS_ERR(rt))
3096 return PTR_ERR(rt);
6b9ea5a6 3097
d4ead6b3 3098 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
6b9ea5a6 3099
1da177e4
LT
3100 return err;
3101}
3102
86872cb5 3103static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4 3104{
afb1d4b5 3105 struct net *net = info->nl_net;
c71099ac 3106 struct fib6_table *table;
afb1d4b5 3107 int err;
1da177e4 3108
421842ed 3109 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3110 err = -ENOENT;
3111 goto out;
3112 }
6c813a72 3113
c71099ac 3114 table = rt->rt6i_table;
66f5d6ce 3115 spin_lock_bh(&table->tb6_lock);
86872cb5 3116 err = fib6_del(rt, info);
66f5d6ce 3117 spin_unlock_bh(&table->tb6_lock);
1da177e4 3118
6825a26c 3119out:
94e187c0 3120 ip6_rt_put(rt);
1da177e4
LT
3121 return err;
3122}
3123
afb1d4b5 3124int ip6_del_rt(struct net *net, struct rt6_info *rt)
e0a1ad73 3125{
afb1d4b5
DA
3126 struct nl_info info = { .nl_net = net };
3127
528c4ceb 3128 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3129}
3130
0ae81335
DA
3131static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3132{
3133 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3134 struct net *net = info->nl_net;
16a16cd3 3135 struct sk_buff *skb = NULL;
0ae81335 3136 struct fib6_table *table;
e3330039 3137 int err = -ENOENT;
0ae81335 3138
421842ed 3139 if (rt == net->ipv6.fib6_null_entry)
e3330039 3140 goto out_put;
0ae81335 3141 table = rt->rt6i_table;
66f5d6ce 3142 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3143
3144 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3145 struct rt6_info *sibling, *next_sibling;
3146
16a16cd3
DA
3147 /* prefer to send a single notification with all hops */
3148 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3149 if (skb) {
3150 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3151
d4ead6b3 3152 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3153 NULL, NULL, 0, RTM_DELROUTE,
3154 info->portid, seq, 0) < 0) {
3155 kfree_skb(skb);
3156 skb = NULL;
3157 } else
3158 info->skip_notify = 1;
3159 }
3160
0ae81335
DA
3161 list_for_each_entry_safe(sibling, next_sibling,
3162 &rt->rt6i_siblings,
3163 rt6i_siblings) {
3164 err = fib6_del(sibling, info);
3165 if (err)
e3330039 3166 goto out_unlock;
0ae81335
DA
3167 }
3168 }
3169
3170 err = fib6_del(rt, info);
e3330039 3171out_unlock:
66f5d6ce 3172 spin_unlock_bh(&table->tb6_lock);
e3330039 3173out_put:
0ae81335 3174 ip6_rt_put(rt);
16a16cd3
DA
3175
3176 if (skb) {
e3330039 3177 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3178 info->nlh, gfp_any());
3179 }
0ae81335
DA
3180 return err;
3181}
3182
333c4301
DA
3183static int ip6_route_del(struct fib6_config *cfg,
3184 struct netlink_ext_ack *extack)
1da177e4 3185{
2b760fcf 3186 struct rt6_info *rt, *rt_cache;
c71099ac 3187 struct fib6_table *table;
1da177e4 3188 struct fib6_node *fn;
1da177e4
LT
3189 int err = -ESRCH;
3190
5578689a 3191 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3192 if (!table) {
3193 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3194 return err;
d5d531cb 3195 }
c71099ac 3196
66f5d6ce 3197 rcu_read_lock();
1da177e4 3198
c71099ac 3199 fn = fib6_locate(&table->tb6_root,
86872cb5 3200 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3201 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3202 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3203
1da177e4 3204 if (fn) {
66f5d6ce 3205 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3206 if (cfg->fc_flags & RTF_CACHE) {
3207 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3208 &cfg->fc_src);
3209 if (!rt_cache)
3210 continue;
3211 rt = rt_cache;
3212 }
86872cb5 3213 if (cfg->fc_ifindex &&
5e670d84
DA
3214 (!rt->fib6_nh.nh_dev ||
3215 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3216 continue;
86872cb5 3217 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3218 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3219 continue;
86872cb5 3220 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3221 continue;
c2ed1880
M
3222 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3223 continue;
d3843fe5
WW
3224 if (!dst_hold_safe(&rt->dst))
3225 break;
66f5d6ce 3226 rcu_read_unlock();
1da177e4 3227
0ae81335
DA
3228 /* if gateway was specified only delete the one hop */
3229 if (cfg->fc_flags & RTF_GATEWAY)
3230 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3231
3232 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3233 }
3234 }
66f5d6ce 3235 rcu_read_unlock();
1da177e4
LT
3236
3237 return err;
3238}
3239
6700c270 3240static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3241{
a6279458 3242 struct netevent_redirect netevent;
e8599ff4 3243 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3244 struct ndisc_options ndopts;
3245 struct inet6_dev *in6_dev;
3246 struct neighbour *neigh;
71bcdba0 3247 struct rd_msg *msg;
6e157b6a
DM
3248 int optlen, on_link;
3249 u8 *lladdr;
e8599ff4 3250
29a3cad5 3251 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3252 optlen -= sizeof(*msg);
e8599ff4
DM
3253
3254 if (optlen < 0) {
6e157b6a 3255 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3256 return;
3257 }
3258
71bcdba0 3259 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3260
71bcdba0 3261 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3262 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3263 return;
3264 }
3265
6e157b6a 3266 on_link = 0;
71bcdba0 3267 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3268 on_link = 1;
71bcdba0 3269 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3270 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3271 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3272 return;
3273 }
3274
3275 in6_dev = __in6_dev_get(skb->dev);
3276 if (!in6_dev)
3277 return;
3278 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3279 return;
3280
3281 /* RFC2461 8.1:
3282 * The IP source address of the Redirect MUST be the same as the current
3283 * first-hop router for the specified ICMP Destination Address.
3284 */
3285
f997c55c 3286 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3287 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3288 return;
3289 }
6e157b6a
DM
3290
3291 lladdr = NULL;
e8599ff4
DM
3292 if (ndopts.nd_opts_tgt_lladdr) {
3293 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3294 skb->dev);
3295 if (!lladdr) {
3296 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3297 return;
3298 }
3299 }
3300
6e157b6a 3301 rt = (struct rt6_info *) dst;
ec13ad1d 3302 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3303 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3304 return;
6e157b6a 3305 }
e8599ff4 3306
6e157b6a
DM
3307 /* Redirect received -> path was valid.
3308 * Look, redirects are sent only in response to data packets,
3309 * so that this nexthop apparently is reachable. --ANK
3310 */
0dec879f 3311 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3312
71bcdba0 3313 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3314 if (!neigh)
3315 return;
a6279458 3316
1da177e4
LT
3317 /*
3318 * We have finally decided to accept it.
3319 */
3320
f997c55c 3321 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3322 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3323 NEIGH_UPDATE_F_OVERRIDE|
3324 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3325 NEIGH_UPDATE_F_ISROUTER)),
3326 NDISC_REDIRECT, &ndopts);
1da177e4 3327
83a09abd 3328 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3329 if (!nrt)
1da177e4
LT
3330 goto out;
3331
3332 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3333 if (on_link)
3334 nrt->rt6i_flags &= ~RTF_GATEWAY;
3335
b91d5329 3336 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3337 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3338
2b760fcf
WW
3339 /* No need to remove rt from the exception table if rt is
3340 * a cached route because rt6_insert_exception() will
3341 * takes care of it
3342 */
d4ead6b3 3343 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3344 dst_release_immediate(&nrt->dst);
3345 goto out;
3346 }
1da177e4 3347
d8d1f30b
CG
3348 netevent.old = &rt->dst;
3349 netevent.new = &nrt->dst;
71bcdba0 3350 netevent.daddr = &msg->dest;
60592833 3351 netevent.neigh = neigh;
8d71740c
TT
3352 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3353
1da177e4 3354out:
e8599ff4 3355 neigh_release(neigh);
6e157b6a
DM
3356}
3357
70ceb4f5 3358#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3359static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3360 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3361 const struct in6_addr *gwaddr,
3362 struct net_device *dev)
70ceb4f5 3363{
830218c1
DA
3364 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3365 int ifindex = dev->ifindex;
70ceb4f5
YH
3366 struct fib6_node *fn;
3367 struct rt6_info *rt = NULL;
c71099ac
TG
3368 struct fib6_table *table;
3369
830218c1 3370 table = fib6_get_table(net, tb_id);
38308473 3371 if (!table)
c71099ac 3372 return NULL;
70ceb4f5 3373
66f5d6ce 3374 rcu_read_lock();
38fbeeee 3375 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3376 if (!fn)
3377 goto out;
3378
66f5d6ce 3379 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3380 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5
YH
3381 continue;
3382 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3383 continue;
5e670d84 3384 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3385 continue;
d3843fe5 3386 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3387 break;
3388 }
3389out:
66f5d6ce 3390 rcu_read_unlock();
70ceb4f5
YH
3391 return rt;
3392}
3393
efa2cea0 3394static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3395 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3396 const struct in6_addr *gwaddr,
3397 struct net_device *dev,
95c96174 3398 unsigned int pref)
70ceb4f5 3399{
86872cb5 3400 struct fib6_config cfg = {
238fc7ea 3401 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3402 .fc_ifindex = dev->ifindex,
86872cb5
TG
3403 .fc_dst_len = prefixlen,
3404 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3405 RTF_UP | RTF_PREF(pref),
b91d5329 3406 .fc_protocol = RTPROT_RA,
e8478e80 3407 .fc_type = RTN_UNICAST,
15e47304 3408 .fc_nlinfo.portid = 0,
efa2cea0
DL
3409 .fc_nlinfo.nlh = NULL,
3410 .fc_nlinfo.nl_net = net,
86872cb5
TG
3411 };
3412
830218c1 3413 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3414 cfg.fc_dst = *prefix;
3415 cfg.fc_gateway = *gwaddr;
70ceb4f5 3416
e317da96
YH
3417 /* We should treat it as a default route if prefix length is 0. */
3418 if (!prefixlen)
86872cb5 3419 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3420
333c4301 3421 ip6_route_add(&cfg, NULL);
70ceb4f5 3422
830218c1 3423 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3424}
3425#endif
3426
afb1d4b5
DA
3427struct rt6_info *rt6_get_dflt_router(struct net *net,
3428 const struct in6_addr *addr,
3429 struct net_device *dev)
1ab1457c 3430{
830218c1 3431 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3432 struct rt6_info *rt;
c71099ac 3433 struct fib6_table *table;
1da177e4 3434
afb1d4b5 3435 table = fib6_get_table(net, tb_id);
38308473 3436 if (!table)
c71099ac 3437 return NULL;
1da177e4 3438
66f5d6ce
WW
3439 rcu_read_lock();
3440 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3441 if (dev == rt->fib6_nh.nh_dev &&
045927ff 3442 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3443 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3444 break;
3445 }
3446 if (rt)
d3843fe5 3447 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3448 rcu_read_unlock();
1da177e4
LT
3449 return rt;
3450}
3451
afb1d4b5
DA
3452struct rt6_info *rt6_add_dflt_router(struct net *net,
3453 const struct in6_addr *gwaddr,
ebacaaa0
YH
3454 struct net_device *dev,
3455 unsigned int pref)
1da177e4 3456{
86872cb5 3457 struct fib6_config cfg = {
ca254490 3458 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3459 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3460 .fc_ifindex = dev->ifindex,
3461 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3462 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3463 .fc_protocol = RTPROT_RA,
e8478e80 3464 .fc_type = RTN_UNICAST,
15e47304 3465 .fc_nlinfo.portid = 0,
5578689a 3466 .fc_nlinfo.nlh = NULL,
afb1d4b5 3467 .fc_nlinfo.nl_net = net,
86872cb5 3468 };
1da177e4 3469
4e3fd7a0 3470 cfg.fc_gateway = *gwaddr;
1da177e4 3471
333c4301 3472 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3473 struct fib6_table *table;
3474
3475 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3476 if (table)
3477 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3478 }
1da177e4 3479
afb1d4b5 3480 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3481}
3482
afb1d4b5
DA
3483static void __rt6_purge_dflt_routers(struct net *net,
3484 struct fib6_table *table)
1da177e4
LT
3485{
3486 struct rt6_info *rt;
3487
3488restart:
66f5d6ce
WW
3489 rcu_read_lock();
3490 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3491 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3492 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3493 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3494 rcu_read_unlock();
afb1d4b5 3495 ip6_del_rt(net, rt);
d3843fe5 3496 } else {
66f5d6ce 3497 rcu_read_unlock();
d3843fe5 3498 }
1da177e4
LT
3499 goto restart;
3500 }
3501 }
66f5d6ce 3502 rcu_read_unlock();
830218c1
DA
3503
3504 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3505}
3506
3507void rt6_purge_dflt_routers(struct net *net)
3508{
3509 struct fib6_table *table;
3510 struct hlist_head *head;
3511 unsigned int h;
3512
3513 rcu_read_lock();
3514
3515 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3516 head = &net->ipv6.fib_table_hash[h];
3517 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3518 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3519 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3520 }
3521 }
3522
3523 rcu_read_unlock();
1da177e4
LT
3524}
3525
5578689a
DL
3526static void rtmsg_to_fib6_config(struct net *net,
3527 struct in6_rtmsg *rtmsg,
86872cb5
TG
3528 struct fib6_config *cfg)
3529{
3530 memset(cfg, 0, sizeof(*cfg));
3531
ca254490
DA
3532 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3533 : RT6_TABLE_MAIN;
86872cb5
TG
3534 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3535 cfg->fc_metric = rtmsg->rtmsg_metric;
3536 cfg->fc_expires = rtmsg->rtmsg_info;
3537 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3538 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3539 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3540 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3541
5578689a 3542 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3543
4e3fd7a0
AD
3544 cfg->fc_dst = rtmsg->rtmsg_dst;
3545 cfg->fc_src = rtmsg->rtmsg_src;
3546 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3547}
3548
5578689a 3549int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3550{
86872cb5 3551 struct fib6_config cfg;
1da177e4
LT
3552 struct in6_rtmsg rtmsg;
3553 int err;
3554
67ba4152 3555 switch (cmd) {
1da177e4
LT
3556 case SIOCADDRT: /* Add a route */
3557 case SIOCDELRT: /* Delete a route */
af31f412 3558 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3559 return -EPERM;
3560 err = copy_from_user(&rtmsg, arg,
3561 sizeof(struct in6_rtmsg));
3562 if (err)
3563 return -EFAULT;
86872cb5 3564
5578689a 3565 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3566
1da177e4
LT
3567 rtnl_lock();
3568 switch (cmd) {
3569 case SIOCADDRT:
333c4301 3570 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3571 break;
3572 case SIOCDELRT:
333c4301 3573 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3574 break;
3575 default:
3576 err = -EINVAL;
3577 }
3578 rtnl_unlock();
3579
3580 return err;
3ff50b79 3581 }
1da177e4
LT
3582
3583 return -EINVAL;
3584}
3585
3586/*
3587 * Drop the packet on the floor
3588 */
3589
d5fdd6ba 3590static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3591{
612f09e8 3592 int type;
adf30907 3593 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3594 switch (ipstats_mib_noroutes) {
3595 case IPSTATS_MIB_INNOROUTES:
0660e03f 3596 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3597 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3598 IP6_INC_STATS(dev_net(dst->dev),
3599 __in6_dev_get_safely(skb->dev),
3bd653c8 3600 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3601 break;
3602 }
3603 /* FALLTHROUGH */
3604 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3605 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3606 ipstats_mib_noroutes);
612f09e8
YH
3607 break;
3608 }
3ffe533c 3609 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3610 kfree_skb(skb);
3611 return 0;
3612}
3613
9ce8ade0
TG
3614static int ip6_pkt_discard(struct sk_buff *skb)
3615{
612f09e8 3616 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3617}
3618
ede2059d 3619static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3620{
adf30907 3621 skb->dev = skb_dst(skb)->dev;
612f09e8 3622 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3623}
3624
9ce8ade0
TG
3625static int ip6_pkt_prohibit(struct sk_buff *skb)
3626{
612f09e8 3627 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3628}
3629
ede2059d 3630static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3631{
adf30907 3632 skb->dev = skb_dst(skb)->dev;
612f09e8 3633 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3634}
3635
1da177e4
LT
3636/*
3637 * Allocate a dst for local (unicast / anycast) address.
3638 */
3639
afb1d4b5
DA
3640struct rt6_info *addrconf_dst_alloc(struct net *net,
3641 struct inet6_dev *idev,
1da177e4 3642 const struct in6_addr *addr,
8f031519 3643 bool anycast)
1da177e4 3644{
ca254490 3645 u32 tb_id;
4832c30d 3646 struct net_device *dev = idev->dev;
5f02ce24
DA
3647 struct rt6_info *rt;
3648
5f02ce24 3649 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3650 if (!rt)
1da177e4
LT
3651 return ERR_PTR(-ENOMEM);
3652
3b6761d1
DA
3653 rt->dst_nocount = true;
3654
1da177e4 3655 in6_dev_hold(idev);
1da177e4 3656 rt->rt6i_idev = idev;
1da177e4 3657
3b6761d1 3658 rt->dst_host = true;
94b5e0f9 3659 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3660 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80
DA
3661 if (anycast) {
3662 rt->fib6_type = RTN_ANYCAST;
58c4fb86 3663 rt->rt6i_flags |= RTF_ANYCAST;
e8478e80
DA
3664 } else {
3665 rt->fib6_type = RTN_LOCAL;
1da177e4 3666 rt->rt6i_flags |= RTF_LOCAL;
e8478e80 3667 }
1da177e4 3668
5e670d84
DA
3669 rt->fib6_nh.nh_gw = *addr;
3670 rt->fib6_nh.nh_dev = dev;
550bab42 3671 rt->rt6i_gateway = *addr;
4e3fd7a0 3672 rt->rt6i_dst.addr = *addr;
1da177e4 3673 rt->rt6i_dst.plen = 128;
ca254490
DA
3674 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3675 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3676
1da177e4
LT
3677 return rt;
3678}
3679
/*
 * Remove deleted ip from prefsrc entries: argument bundle handed to
 * fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
3686
3687static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3688{
3689 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3690 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3691 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3692
5e670d84 3693 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3694 rt != net->ipv6.fib6_null_entry &&
c3968a85 3695 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3696 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3697 /* remove prefsrc entry */
3698 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3699 /* need to update cache as well */
3700 rt6_exceptions_remove_prefsrc(rt);
3701 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3702 }
3703 return 0;
3704}
3705
3706void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3707{
3708 struct net *net = dev_net(ifp->idev->dev);
3709 struct arg_dev_net_ip adni = {
3710 .dev = ifp->idev->dev,
3711 .net = net,
3712 .addr = &ifp->addr,
3713 };
0c3584d5 3714 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3715}
3716
be7a010d 3717#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3718
3719/* Remove routers and update dst entries when gateway turn into host. */
3720static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3721{
3722 struct in6_addr *gateway = (struct in6_addr *)arg;
3723
2b760fcf 3724 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3725 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3726 return -1;
3727 }
b16cb459
WW
3728
3729 /* Further clean up cached routes in exception table.
3730 * This is needed because cached route may have a different
3731 * gateway than its 'parent' in the case of an ip redirect.
3732 */
3733 rt6_exceptions_clean_tohost(rt, gateway);
3734
be7a010d
DJ
3735 return 0;
3736}
3737
3738void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3739{
3740 fib6_clean_all(net, fib6_clean_tohost, gateway);
3741}
3742
/*
 * Argument bundle for the fib6_ifup() / fib6_ifdown() walkers.  Only one
 * member of the union is meaningful for a given walk.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event */
	};
};
3750
d7dedee1
IS
3751static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3752{
3753 struct rt6_info *iter;
3754 struct fib6_node *fn;
3755
3756 fn = rcu_dereference_protected(rt->rt6i_node,
3757 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3758 iter = rcu_dereference_protected(fn->leaf,
3759 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3760 while (iter) {
3761 if (iter->rt6i_metric == rt->rt6i_metric &&
3762 rt6_qualify_for_ecmp(iter))
3763 return iter;
3764 iter = rcu_dereference_protected(iter->rt6_next,
3765 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3766 }
3767
3768 return NULL;
3769}
3770
3771static bool rt6_is_dead(const struct rt6_info *rt)
3772{
5e670d84
DA
3773 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3774 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d7dedee1
IS
3775 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3776 return true;
3777
3778 return false;
3779}
3780
3781static int rt6_multipath_total_weight(const struct rt6_info *rt)
3782{
3783 struct rt6_info *iter;
3784 int total = 0;
3785
3786 if (!rt6_is_dead(rt))
5e670d84 3787 total += rt->fib6_nh.nh_weight;
d7dedee1
IS
3788
3789 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3790 if (!rt6_is_dead(iter))
5e670d84 3791 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3792 }
3793
3794 return total;
3795}
3796
3797static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3798{
3799 int upper_bound = -1;
3800
3801 if (!rt6_is_dead(rt)) {
5e670d84 3802 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3803 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3804 total) - 1;
3805 }
5e670d84 3806 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3807}
3808
3809static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3810{
3811 struct rt6_info *iter;
3812 int weight = 0;
3813
3814 rt6_upper_bound_set(rt, &weight, total);
3815
3816 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3817 rt6_upper_bound_set(iter, &weight, total);
3818}
3819
3820void rt6_multipath_rebalance(struct rt6_info *rt)
3821{
3822 struct rt6_info *first;
3823 int total;
3824
3825 /* In case the entire multipath route was marked for flushing,
3826 * then there is no need to rebalance upon the removal of every
3827 * sibling route.
3828 */
3829 if (!rt->rt6i_nsiblings || rt->should_flush)
3830 return;
3831
3832 /* During lookup routes are evaluated in order, so we need to
3833 * make sure upper bounds are assigned from the first sibling
3834 * onwards.
3835 */
3836 first = rt6_multipath_first_sibling(rt);
3837 if (WARN_ON_ONCE(!first))
3838 return;
3839
3840 total = rt6_multipath_total_weight(first);
3841 rt6_multipath_upper_bound_set(first, total);
3842}
3843
2127d95a
IS
3844static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3845{
3846 const struct arg_netdev_event *arg = p_arg;
7aef6859 3847 struct net *net = dev_net(arg->dev);
2127d95a 3848
421842ed 3849 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3850 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3851 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3852 rt6_multipath_rebalance(rt);
1de178ed 3853 }
2127d95a
IS
3854
3855 return 0;
3856}
3857
3858void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3859{
3860 struct arg_netdev_event arg = {
3861 .dev = dev,
6802f3ad
IS
3862 {
3863 .nh_flags = nh_flags,
3864 },
2127d95a
IS
3865 };
3866
3867 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3868 arg.nh_flags |= RTNH_F_LINKDOWN;
3869
3870 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3871}
3872
1de178ed
IS
3873static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3874 const struct net_device *dev)
3875{
3876 struct rt6_info *iter;
3877
5e670d84 3878 if (rt->fib6_nh.nh_dev == dev)
1de178ed
IS
3879 return true;
3880 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84 3881 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3882 return true;
3883
3884 return false;
3885}
3886
3887static void rt6_multipath_flush(struct rt6_info *rt)
3888{
3889 struct rt6_info *iter;
3890
3891 rt->should_flush = 1;
3892 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3893 iter->should_flush = 1;
3894}
3895
3896static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3897 const struct net_device *down_dev)
3898{
3899 struct rt6_info *iter;
3900 unsigned int dead = 0;
3901
5e670d84
DA
3902 if (rt->fib6_nh.nh_dev == down_dev ||
3903 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3904 dead++;
3905 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3906 if (iter->fib6_nh.nh_dev == down_dev ||
3907 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3908 dead++;
3909
3910 return dead;
3911}
3912
3913static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3914 const struct net_device *dev,
3915 unsigned int nh_flags)
3916{
3917 struct rt6_info *iter;
3918
5e670d84
DA
3919 if (rt->fib6_nh.nh_dev == dev)
3920 rt->fib6_nh.nh_flags |= nh_flags;
1de178ed 3921 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3922 if (iter->fib6_nh.nh_dev == dev)
3923 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3924}
3925
a1a22c12 3926/* called with write lock held for table with rt */
4c981e28 3927static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3928{
4c981e28
IS
3929 const struct arg_netdev_event *arg = p_arg;
3930 const struct net_device *dev = arg->dev;
7aef6859 3931 struct net *net = dev_net(dev);
8ed67789 3932
421842ed 3933 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3934 return 0;
3935
3936 switch (arg->event) {
3937 case NETDEV_UNREGISTER:
5e670d84 3938 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3939 case NETDEV_DOWN:
1de178ed 3940 if (rt->should_flush)
27c6fa73 3941 return -1;
1de178ed 3942 if (!rt->rt6i_nsiblings)
5e670d84 3943 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3944 if (rt6_multipath_uses_dev(rt, dev)) {
3945 unsigned int count;
3946
3947 count = rt6_multipath_dead_count(rt, dev);
3948 if (rt->rt6i_nsiblings + 1 == count) {
3949 rt6_multipath_flush(rt);
3950 return -1;
3951 }
3952 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3953 RTNH_F_LINKDOWN);
7aef6859 3954 fib6_update_sernum(net, rt);
d7dedee1 3955 rt6_multipath_rebalance(rt);
1de178ed
IS
3956 }
3957 return -2;
27c6fa73 3958 case NETDEV_CHANGE:
5e670d84 3959 if (rt->fib6_nh.nh_dev != dev ||
1de178ed 3960 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3961 break;
5e670d84 3962 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3963 rt6_multipath_rebalance(rt);
27c6fa73 3964 break;
2b241361 3965 }
c159d30c 3966
1da177e4
LT
3967 return 0;
3968}
3969
27c6fa73 3970void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3971{
4c981e28 3972 struct arg_netdev_event arg = {
8ed67789 3973 .dev = dev,
6802f3ad
IS
3974 {
3975 .event = event,
3976 },
8ed67789
DL
3977 };
3978
4c981e28
IS
3979 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3980}
3981
3982void rt6_disable_ip(struct net_device *dev, unsigned long event)
3983{
3984 rt6_sync_down_dev(dev, event);
3985 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3986 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3987}
3988
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3993
3994static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3995{
3996 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3997 struct inet6_dev *idev;
3998
3999 /* In IPv6 pmtu discovery is not optional,
4000 so that RTAX_MTU lock cannot disable it.
4001 We still use this lock to block changes
4002 caused by addrconf/ndisc.
4003 */
4004
4005 idev = __in6_dev_get(arg->dev);
38308473 4006 if (!idev)
1da177e4
LT
4007 return 0;
4008
4009 /* For administrative MTU increase, there is no way to discover
4010 IPv6 PMTU increase, so PMTU increase should be updated here.
4011 Since RFC 1981 doesn't include administrative MTU increase
4012 update PMTU increase is a MUST. (i.e. jumbo frame)
4013 */
5e670d84 4014 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4015 !fib6_metric_locked(rt, RTAX_MTU)) {
4016 u32 mtu = rt->fib6_pmtu;
4017
4018 if (mtu >= arg->mtu ||
4019 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4020 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4021
f5bbe7ee 4022 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4023 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4024 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4025 }
1da177e4
LT
4026 return 0;
4027}
4028
95c96174 4029void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4030{
c71099ac
TG
4031 struct rt6_mtu_change_arg arg = {
4032 .dev = dev,
4033 .mtu = mtu,
4034 };
1da177e4 4035
0c3584d5 4036 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4037}
4038
ef7c79ed 4039static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4040 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4041 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4042 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4043 [RTA_PRIORITY] = { .type = NLA_U32 },
4044 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4045 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4046 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4047 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4048 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4049 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4050 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4051 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4052};
4053
4054static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4055 struct fib6_config *cfg,
4056 struct netlink_ext_ack *extack)
1da177e4 4057{
86872cb5
TG
4058 struct rtmsg *rtm;
4059 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4060 unsigned int pref;
86872cb5 4061 int err;
1da177e4 4062
fceb6435
JB
4063 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4064 NULL);
86872cb5
TG
4065 if (err < 0)
4066 goto errout;
1da177e4 4067
86872cb5
TG
4068 err = -EINVAL;
4069 rtm = nlmsg_data(nlh);
4070 memset(cfg, 0, sizeof(*cfg));
4071
4072 cfg->fc_table = rtm->rtm_table;
4073 cfg->fc_dst_len = rtm->rtm_dst_len;
4074 cfg->fc_src_len = rtm->rtm_src_len;
4075 cfg->fc_flags = RTF_UP;
4076 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4077 cfg->fc_type = rtm->rtm_type;
86872cb5 4078
ef2c7d7b
ND
4079 if (rtm->rtm_type == RTN_UNREACHABLE ||
4080 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4081 rtm->rtm_type == RTN_PROHIBIT ||
4082 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4083 cfg->fc_flags |= RTF_REJECT;
4084
ab79ad14
4085 if (rtm->rtm_type == RTN_LOCAL)
4086 cfg->fc_flags |= RTF_LOCAL;
4087
1f56a01f
MKL
4088 if (rtm->rtm_flags & RTM_F_CLONED)
4089 cfg->fc_flags |= RTF_CACHE;
4090
fc1e64e1
DA
4091 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4092
15e47304 4093 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4094 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4095 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4096
4097 if (tb[RTA_GATEWAY]) {
67b61f6c 4098 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4099 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4100 }
86872cb5
TG
4101
4102 if (tb[RTA_DST]) {
4103 int plen = (rtm->rtm_dst_len + 7) >> 3;
4104
4105 if (nla_len(tb[RTA_DST]) < plen)
4106 goto errout;
4107
4108 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4109 }
86872cb5
TG
4110
4111 if (tb[RTA_SRC]) {
4112 int plen = (rtm->rtm_src_len + 7) >> 3;
4113
4114 if (nla_len(tb[RTA_SRC]) < plen)
4115 goto errout;
4116
4117 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4118 }
86872cb5 4119
c3968a85 4120 if (tb[RTA_PREFSRC])
67b61f6c 4121 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4122
86872cb5
TG
4123 if (tb[RTA_OIF])
4124 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4125
4126 if (tb[RTA_PRIORITY])
4127 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4128
4129 if (tb[RTA_METRICS]) {
4130 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4131 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4132 }
86872cb5
TG
4133
4134 if (tb[RTA_TABLE])
4135 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4136
51ebd318
ND
4137 if (tb[RTA_MULTIPATH]) {
4138 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4139 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4140
4141 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4142 cfg->fc_mp_len, extack);
9ed59592
DA
4143 if (err < 0)
4144 goto errout;
51ebd318
ND
4145 }
4146
c78ba6d6
LR
4147 if (tb[RTA_PREF]) {
4148 pref = nla_get_u8(tb[RTA_PREF]);
4149 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4150 pref != ICMPV6_ROUTER_PREF_HIGH)
4151 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4152 cfg->fc_flags |= RTF_PREF(pref);
4153 }
4154
19e42e45
RP
4155 if (tb[RTA_ENCAP])
4156 cfg->fc_encap = tb[RTA_ENCAP];
4157
9ed59592 4158 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4159 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4160
c255bd68 4161 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4162 if (err < 0)
4163 goto errout;
4164 }
4165
32bc201e
XL
4166 if (tb[RTA_EXPIRES]) {
4167 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4168
4169 if (addrconf_finite_timeout(timeout)) {
4170 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4171 cfg->fc_flags |= RTF_EXPIRES;
4172 }
4173 }
4174
86872cb5
TG
4175 err = 0;
4176errout:
4177 return err;
1da177e4
LT
4178}
4179
6b9ea5a6
RP
4180struct rt6_nh {
4181 struct rt6_info *rt6_info;
4182 struct fib6_config r_cfg;
6b9ea5a6
RP
4183 struct list_head next;
4184};
4185
4186static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4187{
4188 struct rt6_nh *nh;
4189
4190 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4191 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4192 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4193 nh->r_cfg.fc_ifindex);
4194 }
4195}
4196
d4ead6b3
DA
4197static int ip6_route_info_append(struct net *net,
4198 struct list_head *rt6_nh_list,
6b9ea5a6
RP
4199 struct rt6_info *rt, struct fib6_config *r_cfg)
4200{
4201 struct rt6_nh *nh;
6b9ea5a6
RP
4202 int err = -EEXIST;
4203
4204 list_for_each_entry(nh, rt6_nh_list, next) {
4205 /* check if rt6_info already exists */
f06b7549 4206 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4207 return err;
4208 }
4209
4210 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4211 if (!nh)
4212 return -ENOMEM;
4213 nh->rt6_info = rt;
d4ead6b3 4214 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4215 if (err) {
4216 kfree(nh);
4217 return err;
4218 }
4219 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4220 list_add_tail(&nh->next, rt6_nh_list);
4221
4222 return 0;
4223}
4224
3b1137fe
DA
4225static void ip6_route_mpath_notify(struct rt6_info *rt,
4226 struct rt6_info *rt_last,
4227 struct nl_info *info,
4228 __u16 nlflags)
4229{
4230 /* if this is an APPEND route, then rt points to the first route
4231 * inserted and rt_last points to last route inserted. Userspace
4232 * wants a consistent dump of the route which starts at the first
4233 * nexthop. Since sibling routes are always added at the end of
4234 * the list, find the first sibling of the last route appended
4235 */
4236 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4237 rt = list_first_entry(&rt_last->rt6i_siblings,
4238 struct rt6_info,
4239 rt6i_siblings);
4240 }
4241
4242 if (rt)
4243 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4244}
4245
333c4301
DA
4246static int ip6_route_multipath_add(struct fib6_config *cfg,
4247 struct netlink_ext_ack *extack)
51ebd318 4248{
3b1137fe
DA
4249 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4250 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4251 struct fib6_config r_cfg;
4252 struct rtnexthop *rtnh;
6b9ea5a6
RP
4253 struct rt6_info *rt;
4254 struct rt6_nh *err_nh;
4255 struct rt6_nh *nh, *nh_safe;
3b1137fe 4256 __u16 nlflags;
51ebd318
ND
4257 int remaining;
4258 int attrlen;
6b9ea5a6
RP
4259 int err = 1;
4260 int nhn = 0;
4261 int replace = (cfg->fc_nlinfo.nlh &&
4262 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4263 LIST_HEAD(rt6_nh_list);
51ebd318 4264
3b1137fe
DA
4265 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4266 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4267 nlflags |= NLM_F_APPEND;
4268
35f1b4e9 4269 remaining = cfg->fc_mp_len;
51ebd318 4270 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4271
6b9ea5a6
RP
4272 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4273 * rt6_info structs per nexthop
4274 */
51ebd318
ND
4275 while (rtnh_ok(rtnh, remaining)) {
4276 memcpy(&r_cfg, cfg, sizeof(*cfg));
4277 if (rtnh->rtnh_ifindex)
4278 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4279
4280 attrlen = rtnh_attrlen(rtnh);
4281 if (attrlen > 0) {
4282 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4283
4284 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4285 if (nla) {
67b61f6c 4286 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4287 r_cfg.fc_flags |= RTF_GATEWAY;
4288 }
19e42e45
RP
4289 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4290 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4291 if (nla)
4292 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4293 }
6b9ea5a6 4294
68e2ffde 4295 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4296 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4297 if (IS_ERR(rt)) {
4298 err = PTR_ERR(rt);
4299 rt = NULL;
6b9ea5a6 4300 goto cleanup;
8c5b83f0 4301 }
6b9ea5a6 4302
5e670d84 4303 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4304
d4ead6b3
DA
4305 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4306 rt, &r_cfg);
51ebd318 4307 if (err) {
587fea74 4308 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4309 goto cleanup;
4310 }
4311
4312 rtnh = rtnh_next(rtnh, &remaining);
4313 }
4314
3b1137fe
DA
4315 /* for add and replace send one notification with all nexthops.
4316 * Skip the notification in fib6_add_rt2node and send one with
4317 * the full route when done
4318 */
4319 info->skip_notify = 1;
4320
6b9ea5a6
RP
4321 err_nh = NULL;
4322 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4323 rt_last = nh->rt6_info;
d4ead6b3 4324 err = __ip6_ins_rt(nh->rt6_info, info, extack);
3b1137fe
DA
4325 /* save reference to first route for notification */
4326 if (!rt_notif && !err)
4327 rt_notif = nh->rt6_info;
4328
6b9ea5a6
RP
4329 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4330 nh->rt6_info = NULL;
4331 if (err) {
4332 if (replace && nhn)
4333 ip6_print_replace_route_err(&rt6_nh_list);
4334 err_nh = nh;
4335 goto add_errout;
51ebd318 4336 }
6b9ea5a6 4337
1a72418b 4338 /* Because each route is added like a single route we remove
27596472
MK
4339 * these flags after the first nexthop: if there is a collision,
4340 * we have already failed to add the first nexthop:
4341 * fib6_add_rt2node() has rejected it; when replacing, old
4342 * nexthops have been replaced by first new, the rest should
4343 * be added to it.
1a72418b 4344 */
27596472
MK
4345 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4346 NLM_F_REPLACE);
6b9ea5a6
RP
4347 nhn++;
4348 }
4349
3b1137fe
DA
4350 /* success ... tell user about new route */
4351 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4352 goto cleanup;
4353
4354add_errout:
3b1137fe
DA
4355 /* send notification for routes that were added so that
4356 * the delete notifications sent by ip6_route_del are
4357 * coherent
4358 */
4359 if (rt_notif)
4360 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4361
6b9ea5a6
RP
4362 /* Delete routes that were already added */
4363 list_for_each_entry(nh, &rt6_nh_list, next) {
4364 if (err_nh == nh)
4365 break;
333c4301 4366 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4367 }
4368
4369cleanup:
4370 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4371 if (nh->rt6_info)
4372 dst_release_immediate(&nh->rt6_info->dst);
6b9ea5a6
RP
4373 list_del(&nh->next);
4374 kfree(nh);
4375 }
4376
4377 return err;
4378}
4379
333c4301
DA
4380static int ip6_route_multipath_del(struct fib6_config *cfg,
4381 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4382{
4383 struct fib6_config r_cfg;
4384 struct rtnexthop *rtnh;
4385 int remaining;
4386 int attrlen;
4387 int err = 1, last_err = 0;
4388
4389 remaining = cfg->fc_mp_len;
4390 rtnh = (struct rtnexthop *)cfg->fc_mp;
4391
4392 /* Parse a Multipath Entry */
4393 while (rtnh_ok(rtnh, remaining)) {
4394 memcpy(&r_cfg, cfg, sizeof(*cfg));
4395 if (rtnh->rtnh_ifindex)
4396 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4397
4398 attrlen = rtnh_attrlen(rtnh);
4399 if (attrlen > 0) {
4400 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4401
4402 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4403 if (nla) {
4404 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4405 r_cfg.fc_flags |= RTF_GATEWAY;
4406 }
4407 }
333c4301 4408 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4409 if (err)
4410 last_err = err;
4411
51ebd318
ND
4412 rtnh = rtnh_next(rtnh, &remaining);
4413 }
4414
4415 return last_err;
4416}
4417
c21ef3e3
DA
4418static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4419 struct netlink_ext_ack *extack)
1da177e4 4420{
86872cb5
TG
4421 struct fib6_config cfg;
4422 int err;
1da177e4 4423
333c4301 4424 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4425 if (err < 0)
4426 return err;
4427
51ebd318 4428 if (cfg.fc_mp)
333c4301 4429 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4430 else {
4431 cfg.fc_delete_all_nh = 1;
333c4301 4432 return ip6_route_del(&cfg, extack);
0ae81335 4433 }
1da177e4
LT
4434}
4435
c21ef3e3
DA
4436static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4437 struct netlink_ext_ack *extack)
1da177e4 4438{
86872cb5
TG
4439 struct fib6_config cfg;
4440 int err;
1da177e4 4441
333c4301 4442 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4443 if (err < 0)
4444 return err;
4445
51ebd318 4446 if (cfg.fc_mp)
333c4301 4447 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4448 else
333c4301 4449 return ip6_route_add(&cfg, extack);
1da177e4
LT
4450}
4451
beb1afac 4452static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4453{
beb1afac
DA
4454 int nexthop_len = 0;
4455
4456 if (rt->rt6i_nsiblings) {
4457 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4458 + NLA_ALIGN(sizeof(struct rtnexthop))
4459 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4460 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac
DA
4461
4462 nexthop_len *= rt->rt6i_nsiblings;
4463 }
4464
339bf98f
TG
4465 return NLMSG_ALIGN(sizeof(struct rtmsg))
4466 + nla_total_size(16) /* RTA_SRC */
4467 + nla_total_size(16) /* RTA_DST */
4468 + nla_total_size(16) /* RTA_GATEWAY */
4469 + nla_total_size(16) /* RTA_PREFSRC */
4470 + nla_total_size(4) /* RTA_TABLE */
4471 + nla_total_size(4) /* RTA_IIF */
4472 + nla_total_size(4) /* RTA_OIF */
4473 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4474 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4475 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4476 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4477 + nla_total_size(1) /* RTA_PREF */
5e670d84 4478 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4479 + nexthop_len;
4480}
4481
4482static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4483 unsigned int *flags, bool skip_oif)
beb1afac 4484{
5e670d84 4485 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4486 *flags |= RTNH_F_DEAD;
4487
5e670d84 4488 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4489 *flags |= RTNH_F_LINKDOWN;
4490 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4491 *flags |= RTNH_F_DEAD;
4492 }
4493
4494 if (rt->rt6i_flags & RTF_GATEWAY) {
5e670d84 4495 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4496 goto nla_put_failure;
4497 }
4498
5e670d84
DA
4499 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4500 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4501 *flags |= RTNH_F_OFFLOAD;
4502
5be083ce 4503 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4504 if (!skip_oif && rt->fib6_nh.nh_dev &&
4505 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4506 goto nla_put_failure;
4507
5e670d84
DA
4508 if (rt->fib6_nh.nh_lwtstate &&
4509 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4510 goto nla_put_failure;
4511
4512 return 0;
4513
4514nla_put_failure:
4515 return -EMSGSIZE;
4516}
4517
5be083ce 4518/* add multipath next hop */
beb1afac
DA
4519static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4520{
5e670d84 4521 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4522 struct rtnexthop *rtnh;
4523 unsigned int flags = 0;
4524
4525 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4526 if (!rtnh)
4527 goto nla_put_failure;
4528
5e670d84
DA
4529 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4530 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4531
5be083ce 4532 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4533 goto nla_put_failure;
4534
4535 rtnh->rtnh_flags = flags;
4536
4537 /* length of rtnetlink header + attributes */
4538 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4539
4540 return 0;
4541
4542nla_put_failure:
4543 return -EMSGSIZE;
339bf98f
TG
4544}
4545
d4ead6b3
DA
4546static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4547 struct rt6_info *rt, struct dst_entry *dst,
4548 struct in6_addr *dest, struct in6_addr *src,
15e47304 4549 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4550 unsigned int flags)
1da177e4
LT
4551{
4552 struct rtmsg *rtm;
2d7202bf 4553 struct nlmsghdr *nlh;
d4ead6b3
DA
4554 long expires = 0;
4555 u32 *pmetrics;
9e762a4a 4556 u32 table;
1da177e4 4557
15e47304 4558 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4559 if (!nlh)
26932566 4560 return -EMSGSIZE;
2d7202bf
TG
4561
4562 rtm = nlmsg_data(nlh);
1da177e4
LT
4563 rtm->rtm_family = AF_INET6;
4564 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4565 rtm->rtm_src_len = rt->rt6i_src.plen;
4566 rtm->rtm_tos = 0;
c71099ac 4567 if (rt->rt6i_table)
9e762a4a 4568 table = rt->rt6i_table->tb6_id;
c71099ac 4569 else
9e762a4a
PM
4570 table = RT6_TABLE_UNSPEC;
4571 rtm->rtm_table = table;
c78679e8
DM
4572 if (nla_put_u32(skb, RTA_TABLE, table))
4573 goto nla_put_failure;
e8478e80
DA
4574
4575 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4576 rtm->rtm_flags = 0;
4577 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4578 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4579
38308473 4580 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4581 rtm->rtm_flags |= RTM_F_CLONED;
4582
d4ead6b3
DA
4583 if (dest) {
4584 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4585 goto nla_put_failure;
1ab1457c 4586 rtm->rtm_dst_len = 128;
1da177e4 4587 } else if (rtm->rtm_dst_len)
930345ea 4588 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4589 goto nla_put_failure;
1da177e4
LT
4590#ifdef CONFIG_IPV6_SUBTREES
4591 if (src) {
930345ea 4592 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4593 goto nla_put_failure;
1ab1457c 4594 rtm->rtm_src_len = 128;
c78679e8 4595 } else if (rtm->rtm_src_len &&
930345ea 4596 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4597 goto nla_put_failure;
1da177e4 4598#endif
7bc570c8
YH
4599 if (iif) {
4600#ifdef CONFIG_IPV6_MROUTE
4601 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4602 int err = ip6mr_get_route(net, skb, rtm, portid);
4603
4604 if (err == 0)
4605 return 0;
4606 if (err < 0)
4607 goto nla_put_failure;
7bc570c8
YH
4608 } else
4609#endif
c78679e8
DM
4610 if (nla_put_u32(skb, RTA_IIF, iif))
4611 goto nla_put_failure;
d4ead6b3 4612 } else if (dest) {
1da177e4 4613 struct in6_addr saddr_buf;
d4ead6b3 4614 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4615 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4616 goto nla_put_failure;
1da177e4 4617 }
2d7202bf 4618
c3968a85
DW
4619 if (rt->rt6i_prefsrc.plen) {
4620 struct in6_addr saddr_buf;
4e3fd7a0 4621 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4622 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4623 goto nla_put_failure;
c3968a85
DW
4624 }
4625
d4ead6b3
DA
4626 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4627 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4628 goto nla_put_failure;
4629
c78679e8
DM
4630 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4631 goto nla_put_failure;
8253947e 4632
beb1afac
DA
4633 /* For multipath routes, walk the siblings list and add
4634 * each as a nexthop within RTA_MULTIPATH.
4635 */
4636 if (rt->rt6i_nsiblings) {
4637 struct rt6_info *sibling, *next_sibling;
4638 struct nlattr *mp;
4639
4640 mp = nla_nest_start(skb, RTA_MULTIPATH);
4641 if (!mp)
4642 goto nla_put_failure;
4643
4644 if (rt6_add_nexthop(skb, rt) < 0)
4645 goto nla_put_failure;
4646
4647 list_for_each_entry_safe(sibling, next_sibling,
4648 &rt->rt6i_siblings, rt6i_siblings) {
4649 if (rt6_add_nexthop(skb, sibling) < 0)
4650 goto nla_put_failure;
4651 }
4652
4653 nla_nest_end(skb, mp);
4654 } else {
5be083ce 4655 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4656 goto nla_put_failure;
4657 }
4658
14895687
DA
4659 if (rt->rt6i_flags & RTF_EXPIRES) {
4660 expires = dst ? dst->expires : rt->expires;
4661 expires -= jiffies;
4662 }
69cdf8f9 4663
d4ead6b3 4664 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4665 goto nla_put_failure;
2d7202bf 4666
c78ba6d6
LR
4667 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4668 goto nla_put_failure;
4669
19e42e45 4670
053c095a
JB
4671 nlmsg_end(skb, nlh);
4672 return 0;
2d7202bf
TG
4673
4674nla_put_failure:
26932566
PM
4675 nlmsg_cancel(skb, nlh);
4676 return -EMSGSIZE;
1da177e4
LT
4677}
4678
1b43af54 4679int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4680{
4681 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4682 struct net *net = arg->net;
4683
421842ed 4684 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4685 return 0;
1da177e4 4686
2d7202bf
TG
4687 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4688 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4689
4690 /* user wants prefix routes only */
4691 if (rtm->rtm_flags & RTM_F_PREFIX &&
4692 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4693 /* success since this is not a prefix route */
4694 return 1;
4695 }
4696 }
1da177e4 4697
d4ead6b3
DA
4698 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4699 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4700 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4701}
4702
c21ef3e3
DA
4703static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4704 struct netlink_ext_ack *extack)
1da177e4 4705{
3b1e0a65 4706 struct net *net = sock_net(in_skb->sk);
ab364a6f 4707 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4708 int err, iif = 0, oif = 0;
4709 struct dst_entry *dst;
ab364a6f 4710 struct rt6_info *rt;
1da177e4 4711 struct sk_buff *skb;
ab364a6f 4712 struct rtmsg *rtm;
4c9483b2 4713 struct flowi6 fl6;
18c3a61c 4714 bool fibmatch;
1da177e4 4715
fceb6435 4716 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4717 extack);
ab364a6f
TG
4718 if (err < 0)
4719 goto errout;
1da177e4 4720
ab364a6f 4721 err = -EINVAL;
4c9483b2 4722 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4723 rtm = nlmsg_data(nlh);
4724 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4725 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4726
ab364a6f
TG
4727 if (tb[RTA_SRC]) {
4728 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4729 goto errout;
4730
4e3fd7a0 4731 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4732 }
4733
4734 if (tb[RTA_DST]) {
4735 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4736 goto errout;
4737
4e3fd7a0 4738 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4739 }
4740
4741 if (tb[RTA_IIF])
4742 iif = nla_get_u32(tb[RTA_IIF]);
4743
4744 if (tb[RTA_OIF])
72331bc0 4745 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4746
2e47b291
LC
4747 if (tb[RTA_MARK])
4748 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4749
622ec2c9
LC
4750 if (tb[RTA_UID])
4751 fl6.flowi6_uid = make_kuid(current_user_ns(),
4752 nla_get_u32(tb[RTA_UID]));
4753 else
4754 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4755
1da177e4
LT
4756 if (iif) {
4757 struct net_device *dev;
72331bc0
SL
4758 int flags = 0;
4759
121622db
FW
4760 rcu_read_lock();
4761
4762 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4763 if (!dev) {
121622db 4764 rcu_read_unlock();
1da177e4 4765 err = -ENODEV;
ab364a6f 4766 goto errout;
1da177e4 4767 }
72331bc0
SL
4768
4769 fl6.flowi6_iif = iif;
4770
4771 if (!ipv6_addr_any(&fl6.saddr))
4772 flags |= RT6_LOOKUP_F_HAS_SADDR;
4773
b75cc8f9 4774 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4775
4776 rcu_read_unlock();
72331bc0
SL
4777 } else {
4778 fl6.flowi6_oif = oif;
4779
58acfd71 4780 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4781 }
4782
18c3a61c
RP
4783
4784 rt = container_of(dst, struct rt6_info, dst);
4785 if (rt->dst.error) {
4786 err = rt->dst.error;
4787 ip6_rt_put(rt);
4788 goto errout;
1da177e4
LT
4789 }
4790
9d6acb3b
WC
4791 if (rt == net->ipv6.ip6_null_entry) {
4792 err = rt->dst.error;
4793 ip6_rt_put(rt);
4794 goto errout;
4795 }
4796
fba961ab
DM
4797 if (fibmatch && rt->from) {
4798 struct rt6_info *ort = rt->from;
58acfd71
IS
4799
4800 dst_hold(&ort->dst);
4801 ip6_rt_put(rt);
4802 rt = ort;
4803 }
4804
ab364a6f 4805 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4806 if (!skb) {
94e187c0 4807 ip6_rt_put(rt);
ab364a6f
TG
4808 err = -ENOBUFS;
4809 goto errout;
4810 }
1da177e4 4811
d8d1f30b 4812 skb_dst_set(skb, &rt->dst);
18c3a61c 4813 if (fibmatch)
d4ead6b3 4814 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
18c3a61c
RP
4815 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4816 nlh->nlmsg_seq, 0);
4817 else
d4ead6b3
DA
4818 err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
4819 iif, RTM_NEWROUTE,
4820 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4821 0);
1da177e4 4822 if (err < 0) {
ab364a6f
TG
4823 kfree_skb(skb);
4824 goto errout;
1da177e4
LT
4825 }
4826
15e47304 4827 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4828errout:
1da177e4 4829 return err;
1da177e4
LT
4830}
4831
37a1d361
RP
4832void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4833 unsigned int nlm_flags)
1da177e4
LT
4834{
4835 struct sk_buff *skb;
5578689a 4836 struct net *net = info->nl_net;
528c4ceb
DL
4837 u32 seq;
4838 int err;
4839
4840 err = -ENOBUFS;
38308473 4841 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4842
19e42e45 4843 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4844 if (!skb)
21713ebc
TG
4845 goto errout;
4846
d4ead6b3
DA
4847 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4848 event, info->portid, seq, nlm_flags);
26932566
PM
4849 if (err < 0) {
4850 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4851 WARN_ON(err == -EMSGSIZE);
4852 kfree_skb(skb);
4853 goto errout;
4854 }
15e47304 4855 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4856 info->nlh, gfp_any());
4857 return;
21713ebc
TG
4858errout:
4859 if (err < 0)
5578689a 4860 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4861}
4862
8ed67789 4863static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4864 unsigned long event, void *ptr)
8ed67789 4865{
351638e7 4866 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4867 struct net *net = dev_net(dev);
8ed67789 4868
242d3a49
WC
4869 if (!(dev->flags & IFF_LOOPBACK))
4870 return NOTIFY_OK;
4871
4872 if (event == NETDEV_REGISTER) {
421842ed
DA
4873 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4874 net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4875 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4876 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4877#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4878 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4879 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4880 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4881 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4882#endif
76da0704
WC
4883 } else if (event == NETDEV_UNREGISTER &&
4884 dev->reg_state != NETREG_UNREGISTERED) {
4885 /* NETDEV_UNREGISTER could be fired for multiple times by
4886 * netdev_wait_allrefs(). Make sure we only call this once.
4887 */
421842ed 4888 in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
12d94a80 4889 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4890#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4891 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4892 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4893#endif
4894 }
4895
4896 return NOTIFY_OK;
4897}
4898
1da177e4
LT
4899/*
4900 * /proc
4901 */
4902
4903#ifdef CONFIG_PROC_FS
4904
33120b30 4905static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4906 .open = ipv6_route_open,
4907 .read = seq_read,
4908 .llseek = seq_lseek,
8d2ca1d7 4909 .release = seq_release_net,
33120b30
AD
4910};
4911
1da177e4
LT
4912static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4913{
69ddb805 4914 struct net *net = (struct net *)seq->private;
1da177e4 4915 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4916 net->ipv6.rt6_stats->fib_nodes,
4917 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4918 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4919 net->ipv6.rt6_stats->fib_rt_entries,
4920 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4921 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4922 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4923
4924 return 0;
4925}
4926
4927static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4928{
de05c557 4929 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4930}
4931
9a32144e 4932static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4933 .open = rt6_stats_seq_open,
4934 .read = seq_read,
4935 .llseek = seq_lseek,
b6fcbdb4 4936 .release = single_release_net,
1da177e4
LT
4937};
4938#endif /* CONFIG_PROC_FS */
4939
4940#ifdef CONFIG_SYSCTL
4941
1da177e4 4942static
fe2c6338 4943int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4944 void __user *buffer, size_t *lenp, loff_t *ppos)
4945{
c486da34
LAG
4946 struct net *net;
4947 int delay;
4948 if (!write)
1da177e4 4949 return -EINVAL;
c486da34
LAG
4950
4951 net = (struct net *)ctl->extra1;
4952 delay = net->ipv6.sysctl.flush_delay;
4953 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4954 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4955 return 0;
1da177e4
LT
4956}
4957
fe2c6338 4958struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4959 {
1da177e4 4960 .procname = "flush",
4990509f 4961 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4962 .maxlen = sizeof(int),
89c8b3a1 4963 .mode = 0200,
6d9f239a 4964 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4965 },
4966 {
1da177e4 4967 .procname = "gc_thresh",
9a7ec3a9 4968 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4969 .maxlen = sizeof(int),
4970 .mode = 0644,
6d9f239a 4971 .proc_handler = proc_dointvec,
1da177e4
LT
4972 },
4973 {
1da177e4 4974 .procname = "max_size",
4990509f 4975 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4976 .maxlen = sizeof(int),
4977 .mode = 0644,
6d9f239a 4978 .proc_handler = proc_dointvec,
1da177e4
LT
4979 },
4980 {
1da177e4 4981 .procname = "gc_min_interval",
4990509f 4982 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4983 .maxlen = sizeof(int),
4984 .mode = 0644,
6d9f239a 4985 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4986 },
4987 {
1da177e4 4988 .procname = "gc_timeout",
4990509f 4989 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4990 .maxlen = sizeof(int),
4991 .mode = 0644,
6d9f239a 4992 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4993 },
4994 {
1da177e4 4995 .procname = "gc_interval",
4990509f 4996 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4997 .maxlen = sizeof(int),
4998 .mode = 0644,
6d9f239a 4999 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5000 },
5001 {
1da177e4 5002 .procname = "gc_elasticity",
4990509f 5003 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5004 .maxlen = sizeof(int),
5005 .mode = 0644,
f3d3f616 5006 .proc_handler = proc_dointvec,
1da177e4
LT
5007 },
5008 {
1da177e4 5009 .procname = "mtu_expires",
4990509f 5010 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5011 .maxlen = sizeof(int),
5012 .mode = 0644,
6d9f239a 5013 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5014 },
5015 {
1da177e4 5016 .procname = "min_adv_mss",
4990509f 5017 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5018 .maxlen = sizeof(int),
5019 .mode = 0644,
f3d3f616 5020 .proc_handler = proc_dointvec,
1da177e4
LT
5021 },
5022 {
1da177e4 5023 .procname = "gc_min_interval_ms",
4990509f 5024 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5025 .maxlen = sizeof(int),
5026 .mode = 0644,
6d9f239a 5027 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5028 },
f8572d8f 5029 { }
1da177e4
LT
5030};
5031
2c8c1e72 5032struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5033{
5034 struct ctl_table *table;
5035
5036 table = kmemdup(ipv6_route_table_template,
5037 sizeof(ipv6_route_table_template),
5038 GFP_KERNEL);
5ee09105
YH
5039
5040 if (table) {
5041 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5042 table[0].extra1 = net;
86393e52 5043 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5044 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5045 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5046 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5047 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5048 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5049 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5050 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5051 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5052
5053 /* Don't export sysctls to unprivileged users */
5054 if (net->user_ns != &init_user_ns)
5055 table[0].procname = NULL;
5ee09105
YH
5056 }
5057
760f2d01
DL
5058 return table;
5059}
1da177e4
LT
5060#endif
5061
2c8c1e72 5062static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5063{
633d424b 5064 int ret = -ENOMEM;
8ed67789 5065
86393e52
AD
5066 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5067 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5068
fc66f95c
ED
5069 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5070 goto out_ip6_dst_ops;
5071
421842ed
DA
5072 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5073 sizeof(*net->ipv6.fib6_null_entry),
5074 GFP_KERNEL);
5075 if (!net->ipv6.fib6_null_entry)
5076 goto out_ip6_dst_entries;
5077
8ed67789
DL
5078 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5079 sizeof(*net->ipv6.ip6_null_entry),
5080 GFP_KERNEL);
5081 if (!net->ipv6.ip6_null_entry)
421842ed 5082 goto out_fib6_null_entry;
d8d1f30b 5083 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5084 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5085 ip6_template_metrics, true);
8ed67789
DL
5086
5087#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5088 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5089 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5090 sizeof(*net->ipv6.ip6_prohibit_entry),
5091 GFP_KERNEL);
68fffc67
PZ
5092 if (!net->ipv6.ip6_prohibit_entry)
5093 goto out_ip6_null_entry;
d8d1f30b 5094 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5095 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5096 ip6_template_metrics, true);
8ed67789
DL
5097
5098 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5099 sizeof(*net->ipv6.ip6_blk_hole_entry),
5100 GFP_KERNEL);
68fffc67
PZ
5101 if (!net->ipv6.ip6_blk_hole_entry)
5102 goto out_ip6_prohibit_entry;
d8d1f30b 5103 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5104 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5105 ip6_template_metrics, true);
8ed67789
DL
5106#endif
5107
b339a47c
PZ
5108 net->ipv6.sysctl.flush_delay = 0;
5109 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5110 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5111 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5112 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5113 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5114 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5115 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5116
6891a346
BT
5117 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5118
8ed67789
DL
5119 ret = 0;
5120out:
5121 return ret;
f2fc6a54 5122
68fffc67
PZ
5123#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5124out_ip6_prohibit_entry:
5125 kfree(net->ipv6.ip6_prohibit_entry);
5126out_ip6_null_entry:
5127 kfree(net->ipv6.ip6_null_entry);
5128#endif
421842ed
DA
5129out_fib6_null_entry:
5130 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5131out_ip6_dst_entries:
5132 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5133out_ip6_dst_ops:
f2fc6a54 5134 goto out;
cdb18761
DL
5135}
5136
2c8c1e72 5137static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5138{
421842ed 5139 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5140 kfree(net->ipv6.ip6_null_entry);
5141#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5142 kfree(net->ipv6.ip6_prohibit_entry);
5143 kfree(net->ipv6.ip6_blk_hole_entry);
5144#endif
41bb78b4 5145 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5146}
5147
d189634e
TG
5148static int __net_init ip6_route_net_init_late(struct net *net)
5149{
5150#ifdef CONFIG_PROC_FS
d4beaa66 5151 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5152 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5153#endif
5154 return 0;
5155}
5156
5157static void __net_exit ip6_route_net_exit_late(struct net *net)
5158{
5159#ifdef CONFIG_PROC_FS
ece31ffd
G
5160 remove_proc_entry("ipv6_route", net->proc_net);
5161 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5162#endif
5163}
5164
cdb18761
DL
5165static struct pernet_operations ip6_route_net_ops = {
5166 .init = ip6_route_net_init,
5167 .exit = ip6_route_net_exit,
5168};
5169
c3426b47
DM
5170static int __net_init ipv6_inetpeer_init(struct net *net)
5171{
5172 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5173
5174 if (!bp)
5175 return -ENOMEM;
5176 inet_peer_base_init(bp);
5177 net->ipv6.peers = bp;
5178 return 0;
5179}
5180
5181static void __net_exit ipv6_inetpeer_exit(struct net *net)
5182{
5183 struct inet_peer_base *bp = net->ipv6.peers;
5184
5185 net->ipv6.peers = NULL;
56a6b248 5186 inetpeer_invalidate_tree(bp);
c3426b47
DM
5187 kfree(bp);
5188}
5189
2b823f72 5190static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5191 .init = ipv6_inetpeer_init,
5192 .exit = ipv6_inetpeer_exit,
5193};
5194
d189634e
TG
5195static struct pernet_operations ip6_route_net_late_ops = {
5196 .init = ip6_route_net_init_late,
5197 .exit = ip6_route_net_exit_late,
5198};
5199
8ed67789
DL
5200static struct notifier_block ip6_route_dev_notifier = {
5201 .notifier_call = ip6_route_dev_notify,
242d3a49 5202 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5203};
5204
2f460933
WC
5205void __init ip6_route_init_special_entries(void)
5206{
5207 /* Registering of the loopback is done before this portion of code,
5208 * the loopback reference in rt6_info will not be taken, do it
5209 * manually for init_net */
421842ed
DA
5210 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5211 init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2f460933
WC
5212 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5213 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5214 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5215 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5216 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5217 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5218 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5219 #endif
5220}
5221
433d49c3 5222int __init ip6_route_init(void)
1da177e4 5223{
433d49c3 5224 int ret;
8d0b94af 5225 int cpu;
433d49c3 5226
9a7ec3a9
DL
5227 ret = -ENOMEM;
5228 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5229 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5230 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5231 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5232 goto out;
14e50e57 5233
fc66f95c 5234 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5235 if (ret)
bdb3289f 5236 goto out_kmem_cache;
bdb3289f 5237
c3426b47
DM
5238 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5239 if (ret)
e8803b6c 5240 goto out_dst_entries;
2a0c451a 5241
7e52b33b
DM
5242 ret = register_pernet_subsys(&ip6_route_net_ops);
5243 if (ret)
5244 goto out_register_inetpeer;
c3426b47 5245
5dc121e9
AE
5246 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5247
e8803b6c 5248 ret = fib6_init();
433d49c3 5249 if (ret)
8ed67789 5250 goto out_register_subsys;
433d49c3 5251
433d49c3
DL
5252 ret = xfrm6_init();
5253 if (ret)
e8803b6c 5254 goto out_fib6_init;
c35b7e72 5255
433d49c3
DL
5256 ret = fib6_rules_init();
5257 if (ret)
5258 goto xfrm6_init;
7e5449c2 5259
d189634e
TG
5260 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5261 if (ret)
5262 goto fib6_rules_init;
5263
16feebcf
FW
5264 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5265 inet6_rtm_newroute, NULL, 0);
5266 if (ret < 0)
5267 goto out_register_late_subsys;
5268
5269 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5270 inet6_rtm_delroute, NULL, 0);
5271 if (ret < 0)
5272 goto out_register_late_subsys;
5273
5274 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5275 inet6_rtm_getroute, NULL,
5276 RTNL_FLAG_DOIT_UNLOCKED);
5277 if (ret < 0)
d189634e 5278 goto out_register_late_subsys;
c127ea2c 5279
8ed67789 5280 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5281 if (ret)
d189634e 5282 goto out_register_late_subsys;
8ed67789 5283
8d0b94af
MKL
5284 for_each_possible_cpu(cpu) {
5285 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5286
5287 INIT_LIST_HEAD(&ul->head);
5288 spin_lock_init(&ul->lock);
5289 }
5290
433d49c3
DL
5291out:
5292 return ret;
5293
d189634e 5294out_register_late_subsys:
16feebcf 5295 rtnl_unregister_all(PF_INET6);
d189634e 5296 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5297fib6_rules_init:
433d49c3
DL
5298 fib6_rules_cleanup();
5299xfrm6_init:
433d49c3 5300 xfrm6_fini();
2a0c451a
TG
5301out_fib6_init:
5302 fib6_gc_cleanup();
8ed67789
DL
5303out_register_subsys:
5304 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5305out_register_inetpeer:
5306 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5307out_dst_entries:
5308 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5309out_kmem_cache:
f2fc6a54 5310 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5311 goto out;
1da177e4
LT
5312}
5313
5314void ip6_route_cleanup(void)
5315{
8ed67789 5316 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5317 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5318 fib6_rules_cleanup();
1da177e4 5319 xfrm6_fini();
1da177e4 5320 fib6_gc_cleanup();
c3426b47 5321 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5322 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5323 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5324 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5325}