net/ipv6: introduce fib6_info struct and helpers
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check used while scoring routes.
 * Every failure code is negative; rt6_score_route() passes a negative
 * value straight back to its caller.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
52bd4c0c 99static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3 100static size_t rt6_nlmsg_size(struct rt6_info *rt);
d4ead6b3
DA
101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 struct rt6_info *rt, struct dst_entry *dst,
103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
35732d01
WW
106static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 111static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
efa2cea0 116static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
421842ed
DA
286static const struct rt6_info fib6_null_entry_template = {
287 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .rt6i_protocol = RTPROT_KERNEL,
289 .rt6i_metric = ~(u32)0,
290 .rt6i_ref = ATOMIC_INIT(1),
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 305 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
306 .rt6i_metric = ~(u32) 0,
307 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 308 .fib6_type = RTN_UNREACHABLE,
1da177e4
LT
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 326 .fib6_type = RTN_PROHIBIT,
101367c2
TG
327};
328
fb0af4c7 329static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
330 .dst = {
331 .__refcnt = ATOMIC_INIT(1),
332 .__use = 1,
2c20cbd7 333 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 334 .error = -EINVAL,
d8d1f30b 335 .input = dst_discard,
ede2059d 336 .output = dst_discard_out,
101367c2
TG
337 },
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 339 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
340 .rt6i_metric = ~(u32) 0,
341 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 342 .fib6_type = RTN_BLACKHOLE,
101367c2
TG
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
d4ead6b3 354 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
ebfa45f0
MKL
355}
356
1da177e4 357/* allocate dst with ip6_dst_ops */
d52d3997
MKL
358static struct rt6_info *__ip6_dst_alloc(struct net *net,
359 struct net_device *dev,
ad706862 360 int flags)
1da177e4 361{
97bab73f 362 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 363 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 364
81eb8447 365 if (rt) {
ebfa45f0 366 rt6_info_init(rt);
81eb8447
WW
367 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
368 }
8104891b 369
cf911662 370 return rt;
1da177e4
LT
371}
372
9ab179d8
DA
373struct rt6_info *ip6_dst_alloc(struct net *net,
374 struct net_device *dev,
375 int flags)
d52d3997 376{
ad706862 377 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
378
379 if (rt) {
380 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 381 if (!rt->rt6i_pcpu) {
587fea74 382 dst_release_immediate(&rt->dst);
d52d3997
MKL
383 return NULL;
384 }
385 }
386
387 return rt;
388}
9ab179d8 389EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 390
1da177e4
LT
391static void ip6_dst_destroy(struct dst_entry *dst)
392{
393 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 394 struct rt6_exception_bucket *bucket;
3a2232e9 395 struct rt6_info *from = rt->from;
8d0b94af 396 struct inet6_dev *idev;
d4ead6b3 397 struct dst_metrics *m;
1da177e4 398
4b32b5ad 399 dst_destroy_metrics_generic(dst);
87775312 400 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
401 rt6_uncached_list_del(rt);
402
403 idev = rt->rt6i_idev;
38308473 404 if (idev) {
1da177e4
LT
405 rt->rt6i_idev = NULL;
406 in6_dev_put(idev);
1ab1457c 407 }
35732d01
WW
408 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
409 if (bucket) {
410 rt->rt6i_exception_bucket = NULL;
411 kfree(bucket);
412 }
1716a961 413
d4ead6b3
DA
414 m = rt->fib6_metrics;
415 if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
416 kfree(m);
417
3a2232e9
DM
418 rt->from = NULL;
419 dst_release(&from->dst);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
3a2232e9 452 } else if (rt->from) {
1e2ea8ad 453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
14895687 454 fib6_check_expired(rt->from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
b4bac172
DA
459static struct rt6_info *rt6_multipath_select(const struct net *net,
460 struct rt6_info *match,
52bd4c0c 461 struct flowi6 *fl6, int oif,
b75cc8f9 462 const struct sk_buff *skb,
52bd4c0c 463 int strict)
51ebd318
ND
464{
465 struct rt6_info *sibling, *next_sibling;
51ebd318 466
b673d6cc
JS
467 /* We might have already computed the hash for ICMPv6 errors. In such
468 * case it will always be non-zero. Otherwise now is the time to do it.
469 */
470 if (!fl6->mp_hash)
b4bac172 471 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 472
5e670d84 473 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
474 return match;
475
476 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
477 rt6i_siblings) {
5e670d84
DA
478 int nh_upper_bound;
479
480 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
481 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
482 continue;
483 if (rt6_score_route(sibling, oif, strict) < 0)
484 break;
485 match = sibling;
486 break;
487 }
488
51ebd318
ND
489 return match;
490}
491
1da177e4 492/*
66f5d6ce 493 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
494 */
495
8ed67789
DL
496static inline struct rt6_info *rt6_device_match(struct net *net,
497 struct rt6_info *rt,
b71d1d42 498 const struct in6_addr *saddr,
1da177e4 499 int oif,
d420895e 500 int flags)
1da177e4
LT
501{
502 struct rt6_info *local = NULL;
503 struct rt6_info *sprt;
504
5e670d84
DA
505 if (!oif && ipv6_addr_any(saddr) &&
506 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 507 return rt;
dd3abc4e 508
071fb37e 509 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 510 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 511
5e670d84 512 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
513 continue;
514
dd3abc4e 515 if (oif) {
1da177e4
LT
516 if (dev->ifindex == oif)
517 return sprt;
518 if (dev->flags & IFF_LOOPBACK) {
38308473 519 if (!sprt->rt6i_idev ||
1da177e4 520 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 521 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 522 continue;
17fb0b2b
DA
523 if (local &&
524 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
525 continue;
526 }
527 local = sprt;
528 }
dd3abc4e
YH
529 } else {
530 if (ipv6_chk_addr(net, saddr, dev,
531 flags & RT6_LOOKUP_F_IFACE))
532 return sprt;
1da177e4 533 }
dd3abc4e 534 }
1da177e4 535
dd3abc4e 536 if (oif) {
1da177e4
LT
537 if (local)
538 return local;
539
d420895e 540 if (flags & RT6_LOOKUP_F_IFACE)
421842ed 541 return net->ipv6.fib6_null_entry;
1da177e4 542 }
8067bb8c 543
421842ed 544 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
545}
546
27097255 547#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
548struct __rt6_probe_work {
549 struct work_struct work;
550 struct in6_addr target;
551 struct net_device *dev;
552};
553
554static void rt6_probe_deferred(struct work_struct *w)
555{
556 struct in6_addr mcaddr;
557 struct __rt6_probe_work *work =
558 container_of(w, struct __rt6_probe_work, work);
559
560 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 561 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 562 dev_put(work->dev);
662f5533 563 kfree(work);
c2f17e82
HFS
564}
565
27097255
YH
566static void rt6_probe(struct rt6_info *rt)
567{
990edb42 568 struct __rt6_probe_work *work;
5e670d84 569 const struct in6_addr *nh_gw;
f2c31e32 570 struct neighbour *neigh;
5e670d84
DA
571 struct net_device *dev;
572
27097255
YH
573 /*
574 * Okay, this does not seem to be appropriate
575 * for now, however, we need to check if it
576 * is really so; aka Router Reachability Probing.
577 *
578 * Router Reachability Probe MUST be rate-limited
579 * to no more than one per minute.
580 */
2152caea 581 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 582 return;
5e670d84
DA
583
584 nh_gw = &rt->fib6_nh.nh_gw;
585 dev = rt->fib6_nh.nh_dev;
2152caea 586 rcu_read_lock_bh();
5e670d84 587 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 588 if (neigh) {
8d6c31bf
MKL
589 if (neigh->nud_state & NUD_VALID)
590 goto out;
591
990edb42 592 work = NULL;
2152caea 593 write_lock(&neigh->lock);
990edb42
MKL
594 if (!(neigh->nud_state & NUD_VALID) &&
595 time_after(jiffies,
596 neigh->updated +
597 rt->rt6i_idev->cnf.rtr_probe_interval)) {
598 work = kmalloc(sizeof(*work), GFP_ATOMIC);
599 if (work)
600 __neigh_set_probe_once(neigh);
c2f17e82 601 }
2152caea 602 write_unlock(&neigh->lock);
990edb42
MKL
603 } else {
604 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 605 }
990edb42
MKL
606
607 if (work) {
608 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
609 work->target = *nh_gw;
610 dev_hold(dev);
611 work->dev = dev;
990edb42
MKL
612 schedule_work(&work->work);
613 }
614
8d6c31bf 615out:
2152caea 616 rcu_read_unlock_bh();
27097255
YH
617}
618#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
622#endif
623
1da177e4 624/*
554cfb7e 625 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 626 */
b6f99a21 627static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 628{
5e670d84
DA
629 const struct net_device *dev = rt->fib6_nh.nh_dev;
630
161980f4 631 if (!oif || dev->ifindex == oif)
554cfb7e 632 return 2;
161980f4
DM
633 if ((dev->flags & IFF_LOOPBACK) &&
634 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
635 return 1;
636 return 0;
554cfb7e 637}
1da177e4 638
afc154e9 639static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 640{
afc154e9 641 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 642 struct neighbour *neigh;
f2c31e32 643
4d0c5911
YH
644 if (rt->rt6i_flags & RTF_NONEXTHOP ||
645 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 646 return RT6_NUD_SUCCEED;
145a3621
YH
647
648 rcu_read_lock_bh();
5e670d84
DA
649 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
650 &rt->fib6_nh.nh_gw);
145a3621
YH
651 if (neigh) {
652 read_lock(&neigh->lock);
554cfb7e 653 if (neigh->nud_state & NUD_VALID)
afc154e9 654 ret = RT6_NUD_SUCCEED;
398bcbeb 655#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 656 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 657 ret = RT6_NUD_SUCCEED;
7e980569
JB
658 else
659 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 660#endif
145a3621 661 read_unlock(&neigh->lock);
afc154e9
HFS
662 } else {
663 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 664 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 665 }
145a3621
YH
666 rcu_read_unlock_bh();
667
a5a81f0b 668 return ret;
1da177e4
LT
669}
670
554cfb7e
YH
671static int rt6_score_route(struct rt6_info *rt, int oif,
672 int strict)
1da177e4 673{
a5a81f0b 674 int m;
1ab1457c 675
4d0c5911 676 m = rt6_check_dev(rt, oif);
77d16f45 677 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 678 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
679#ifdef CONFIG_IPV6_ROUTER_PREF
680 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
681#endif
afc154e9
HFS
682 if (strict & RT6_LOOKUP_F_REACHABLE) {
683 int n = rt6_check_neigh(rt);
684 if (n < 0)
685 return n;
686 }
554cfb7e
YH
687 return m;
688}
689
f11e6659 690static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
691 int *mpri, struct rt6_info *match,
692 bool *do_rr)
554cfb7e 693{
f11e6659 694 int m;
afc154e9 695 bool match_do_rr = false;
35103d11 696 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 697
5e670d84 698 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
699 goto out;
700
14c5206c 701 if (idev->cnf.ignore_routes_with_linkdown &&
5e670d84 702 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 703 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 704 goto out;
f11e6659 705
14895687 706 if (fib6_check_expired(rt))
f11e6659
DM
707 goto out;
708
709 m = rt6_score_route(rt, oif, strict);
7e980569 710 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
711 match_do_rr = true;
712 m = 0; /* lowest valid score */
7e980569 713 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 714 goto out;
afc154e9
HFS
715 }
716
717 if (strict & RT6_LOOKUP_F_REACHABLE)
718 rt6_probe(rt);
f11e6659 719
7e980569 720 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 721 if (m > *mpri) {
afc154e9 722 *do_rr = match_do_rr;
f11e6659
DM
723 *mpri = m;
724 match = rt;
f11e6659 725 }
f11e6659
DM
726out:
727 return match;
728}
729
730static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 731 struct rt6_info *leaf,
f11e6659 732 struct rt6_info *rr_head,
afc154e9
HFS
733 u32 metric, int oif, int strict,
734 bool *do_rr)
f11e6659 735{
9fbdcfaf 736 struct rt6_info *rt, *match, *cont;
554cfb7e 737 int mpri = -1;
1da177e4 738
f11e6659 739 match = NULL;
9fbdcfaf 740 cont = NULL;
071fb37e 741 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
742 if (rt->rt6i_metric != metric) {
743 cont = rt;
744 break;
745 }
746
747 match = find_match(rt, oif, strict, &mpri, match, do_rr);
748 }
749
66f5d6ce 750 for (rt = leaf; rt && rt != rr_head;
071fb37e 751 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
752 if (rt->rt6i_metric != metric) {
753 cont = rt;
754 break;
755 }
756
afc154e9 757 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
758 }
759
760 if (match || !cont)
761 return match;
762
071fb37e 763 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 764 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 765
f11e6659
DM
766 return match;
767}
1da177e4 768
8d1040e8
WW
769static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
770 int oif, int strict)
f11e6659 771{
66f5d6ce 772 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 773 struct rt6_info *match, *rt0;
afc154e9 774 bool do_rr = false;
17ecf590 775 int key_plen;
1da177e4 776
421842ed
DA
777 if (!leaf || leaf == net->ipv6.fib6_null_entry)
778 return net->ipv6.fib6_null_entry;
8d1040e8 779
66f5d6ce 780 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 781 if (!rt0)
66f5d6ce 782 rt0 = leaf;
1da177e4 783
17ecf590
WW
784 /* Double check to make sure fn is not an intermediate node
785 * and fn->leaf does not points to its child's leaf
786 * (This might happen if all routes under fn are deleted from
787 * the tree and fib6_repair_tree() is called on the node.)
788 */
789 key_plen = rt0->rt6i_dst.plen;
790#ifdef CONFIG_IPV6_SUBTREES
791 if (rt0->rt6i_src.plen)
792 key_plen = rt0->rt6i_src.plen;
793#endif
794 if (fn->fn_bit != key_plen)
421842ed 795 return net->ipv6.fib6_null_entry;
17ecf590 796
8d1040e8 797 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 798 &do_rr);
1da177e4 799
afc154e9 800 if (do_rr) {
071fb37e 801 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 802
554cfb7e 803 /* no entries matched; do round-robin */
f11e6659 804 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 805 next = leaf;
f11e6659 806
66f5d6ce
WW
807 if (next != rt0) {
808 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
809 /* make sure next is not being deleted from the tree */
810 if (next->rt6i_node)
811 rcu_assign_pointer(fn->rr_ptr, next);
812 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
813 }
1da177e4 814 }
1da177e4 815
421842ed 816 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
817}
818
8b9df265
MKL
819static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
820{
821 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
822}
823
70ceb4f5
YH
824#ifdef CONFIG_IPV6_ROUTE_INFO
825int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 826 const struct in6_addr *gwaddr)
70ceb4f5 827{
c346dca1 828 struct net *net = dev_net(dev);
70ceb4f5
YH
829 struct route_info *rinfo = (struct route_info *) opt;
830 struct in6_addr prefix_buf, *prefix;
831 unsigned int pref;
4bed72e4 832 unsigned long lifetime;
70ceb4f5
YH
833 struct rt6_info *rt;
834
835 if (len < sizeof(struct route_info)) {
836 return -EINVAL;
837 }
838
839 /* Sanity check for prefix_len and length */
840 if (rinfo->length > 3) {
841 return -EINVAL;
842 } else if (rinfo->prefix_len > 128) {
843 return -EINVAL;
844 } else if (rinfo->prefix_len > 64) {
845 if (rinfo->length < 2) {
846 return -EINVAL;
847 }
848 } else if (rinfo->prefix_len > 0) {
849 if (rinfo->length < 1) {
850 return -EINVAL;
851 }
852 }
853
854 pref = rinfo->route_pref;
855 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 856 return -EINVAL;
70ceb4f5 857
4bed72e4 858 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
859
860 if (rinfo->length == 3)
861 prefix = (struct in6_addr *)rinfo->prefix;
862 else {
863 /* this function is safe */
864 ipv6_addr_prefix(&prefix_buf,
865 (struct in6_addr *)rinfo->prefix,
866 rinfo->prefix_len);
867 prefix = &prefix_buf;
868 }
869
f104a567 870 if (rinfo->prefix_len == 0)
afb1d4b5 871 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
872 else
873 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 874 gwaddr, dev);
70ceb4f5
YH
875
876 if (rt && !lifetime) {
afb1d4b5 877 ip6_del_rt(net, rt);
70ceb4f5
YH
878 rt = NULL;
879 }
880
881 if (!rt && lifetime)
830218c1
DA
882 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
883 dev, pref);
70ceb4f5
YH
884 else if (rt)
885 rt->rt6i_flags = RTF_ROUTEINFO |
886 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
887
888 if (rt) {
1716a961 889 if (!addrconf_finite_timeout(lifetime))
14895687 890 fib6_clean_expires(rt);
1716a961 891 else
14895687 892 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 893
94e187c0 894 ip6_rt_put(rt);
70ceb4f5
YH
895 }
896 return 0;
897}
898#endif
899
ae90d867
DA
900/*
901 * Misc support functions
902 */
903
904/* called with rcu_lock held */
905static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
906{
5e670d84 907 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867
DA
908
909 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
910 /* for copies of local routes, dst->dev needs to be the
911 * device if it is a master device, the master device if
912 * device is enslaved, and the loopback as the default
913 */
914 if (netif_is_l3_slave(dev) &&
915 !rt6_need_strict(&rt->rt6i_dst.addr))
916 dev = l3mdev_master_dev_rcu(dev);
917 else if (!netif_is_l3_master(dev))
918 dev = dev_net(dev)->loopback_dev;
919 /* last case is netif_is_l3_master(dev) is true in which
920 * case we want dev returned to be dev
921 */
922 }
923
924 return dev;
925}
926
6edb3c96
DA
927static const int fib6_prop[RTN_MAX + 1] = {
928 [RTN_UNSPEC] = 0,
929 [RTN_UNICAST] = 0,
930 [RTN_LOCAL] = 0,
931 [RTN_BROADCAST] = 0,
932 [RTN_ANYCAST] = 0,
933 [RTN_MULTICAST] = 0,
934 [RTN_BLACKHOLE] = -EINVAL,
935 [RTN_UNREACHABLE] = -EHOSTUNREACH,
936 [RTN_PROHIBIT] = -EACCES,
937 [RTN_THROW] = -EAGAIN,
938 [RTN_NAT] = -EINVAL,
939 [RTN_XRESOLVE] = -EINVAL,
940};
941
942static int ip6_rt_type_to_error(u8 fib6_type)
943{
944 return fib6_prop[fib6_type];
945}
946
3b6761d1
DA
947static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
948{
949 unsigned short flags = 0;
950
951 if (rt->dst_nocount)
952 flags |= DST_NOCOUNT;
953 if (rt->dst_nopolicy)
954 flags |= DST_NOPOLICY;
955 if (rt->dst_host)
956 flags |= DST_HOST;
957
958 return flags;
959}
960
6edb3c96
DA
961static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
962{
963 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
964
965 switch (ort->fib6_type) {
966 case RTN_BLACKHOLE:
967 rt->dst.output = dst_discard_out;
968 rt->dst.input = dst_discard;
969 break;
970 case RTN_PROHIBIT:
971 rt->dst.output = ip6_pkt_prohibit_out;
972 rt->dst.input = ip6_pkt_prohibit;
973 break;
974 case RTN_THROW:
975 case RTN_UNREACHABLE:
976 default:
977 rt->dst.output = ip6_pkt_discard_out;
978 rt->dst.input = ip6_pkt_discard;
979 break;
980 }
981}
982
983static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
984{
3b6761d1
DA
985 rt->dst.flags |= fib6_info_dst_flags(ort);
986
6edb3c96
DA
987 if (ort->rt6i_flags & RTF_REJECT) {
988 ip6_rt_init_dst_reject(rt, ort);
989 return;
990 }
991
992 rt->dst.error = 0;
993 rt->dst.output = ip6_output;
994
995 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96
DA
996 rt->dst.input = ip6_input;
997 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
998 rt->dst.input = ip6_mc_input;
999 } else {
1000 rt->dst.input = ip6_forward;
1001 }
1002
1003 if (ort->fib6_nh.nh_lwtstate) {
1004 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
1005 lwtunnel_set_redirect(&rt->dst);
1006 }
1007
1008 rt->dst.lastuse = jiffies;
1009}
1010
ae90d867
DA
1011static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1012{
1013 BUG_ON(from->from);
1014
1015 rt->rt6i_flags &= ~RTF_EXPIRES;
23fb93a4
DA
1016 if (dst_hold_safe(&from->dst))
1017 rt->from = from;
d4ead6b3
DA
1018 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
1019 if (from->fib6_metrics != &dst_default_metrics) {
1020 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1021 refcount_inc(&from->fib6_metrics->refcnt);
1022 }
ae90d867
DA
1023}
1024
1025static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
1026{
6edb3c96
DA
1027 ip6_rt_init_dst(rt, ort);
1028
ae90d867 1029 rt->rt6i_dst = ort->rt6i_dst;
ae90d867
DA
1030 rt->rt6i_idev = ort->rt6i_idev;
1031 if (rt->rt6i_idev)
1032 in6_dev_hold(rt->rt6i_idev);
5e670d84 1033 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
ae90d867
DA
1034 rt->rt6i_flags = ort->rt6i_flags;
1035 rt6_set_from(rt, ort);
1036 rt->rt6i_metric = ort->rt6i_metric;
1037#ifdef CONFIG_IPV6_SUBTREES
1038 rt->rt6i_src = ort->rt6i_src;
1039#endif
1040 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1041 rt->rt6i_table = ort->rt6i_table;
5e670d84 1042 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1043}
1044
a3c00e46
MKL
1045static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1046 struct in6_addr *saddr)
1047{
66f5d6ce 1048 struct fib6_node *pn, *sn;
a3c00e46
MKL
1049 while (1) {
1050 if (fn->fn_flags & RTN_TL_ROOT)
1051 return NULL;
66f5d6ce
WW
1052 pn = rcu_dereference(fn->parent);
1053 sn = FIB6_SUBTREE(pn);
1054 if (sn && sn != fn)
1055 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1056 else
1057 fn = pn;
1058 if (fn->fn_flags & RTN_RTINFO)
1059 return fn;
1060 }
1061}
c71099ac 1062
d3843fe5
WW
1063static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1064 bool null_fallback)
1065{
1066 struct rt6_info *rt = *prt;
1067
1068 if (dst_hold_safe(&rt->dst))
1069 return true;
1070 if (null_fallback) {
1071 rt = net->ipv6.ip6_null_entry;
1072 dst_hold(&rt->dst);
1073 } else {
1074 rt = NULL;
1075 }
1076 *prt = rt;
1077 return false;
1078}
1079
dec9b0e2
DA
1080/* called with rcu_lock held */
1081static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
1082{
3b6761d1 1083 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1084 struct net_device *dev = rt->fib6_nh.nh_dev;
1085 struct rt6_info *nrt;
1086
3b6761d1 1087 nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1088 if (nrt)
1089 ip6_rt_copy_init(nrt, rt);
1090
1091 return nrt;
1092}
1093
8ed67789
DL
1094static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1095 struct fib6_table *table,
b75cc8f9
DA
1096 struct flowi6 *fl6,
1097 const struct sk_buff *skb,
1098 int flags)
1da177e4 1099{
23fb93a4 1100 struct rt6_info *f6i;
1da177e4 1101 struct fib6_node *fn;
23fb93a4 1102 struct rt6_info *rt;
1da177e4 1103
b6cdbc85
DA
1104 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1105 flags &= ~RT6_LOOKUP_F_IFACE;
1106
66f5d6ce 1107 rcu_read_lock();
4c9483b2 1108 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1109restart:
23fb93a4
DA
1110 f6i = rcu_dereference(fn->leaf);
1111 if (!f6i) {
1112 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1113 } else {
23fb93a4 1114 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1115 fl6->flowi6_oif, flags);
23fb93a4
DA
1116 if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
1117 f6i = rt6_multipath_select(net, f6i, fl6,
1118 fl6->flowi6_oif, skb, flags);
66f5d6ce 1119 }
23fb93a4 1120 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1121 fn = fib6_backtrack(fn, &fl6->saddr);
1122 if (fn)
1123 goto restart;
1124 }
23fb93a4 1125
2b760fcf 1126 /* Search through exception table */
23fb93a4
DA
1127 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1128 if (rt) {
dec9b0e2
DA
1129 if (ip6_hold_safe(net, &rt, true))
1130 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1131 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1132 rt = net->ipv6.ip6_null_entry;
1133 dst_hold(&rt->dst);
23fb93a4
DA
1134 } else {
1135 rt = ip6_create_rt_rcu(f6i);
1136 if (!rt) {
1137 rt = net->ipv6.ip6_null_entry;
1138 dst_hold(&rt->dst);
1139 }
dec9b0e2 1140 }
d3843fe5 1141
66f5d6ce 1142 rcu_read_unlock();
b811580d 1143
b65f164d 1144 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1145
c71099ac 1146 return rt;
c71099ac
TG
1147}
1148
67ba4152 1149struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1150 const struct sk_buff *skb, int flags)
ea6e574e 1151{
b75cc8f9 1152 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1153}
1154EXPORT_SYMBOL_GPL(ip6_route_lookup);
1155
9acd9f3a 1156struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1157 const struct in6_addr *saddr, int oif,
1158 const struct sk_buff *skb, int strict)
c71099ac 1159{
4c9483b2
DM
1160 struct flowi6 fl6 = {
1161 .flowi6_oif = oif,
1162 .daddr = *daddr,
c71099ac
TG
1163 };
1164 struct dst_entry *dst;
77d16f45 1165 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1166
adaa70bb 1167 if (saddr) {
4c9483b2 1168 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1169 flags |= RT6_LOOKUP_F_HAS_SADDR;
1170 }
1171
b75cc8f9 1172 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1173 if (dst->error == 0)
1174 return (struct rt6_info *) dst;
1175
1176 dst_release(dst);
1177
1da177e4
LT
1178 return NULL;
1179}
7159039a
YH
1180EXPORT_SYMBOL(rt6_lookup);
1181
c71099ac 1182/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1183 * It takes new route entry, the addition fails by any reason the
1184 * route is released.
1185 * Caller must hold dst before calling it.
1da177e4
LT
1186 */
1187
e5fd387a 1188static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301 1189 struct netlink_ext_ack *extack)
1da177e4
LT
1190{
1191 int err;
c71099ac 1192 struct fib6_table *table;
1da177e4 1193
c71099ac 1194 table = rt->rt6i_table;
66f5d6ce 1195 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1196 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1197 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1198
1199 return err;
1200}
1201
afb1d4b5 1202int ip6_ins_rt(struct net *net, struct rt6_info *rt)
40e22e8f 1203{
afb1d4b5 1204 struct nl_info info = { .nl_net = net, };
e715b6d3 1205
1cfb71ee
WW
1206 /* Hold dst to account for the reference from the fib6 tree */
1207 dst_hold(&rt->dst);
d4ead6b3 1208 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1209}
1210
8b9df265
MKL
1211static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1212 const struct in6_addr *daddr,
1213 const struct in6_addr *saddr)
1da177e4 1214{
4832c30d 1215 struct net_device *dev;
1da177e4
LT
1216 struct rt6_info *rt;
1217
1218 /*
1219 * Clone the route.
1220 */
1221
4832c30d
DA
1222 rcu_read_lock();
1223 dev = ip6_rt_get_dev_rcu(ort);
1224 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1225 rcu_read_unlock();
83a09abd
MKL
1226 if (!rt)
1227 return NULL;
1228
1229 ip6_rt_copy_init(rt, ort);
1230 rt->rt6i_flags |= RTF_CACHE;
1231 rt->rt6i_metric = 0;
1232 rt->dst.flags |= DST_HOST;
1233 rt->rt6i_dst.addr = *daddr;
1234 rt->rt6i_dst.plen = 128;
1da177e4 1235
83a09abd
MKL
1236 if (!rt6_is_gw_or_nonexthop(ort)) {
1237 if (ort->rt6i_dst.plen != 128 &&
1238 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1239 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1240#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1241 if (rt->rt6i_src.plen && saddr) {
1242 rt->rt6i_src.addr = *saddr;
1243 rt->rt6i_src.plen = 128;
8b9df265 1244 }
83a09abd 1245#endif
95a9a5ba 1246 }
1da177e4 1247
95a9a5ba
YH
1248 return rt;
1249}
1da177e4 1250
d52d3997
MKL
1251static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1252{
3b6761d1 1253 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1254 struct net_device *dev;
d52d3997
MKL
1255 struct rt6_info *pcpu_rt;
1256
4832c30d
DA
1257 rcu_read_lock();
1258 dev = ip6_rt_get_dev_rcu(rt);
3b6761d1 1259 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1260 rcu_read_unlock();
d52d3997
MKL
1261 if (!pcpu_rt)
1262 return NULL;
1263 ip6_rt_copy_init(pcpu_rt, rt);
1264 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1265 pcpu_rt->rt6i_flags |= RTF_PCPU;
1266 return pcpu_rt;
1267}
1268
66f5d6ce 1269/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1270static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1271{
a73e4195 1272 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1273
1274 p = this_cpu_ptr(rt->rt6i_pcpu);
1275 pcpu_rt = *p;
1276
d4ead6b3
DA
1277 if (pcpu_rt)
1278 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1279
a73e4195
MKL
1280 return pcpu_rt;
1281}
1282
afb1d4b5
DA
1283static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1284 struct rt6_info *rt)
a73e4195
MKL
1285{
1286 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1287
1288 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1289 if (!pcpu_rt) {
9c7370a1
MKL
1290 dst_hold(&net->ipv6.ip6_null_entry->dst);
1291 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1292 }
1293
a94b9367
WW
1294 dst_hold(&pcpu_rt->dst);
1295 p = this_cpu_ptr(rt->rt6i_pcpu);
1296 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1297 BUG_ON(prev);
a94b9367 1298
d52d3997
MKL
1299 return pcpu_rt;
1300}
1301
35732d01
WW
1302/* exception hash table implementation
1303 */
1304static DEFINE_SPINLOCK(rt6_exception_lock);
1305
1306/* Remove rt6_ex from hash table and free the memory
1307 * Caller must hold rt6_exception_lock
1308 */
1309static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1310 struct rt6_exception *rt6_ex)
1311{
b2427e67 1312 struct net *net;
81eb8447 1313
35732d01
WW
1314 if (!bucket || !rt6_ex)
1315 return;
b2427e67
CIK
1316
1317 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1318 rt6_ex->rt6i->rt6i_node = NULL;
1319 hlist_del_rcu(&rt6_ex->hlist);
1320 rt6_release(rt6_ex->rt6i);
1321 kfree_rcu(rt6_ex, rcu);
1322 WARN_ON_ONCE(!bucket->depth);
1323 bucket->depth--;
81eb8447 1324 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1325}
1326
1327/* Remove oldest rt6_ex in bucket and free the memory
1328 * Caller must hold rt6_exception_lock
1329 */
1330static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1331{
1332 struct rt6_exception *rt6_ex, *oldest = NULL;
1333
1334 if (!bucket)
1335 return;
1336
1337 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1338 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1339 oldest = rt6_ex;
1340 }
1341 rt6_remove_exception(bucket, oldest);
1342}
1343
1344static u32 rt6_exception_hash(const struct in6_addr *dst,
1345 const struct in6_addr *src)
1346{
1347 static u32 seed __read_mostly;
1348 u32 val;
1349
1350 net_get_random_once(&seed, sizeof(seed));
1351 val = jhash(dst, sizeof(*dst), seed);
1352
1353#ifdef CONFIG_IPV6_SUBTREES
1354 if (src)
1355 val = jhash(src, sizeof(*src), val);
1356#endif
1357 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1358}
1359
1360/* Helper function to find the cached rt in the hash table
1361 * and update bucket pointer to point to the bucket for this
1362 * (daddr, saddr) pair
1363 * Caller must hold rt6_exception_lock
1364 */
1365static struct rt6_exception *
1366__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1367 const struct in6_addr *daddr,
1368 const struct in6_addr *saddr)
1369{
1370 struct rt6_exception *rt6_ex;
1371 u32 hval;
1372
1373 if (!(*bucket) || !daddr)
1374 return NULL;
1375
1376 hval = rt6_exception_hash(daddr, saddr);
1377 *bucket += hval;
1378
1379 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1380 struct rt6_info *rt6 = rt6_ex->rt6i;
1381 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1382
1383#ifdef CONFIG_IPV6_SUBTREES
1384 if (matched && saddr)
1385 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1386#endif
1387 if (matched)
1388 return rt6_ex;
1389 }
1390 return NULL;
1391}
1392
1393/* Helper function to find the cached rt in the hash table
1394 * and update bucket pointer to point to the bucket for this
1395 * (daddr, saddr) pair
1396 * Caller must hold rcu_read_lock()
1397 */
1398static struct rt6_exception *
1399__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1400 const struct in6_addr *daddr,
1401 const struct in6_addr *saddr)
1402{
1403 struct rt6_exception *rt6_ex;
1404 u32 hval;
1405
1406 WARN_ON_ONCE(!rcu_read_lock_held());
1407
1408 if (!(*bucket) || !daddr)
1409 return NULL;
1410
1411 hval = rt6_exception_hash(daddr, saddr);
1412 *bucket += hval;
1413
1414 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1415 struct rt6_info *rt6 = rt6_ex->rt6i;
1416 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1417
1418#ifdef CONFIG_IPV6_SUBTREES
1419 if (matched && saddr)
1420 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1421#endif
1422 if (matched)
1423 return rt6_ex;
1424 }
1425 return NULL;
1426}
1427
d4ead6b3
DA
1428static unsigned int fib6_mtu(const struct rt6_info *rt)
1429{
1430 unsigned int mtu;
1431
1432 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1433 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1434
1435 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1436}
1437
35732d01
WW
1438static int rt6_insert_exception(struct rt6_info *nrt,
1439 struct rt6_info *ort)
1440{
5e670d84 1441 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1442 struct rt6_exception_bucket *bucket;
1443 struct in6_addr *src_key = NULL;
1444 struct rt6_exception *rt6_ex;
1445 int err = 0;
1446
35732d01
WW
1447 spin_lock_bh(&rt6_exception_lock);
1448
1449 if (ort->exception_bucket_flushed) {
1450 err = -EINVAL;
1451 goto out;
1452 }
1453
1454 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1455 lockdep_is_held(&rt6_exception_lock));
1456 if (!bucket) {
1457 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1458 GFP_ATOMIC);
1459 if (!bucket) {
1460 err = -ENOMEM;
1461 goto out;
1462 }
1463 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1464 }
1465
1466#ifdef CONFIG_IPV6_SUBTREES
1467 /* rt6i_src.plen != 0 indicates ort is in subtree
1468 * and exception table is indexed by a hash of
1469 * both rt6i_dst and rt6i_src.
1470 * Otherwise, the exception table is indexed by
1471 * a hash of only rt6i_dst.
1472 */
1473 if (ort->rt6i_src.plen)
1474 src_key = &nrt->rt6i_src.addr;
1475#endif
60006a48
WW
1476
1477 /* Update rt6i_prefsrc as it could be changed
1478 * in rt6_remove_prefsrc()
1479 */
1480 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1481 /* rt6_mtu_change() might lower mtu on ort.
1482 * Only insert this exception route if its mtu
1483 * is less than ort's mtu value.
1484 */
d4ead6b3 1485 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1486 err = -EINVAL;
1487 goto out;
1488 }
60006a48 1489
35732d01
WW
1490 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1491 src_key);
1492 if (rt6_ex)
1493 rt6_remove_exception(bucket, rt6_ex);
1494
1495 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1496 if (!rt6_ex) {
1497 err = -ENOMEM;
1498 goto out;
1499 }
1500 rt6_ex->rt6i = nrt;
1501 rt6_ex->stamp = jiffies;
1502 atomic_inc(&nrt->rt6i_ref);
1503 nrt->rt6i_node = ort->rt6i_node;
1504 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1505 bucket->depth++;
81eb8447 1506 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1507
1508 if (bucket->depth > FIB6_MAX_DEPTH)
1509 rt6_exception_remove_oldest(bucket);
1510
1511out:
1512 spin_unlock_bh(&rt6_exception_lock);
1513
1514 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1515 if (!err) {
922c2ac8 1516 spin_lock_bh(&ort->rt6i_table->tb6_lock);
7aef6859 1517 fib6_update_sernum(net, ort);
922c2ac8 1518 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1519 fib6_force_start_gc(net);
1520 }
35732d01
WW
1521
1522 return err;
1523}
1524
1525void rt6_flush_exceptions(struct rt6_info *rt)
1526{
1527 struct rt6_exception_bucket *bucket;
1528 struct rt6_exception *rt6_ex;
1529 struct hlist_node *tmp;
1530 int i;
1531
1532 spin_lock_bh(&rt6_exception_lock);
1533 /* Prevent rt6_insert_exception() to recreate the bucket list */
1534 rt->exception_bucket_flushed = 1;
1535
1536 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1537 lockdep_is_held(&rt6_exception_lock));
1538 if (!bucket)
1539 goto out;
1540
1541 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1542 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1543 rt6_remove_exception(bucket, rt6_ex);
1544 WARN_ON_ONCE(bucket->depth);
1545 bucket++;
1546 }
1547
1548out:
1549 spin_unlock_bh(&rt6_exception_lock);
1550}
1551
1552/* Find cached rt in the hash table inside passed in rt
1553 * Caller has to hold rcu_read_lock()
1554 */
1555static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1556 struct in6_addr *daddr,
1557 struct in6_addr *saddr)
1558{
1559 struct rt6_exception_bucket *bucket;
1560 struct in6_addr *src_key = NULL;
1561 struct rt6_exception *rt6_ex;
1562 struct rt6_info *res = NULL;
1563
1564 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1565
1566#ifdef CONFIG_IPV6_SUBTREES
1567 /* rt6i_src.plen != 0 indicates rt is in subtree
1568 * and exception table is indexed by a hash of
1569 * both rt6i_dst and rt6i_src.
1570 * Otherwise, the exception table is indexed by
1571 * a hash of only rt6i_dst.
1572 */
1573 if (rt->rt6i_src.plen)
1574 src_key = saddr;
1575#endif
1576 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1577
1578 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1579 res = rt6_ex->rt6i;
1580
1581 return res;
1582}
1583
1584/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1585static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1586{
35732d01 1587 struct rt6_exception_bucket *bucket;
3a2232e9 1588 struct rt6_info *from = rt->from;
35732d01
WW
1589 struct in6_addr *src_key = NULL;
1590 struct rt6_exception *rt6_ex;
1591 int err;
1592
1593 if (!from ||
442d713b 1594 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1595 return -EINVAL;
1596
1597 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1598 return -ENOENT;
1599
1600 spin_lock_bh(&rt6_exception_lock);
1601 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1602 lockdep_is_held(&rt6_exception_lock));
1603#ifdef CONFIG_IPV6_SUBTREES
1604 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1605 * and exception table is indexed by a hash of
1606 * both rt6i_dst and rt6i_src.
1607 * Otherwise, the exception table is indexed by
1608 * a hash of only rt6i_dst.
1609 */
1610 if (from->rt6i_src.plen)
1611 src_key = &rt->rt6i_src.addr;
1612#endif
1613 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1614 &rt->rt6i_dst.addr,
1615 src_key);
1616 if (rt6_ex) {
1617 rt6_remove_exception(bucket, rt6_ex);
1618 err = 0;
1619 } else {
1620 err = -ENOENT;
1621 }
1622
1623 spin_unlock_bh(&rt6_exception_lock);
1624 return err;
1625}
1626
1627/* Find rt6_ex which contains the passed in rt cache and
1628 * refresh its stamp
1629 */
1630static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1631{
35732d01 1632 struct rt6_exception_bucket *bucket;
3a2232e9 1633 struct rt6_info *from = rt->from;
35732d01
WW
1634 struct in6_addr *src_key = NULL;
1635 struct rt6_exception *rt6_ex;
1636
1637 if (!from ||
442d713b 1638 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1639 return;
1640
1641 rcu_read_lock();
1642 bucket = rcu_dereference(from->rt6i_exception_bucket);
1643
1644#ifdef CONFIG_IPV6_SUBTREES
1645 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1646 * and exception table is indexed by a hash of
1647 * both rt6i_dst and rt6i_src.
1648 * Otherwise, the exception table is indexed by
1649 * a hash of only rt6i_dst.
1650 */
1651 if (from->rt6i_src.plen)
1652 src_key = &rt->rt6i_src.addr;
1653#endif
1654 rt6_ex = __rt6_find_exception_rcu(&bucket,
1655 &rt->rt6i_dst.addr,
1656 src_key);
1657 if (rt6_ex)
1658 rt6_ex->stamp = jiffies;
1659
1660 rcu_read_unlock();
1661}
1662
60006a48
WW
1663static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1664{
1665 struct rt6_exception_bucket *bucket;
1666 struct rt6_exception *rt6_ex;
1667 int i;
1668
1669 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1670 lockdep_is_held(&rt6_exception_lock));
1671
1672 if (bucket) {
1673 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1674 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1675 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1676 }
1677 bucket++;
1678 }
1679 }
1680}
1681
e9fa1495
SB
1682static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1683 struct rt6_info *rt, int mtu)
1684{
1685 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1686 * lowest MTU in the path: always allow updating the route PMTU to
1687 * reflect PMTU decreases.
1688 *
1689 * If the new MTU is higher, and the route PMTU is equal to the local
1690 * MTU, this means the old MTU is the lowest in the path, so allow
1691 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1692 * handle this.
1693 */
1694
1695 if (dst_mtu(&rt->dst) >= mtu)
1696 return true;
1697
1698 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1699 return true;
1700
1701 return false;
1702}
1703
1704static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1705 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1706{
1707 struct rt6_exception_bucket *bucket;
1708 struct rt6_exception *rt6_ex;
1709 int i;
1710
1711 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1712 lockdep_is_held(&rt6_exception_lock));
1713
e9fa1495
SB
1714 if (!bucket)
1715 return;
1716
1717 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1718 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1719 struct rt6_info *entry = rt6_ex->rt6i;
1720
1721 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1722 * route), the metrics of its rt->from have already
e9fa1495
SB
1723 * been updated.
1724 */
d4ead6b3 1725 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1726 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1727 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1728 }
e9fa1495 1729 bucket++;
f5bbe7ee
WW
1730 }
1731}
1732
b16cb459
WW
1733#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1734
1735static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1736 struct in6_addr *gateway)
1737{
1738 struct rt6_exception_bucket *bucket;
1739 struct rt6_exception *rt6_ex;
1740 struct hlist_node *tmp;
1741 int i;
1742
1743 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1744 return;
1745
1746 spin_lock_bh(&rt6_exception_lock);
1747 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1748 lockdep_is_held(&rt6_exception_lock));
1749
1750 if (bucket) {
1751 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1752 hlist_for_each_entry_safe(rt6_ex, tmp,
1753 &bucket->chain, hlist) {
1754 struct rt6_info *entry = rt6_ex->rt6i;
1755
1756 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1757 RTF_CACHE_GATEWAY &&
1758 ipv6_addr_equal(gateway,
1759 &entry->rt6i_gateway)) {
1760 rt6_remove_exception(bucket, rt6_ex);
1761 }
1762 }
1763 bucket++;
1764 }
1765 }
1766
1767 spin_unlock_bh(&rt6_exception_lock);
1768}
1769
c757faa8
WW
1770static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1771 struct rt6_exception *rt6_ex,
1772 struct fib6_gc_args *gc_args,
1773 unsigned long now)
1774{
1775 struct rt6_info *rt = rt6_ex->rt6i;
1776
1859bac0
PA
1777 /* we are pruning and obsoleting aged-out and non gateway exceptions
1778 * even if others have still references to them, so that on next
1779 * dst_check() such references can be dropped.
1780 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1781 * expired, independently from their aging, as per RFC 8201 section 4
1782 */
31afeb42
WW
1783 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1784 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1785 RT6_TRACE("aging clone %p\n", rt);
1786 rt6_remove_exception(bucket, rt6_ex);
1787 return;
1788 }
1789 } else if (time_after(jiffies, rt->dst.expires)) {
1790 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1791 rt6_remove_exception(bucket, rt6_ex);
1792 return;
31afeb42
WW
1793 }
1794
1795 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1796 struct neighbour *neigh;
1797 __u8 neigh_flags = 0;
1798
1bfa26ff
ED
1799 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1800 if (neigh)
c757faa8 1801 neigh_flags = neigh->flags;
1bfa26ff 1802
c757faa8
WW
1803 if (!(neigh_flags & NTF_ROUTER)) {
1804 RT6_TRACE("purging route %p via non-router but gateway\n",
1805 rt);
1806 rt6_remove_exception(bucket, rt6_ex);
1807 return;
1808 }
1809 }
31afeb42 1810
c757faa8
WW
1811 gc_args->more++;
1812}
1813
1814void rt6_age_exceptions(struct rt6_info *rt,
1815 struct fib6_gc_args *gc_args,
1816 unsigned long now)
1817{
1818 struct rt6_exception_bucket *bucket;
1819 struct rt6_exception *rt6_ex;
1820 struct hlist_node *tmp;
1821 int i;
1822
1823 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1824 return;
1825
1bfa26ff
ED
1826 rcu_read_lock_bh();
1827 spin_lock(&rt6_exception_lock);
c757faa8
WW
1828 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1829 lockdep_is_held(&rt6_exception_lock));
1830
1831 if (bucket) {
1832 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1833 hlist_for_each_entry_safe(rt6_ex, tmp,
1834 &bucket->chain, hlist) {
1835 rt6_age_examine_exception(bucket, rt6_ex,
1836 gc_args, now);
1837 }
1838 bucket++;
1839 }
1840 }
1bfa26ff
ED
1841 spin_unlock(&rt6_exception_lock);
1842 rcu_read_unlock_bh();
c757faa8
WW
1843}
1844
9ff74384 1845struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1846 int oif, struct flowi6 *fl6,
1847 const struct sk_buff *skb, int flags)
1da177e4 1848{
367efcb9 1849 struct fib6_node *fn, *saved_fn;
23fb93a4
DA
1850 struct rt6_info *f6i;
1851 struct rt6_info *rt;
c71099ac 1852 int strict = 0;
1da177e4 1853
77d16f45 1854 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1855 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1856 if (net->ipv6.devconf_all->forwarding == 0)
1857 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1858
66f5d6ce 1859 rcu_read_lock();
1da177e4 1860
4c9483b2 1861 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1862 saved_fn = fn;
1da177e4 1863
ca254490
DA
1864 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1865 oif = 0;
1866
a3c00e46 1867redo_rt6_select:
23fb93a4
DA
1868 f6i = rt6_select(net, fn, oif, strict);
1869 if (f6i->rt6i_nsiblings)
1870 f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1871 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1872 fn = fib6_backtrack(fn, &fl6->saddr);
1873 if (fn)
1874 goto redo_rt6_select;
367efcb9
MKL
1875 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1876 /* also consider unreachable route */
1877 strict &= ~RT6_LOOKUP_F_REACHABLE;
1878 fn = saved_fn;
1879 goto redo_rt6_select;
367efcb9 1880 }
a3c00e46
MKL
1881 }
1882
23fb93a4 1883 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1884 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1885 rcu_read_unlock();
d3843fe5 1886 dst_hold(&rt->dst);
b65f164d 1887 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5 1888 return rt;
23fb93a4
DA
1889 }
1890
1891 /*Search through exception table */
1892 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1893 if (rt) {
d4ead6b3 1894 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1895 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1896
66f5d6ce 1897 rcu_read_unlock();
b65f164d 1898 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1899 return rt;
3da59bd9 1900 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
23fb93a4 1901 !(f6i->rt6i_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1902 /* Create a RTF_CACHE clone which will not be
1903 * owned by the fib6 tree. It is for the special case where
1904 * the daddr in the skb during the neighbor look-up is different
1905 * from the fl6->daddr used to look-up route here.
1906 */
1907
1908 struct rt6_info *uncached_rt;
1909
23fb93a4
DA
1910 if (ip6_hold_safe(net, &f6i, true)) {
1911 dst_use_noref(&f6i->dst, jiffies);
d3843fe5 1912 } else {
66f5d6ce 1913 rcu_read_unlock();
23fb93a4 1914 uncached_rt = f6i;
d3843fe5
WW
1915 goto uncached_rt_out;
1916 }
66f5d6ce 1917 rcu_read_unlock();
d52d3997 1918
23fb93a4 1919 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
3da59bd9 1920 dst_release(&rt->dst);
c71099ac 1921
1cfb71ee
WW
1922 if (uncached_rt) {
1923 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1924 * No need for another dst_hold()
1925 */
8d0b94af 1926 rt6_uncached_list_add(uncached_rt);
81eb8447 1927 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1928 } else {
3da59bd9 1929 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1930 dst_hold(&uncached_rt->dst);
1931 }
b811580d 1932
d3843fe5 1933uncached_rt_out:
b65f164d 1934 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1935 return uncached_rt;
3da59bd9 1936
d52d3997
MKL
1937 } else {
1938 /* Get a percpu copy */
1939
1940 struct rt6_info *pcpu_rt;
1941
23fb93a4 1942 dst_use_noref(&f6i->dst, jiffies);
951f788a 1943 local_bh_disable();
23fb93a4 1944 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1945
951f788a 1946 if (!pcpu_rt) {
a94b9367 1947 /* atomic_inc_not_zero() is needed when using rcu */
23fb93a4 1948 if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
951f788a 1949 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1950 * rt->rt6i_ref makes sure rt can't be released.
1951 */
23fb93a4
DA
1952 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1953 rt6_release(f6i);
a94b9367
WW
1954 } else {
1955 /* rt is already removed from tree */
a94b9367
WW
1956 pcpu_rt = net->ipv6.ip6_null_entry;
1957 dst_hold(&pcpu_rt->dst);
1958 }
9c7370a1 1959 }
951f788a
ED
1960 local_bh_enable();
1961 rcu_read_unlock();
b65f164d 1962 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1963 return pcpu_rt;
1964 }
1da177e4 1965}
9ff74384 1966EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1967
b75cc8f9
DA
1968static struct rt6_info *ip6_pol_route_input(struct net *net,
1969 struct fib6_table *table,
1970 struct flowi6 *fl6,
1971 const struct sk_buff *skb,
1972 int flags)
4acad72d 1973{
b75cc8f9 1974 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1975}
1976
d409b847
MB
1977struct dst_entry *ip6_route_input_lookup(struct net *net,
1978 struct net_device *dev,
b75cc8f9
DA
1979 struct flowi6 *fl6,
1980 const struct sk_buff *skb,
1981 int flags)
72331bc0
SL
1982{
1983 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1984 flags |= RT6_LOOKUP_F_IFACE;
1985
b75cc8f9 1986 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1987}
d409b847 1988EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1989
23aebdac 1990static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1991 struct flow_keys *keys,
1992 struct flow_keys *flkeys)
23aebdac
JS
1993{
1994 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1995 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1996 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1997 const struct ipv6hdr *inner_iph;
1998 const struct icmp6hdr *icmph;
1999 struct ipv6hdr _inner_iph;
2000
2001 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2002 goto out;
2003
2004 icmph = icmp6_hdr(skb);
2005 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2006 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2007 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2008 icmph->icmp6_type != ICMPV6_PARAMPROB)
2009 goto out;
2010
2011 inner_iph = skb_header_pointer(skb,
2012 skb_transport_offset(skb) + sizeof(*icmph),
2013 sizeof(_inner_iph), &_inner_iph);
2014 if (!inner_iph)
2015 goto out;
2016
2017 key_iph = inner_iph;
5e5d6fed 2018 _flkeys = NULL;
23aebdac 2019out:
5e5d6fed
RP
2020 if (_flkeys) {
2021 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2022 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2023 keys->tags.flow_label = _flkeys->tags.flow_label;
2024 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2025 } else {
2026 keys->addrs.v6addrs.src = key_iph->saddr;
2027 keys->addrs.v6addrs.dst = key_iph->daddr;
2028 keys->tags.flow_label = ip6_flowinfo(key_iph);
2029 keys->basic.ip_proto = key_iph->nexthdr;
2030 }
23aebdac
JS
2031}
2032
2033/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2034u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2035 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2036{
2037 struct flow_keys hash_keys;
9a2a537a 2038 u32 mhash;
23aebdac 2039
bbfa047a 2040 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2041 case 0:
2042 memset(&hash_keys, 0, sizeof(hash_keys));
2043 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2044 if (skb) {
2045 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2046 } else {
2047 hash_keys.addrs.v6addrs.src = fl6->saddr;
2048 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2049 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2050 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2051 }
2052 break;
2053 case 1:
2054 if (skb) {
2055 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2056 struct flow_keys keys;
2057
2058 /* short-circuit if we already have L4 hash present */
2059 if (skb->l4_hash)
2060 return skb_get_hash_raw(skb) >> 1;
2061
2062 memset(&hash_keys, 0, sizeof(hash_keys));
2063
2064 if (!flkeys) {
2065 skb_flow_dissect_flow_keys(skb, &keys, flag);
2066 flkeys = &keys;
2067 }
2068 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2069 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2070 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2071 hash_keys.ports.src = flkeys->ports.src;
2072 hash_keys.ports.dst = flkeys->ports.dst;
2073 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2074 } else {
2075 memset(&hash_keys, 0, sizeof(hash_keys));
2076 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2077 hash_keys.addrs.v6addrs.src = fl6->saddr;
2078 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2079 hash_keys.ports.src = fl6->fl6_sport;
2080 hash_keys.ports.dst = fl6->fl6_dport;
2081 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2082 }
2083 break;
23aebdac 2084 }
9a2a537a 2085 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2086
9a2a537a 2087 return mhash >> 1;
23aebdac
JS
2088}
2089
c71099ac
TG
2090void ip6_route_input(struct sk_buff *skb)
2091{
b71d1d42 2092 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2093 struct net *net = dev_net(skb->dev);
adaa70bb 2094 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2095 struct ip_tunnel_info *tun_info;
4c9483b2 2096 struct flowi6 fl6 = {
e0d56fdd 2097 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2098 .daddr = iph->daddr,
2099 .saddr = iph->saddr,
6502ca52 2100 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2101 .flowi6_mark = skb->mark,
2102 .flowi6_proto = iph->nexthdr,
c71099ac 2103 };
5e5d6fed 2104 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2105
904af04d 2106 tun_info = skb_tunnel_info(skb);
46fa062a 2107 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2108 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2109
2110 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2111 flkeys = &_flkeys;
2112
23aebdac 2113 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2114 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2115 skb_dst_drop(skb);
b75cc8f9
DA
2116 skb_dst_set(skb,
2117 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2118}
2119
b75cc8f9
DA
2120static struct rt6_info *ip6_pol_route_output(struct net *net,
2121 struct fib6_table *table,
2122 struct flowi6 *fl6,
2123 const struct sk_buff *skb,
2124 int flags)
1da177e4 2125{
b75cc8f9 2126 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2127}
2128
6f21c96a
PA
2129struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2130 struct flowi6 *fl6, int flags)
c71099ac 2131{
d46a9d67 2132 bool any_src;
c71099ac 2133
4c1feac5
DA
2134 if (rt6_need_strict(&fl6->daddr)) {
2135 struct dst_entry *dst;
2136
2137 dst = l3mdev_link_scope_lookup(net, fl6);
2138 if (dst)
2139 return dst;
2140 }
ca254490 2141
1fb9489b 2142 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2143
d46a9d67 2144 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2145 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2146 (fl6->flowi6_oif && any_src))
77d16f45 2147 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2148
d46a9d67 2149 if (!any_src)
adaa70bb 2150 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2151 else if (sk)
2152 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2153
b75cc8f9 2154 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2155}
6f21c96a 2156EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2157
2774c131 2158struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2159{
5c1e6aa3 2160 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2161 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2162 struct dst_entry *new = NULL;
2163
1dbe3252 2164 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2165 DST_OBSOLETE_DEAD, 0);
14e50e57 2166 if (rt) {
0a1f5962 2167 rt6_info_init(rt);
81eb8447 2168 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2169
0a1f5962 2170 new = &rt->dst;
14e50e57 2171 new->__use = 1;
352e512c 2172 new->input = dst_discard;
ede2059d 2173 new->output = dst_discard_out;
14e50e57 2174
0a1f5962 2175 dst_copy_metrics(new, &ort->dst);
14e50e57 2176
1dbe3252 2177 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2178 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2179 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2180 rt->rt6i_metric = 0;
2181
2182 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2183#ifdef CONFIG_IPV6_SUBTREES
2184 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2185#endif
14e50e57
DM
2186 }
2187
69ead7af
DM
2188 dst_release(dst_orig);
2189 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2190}
14e50e57 2191
1da177e4
LT
2192/*
2193 * Destination cache support functions
2194 */
2195
3da59bd9
MKL
2196static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2197{
36143645 2198 u32 rt_cookie = 0;
c5cff856
WW
2199
2200 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2201 return NULL;
2202
2203 if (rt6_check_expired(rt))
2204 return NULL;
2205
2206 return &rt->dst;
2207}
2208
2209static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2210{
5973fb1e
MKL
2211 if (!__rt6_check_expired(rt) &&
2212 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2213 rt6_check(rt->from, cookie))
3da59bd9
MKL
2214 return &rt->dst;
2215 else
2216 return NULL;
2217}
2218
1da177e4
LT
2219static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2220{
2221 struct rt6_info *rt;
2222
2223 rt = (struct rt6_info *) dst;
2224
6f3118b5
ND
2225 /* All IPV6 dsts are created with ->obsolete set to the value
2226 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2227 * into this function always.
2228 */
e3bc10bd 2229
02bcf4e0 2230 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2231 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2232 return rt6_dst_from_check(rt, cookie);
2233 else
2234 return rt6_check(rt, cookie);
1da177e4
LT
2235}
2236
2237static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2238{
2239 struct rt6_info *rt = (struct rt6_info *) dst;
2240
2241 if (rt) {
54c1a859
YH
2242 if (rt->rt6i_flags & RTF_CACHE) {
2243 if (rt6_check_expired(rt)) {
afb1d4b5 2244 ip6_del_rt(dev_net(dst->dev), rt);
54c1a859
YH
2245 dst = NULL;
2246 }
2247 } else {
1da177e4 2248 dst_release(dst);
54c1a859
YH
2249 dst = NULL;
2250 }
1da177e4 2251 }
54c1a859 2252 return dst;
1da177e4
LT
2253}
2254
2255static void ip6_link_failure(struct sk_buff *skb)
2256{
2257 struct rt6_info *rt;
2258
3ffe533c 2259 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2260
adf30907 2261 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2262 if (rt) {
1eb4f758 2263 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2264 if (dst_hold_safe(&rt->dst))
afb1d4b5 2265 ip6_del_rt(dev_net(rt->dst.dev), rt);
c5cff856
WW
2266 } else {
2267 struct fib6_node *fn;
2268
2269 rcu_read_lock();
2270 fn = rcu_dereference(rt->rt6i_node);
2271 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2272 fn->fn_sernum = -1;
2273 rcu_read_unlock();
1eb4f758 2274 }
1da177e4
LT
2275 }
2276}
2277
45e4fd26
MKL
2278static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2279{
2280 struct net *net = dev_net(rt->dst.dev);
2281
d4ead6b3 2282 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2283 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2284 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2285}
2286
0d3f6d29
MKL
2287static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2288{
2289 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2290 (rt->rt6i_flags & RTF_PCPU ||
2291 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2292}
2293
45e4fd26
MKL
2294static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2295 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2296{
0dec879f 2297 const struct in6_addr *daddr, *saddr;
67ba4152 2298 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2299
45e4fd26
MKL
2300 if (rt6->rt6i_flags & RTF_LOCAL)
2301 return;
81aded24 2302
19bda36c
XL
2303 if (dst_metric_locked(dst, RTAX_MTU))
2304 return;
2305
0dec879f
JA
2306 if (iph) {
2307 daddr = &iph->daddr;
2308 saddr = &iph->saddr;
2309 } else if (sk) {
2310 daddr = &sk->sk_v6_daddr;
2311 saddr = &inet6_sk(sk)->saddr;
2312 } else {
2313 daddr = NULL;
2314 saddr = NULL;
2315 }
2316 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2317 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2318 if (mtu >= dst_mtu(dst))
2319 return;
9d289715 2320
0d3f6d29 2321 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2322 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2323 /* update rt6_ex->stamp for cache */
2324 if (rt6->rt6i_flags & RTF_CACHE)
2325 rt6_update_exception_stamp_rt(rt6);
0dec879f 2326 } else if (daddr) {
45e4fd26
MKL
2327 struct rt6_info *nrt6;
2328
d4ead6b3 2329 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
45e4fd26
MKL
2330 if (nrt6) {
2331 rt6_do_update_pmtu(nrt6, mtu);
d4ead6b3 2332 if (rt6_insert_exception(nrt6, rt6->from))
2b760fcf 2333 dst_release_immediate(&nrt6->dst);
45e4fd26 2334 }
1da177e4
LT
2335 }
2336}
2337
45e4fd26
MKL
2338static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2339 struct sk_buff *skb, u32 mtu)
2340{
2341 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2342}
2343
42ae66c8 2344void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2345 int oif, u32 mark, kuid_t uid)
81aded24
DM
2346{
2347 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2348 struct dst_entry *dst;
2349 struct flowi6 fl6;
2350
2351 memset(&fl6, 0, sizeof(fl6));
2352 fl6.flowi6_oif = oif;
1b3c61dc 2353 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2354 fl6.daddr = iph->daddr;
2355 fl6.saddr = iph->saddr;
6502ca52 2356 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2357 fl6.flowi6_uid = uid;
81aded24
DM
2358
2359 dst = ip6_route_output(net, NULL, &fl6);
2360 if (!dst->error)
45e4fd26 2361 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2362 dst_release(dst);
2363}
2364EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2365
2366void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2367{
33c162a9
MKL
2368 struct dst_entry *dst;
2369
81aded24 2370 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2371 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2372
2373 dst = __sk_dst_get(sk);
2374 if (!dst || !dst->obsolete ||
2375 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2376 return;
2377
2378 bh_lock_sock(sk);
2379 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2380 ip6_datagram_dst_update(sk, false);
2381 bh_unlock_sock(sk);
81aded24
DM
2382}
2383EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2384
7d6850f7
AK
2385void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2386 const struct flowi6 *fl6)
2387{
2388#ifdef CONFIG_IPV6_SUBTREES
2389 struct ipv6_pinfo *np = inet6_sk(sk);
2390#endif
2391
2392 ip6_dst_store(sk, dst,
2393 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2394 &sk->sk_v6_daddr : NULL,
2395#ifdef CONFIG_IPV6_SUBTREES
2396 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2397 &np->saddr :
2398#endif
2399 NULL);
2400}
2401
b55b76b2
DJ
2402/* Handle redirects */
2403struct ip6rd_flowi {
2404 struct flowi6 fl6;
2405 struct in6_addr gateway;
2406};
2407
2408static struct rt6_info *__ip6_route_redirect(struct net *net,
2409 struct fib6_table *table,
2410 struct flowi6 *fl6,
b75cc8f9 2411 const struct sk_buff *skb,
b55b76b2
DJ
2412 int flags)
2413{
2414 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4
DA
2415 struct rt6_info *ret = NULL, *rt_cache;
2416 struct rt6_info *rt;
b55b76b2
DJ
2417 struct fib6_node *fn;
2418
2419 /* Get the "current" route for this destination and
67c408cf 2420 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2421 *
2422 * RFC 4861 specifies that redirects should only be
2423 * accepted if they come from the nexthop to the target.
2424 * Due to the way the routes are chosen, this notion
2425 * is a bit fuzzy and one might need to check all possible
2426 * routes.
2427 */
2428
66f5d6ce 2429 rcu_read_lock();
b55b76b2
DJ
2430 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2431restart:
66f5d6ce 2432 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2433 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2434 continue;
14895687 2435 if (fib6_check_expired(rt))
b55b76b2 2436 continue;
6edb3c96 2437 if (rt->rt6i_flags & RTF_REJECT)
b55b76b2
DJ
2438 break;
2439 if (!(rt->rt6i_flags & RTF_GATEWAY))
2440 continue;
5e670d84 2441 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2442 continue;
2b760fcf
WW
2443 /* rt_cache's gateway might be different from its 'parent'
2444 * in the case of an ip redirect.
2445 * So we keep searching in the exception table if the gateway
2446 * is different.
2447 */
5e670d84 2448 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2449 rt_cache = rt6_find_cached_rt(rt,
2450 &fl6->daddr,
2451 &fl6->saddr);
2452 if (rt_cache &&
2453 ipv6_addr_equal(&rdfl->gateway,
2454 &rt_cache->rt6i_gateway)) {
23fb93a4 2455 ret = rt_cache;
2b760fcf
WW
2456 break;
2457 }
b55b76b2 2458 continue;
2b760fcf 2459 }
b55b76b2
DJ
2460 break;
2461 }
2462
2463 if (!rt)
421842ed 2464 rt = net->ipv6.fib6_null_entry;
6edb3c96 2465 else if (rt->rt6i_flags & RTF_REJECT) {
23fb93a4 2466 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2467 goto out;
2468 }
2469
421842ed 2470 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2471 fn = fib6_backtrack(fn, &fl6->saddr);
2472 if (fn)
2473 goto restart;
b55b76b2 2474 }
a3c00e46 2475
b0a1ba59 2476out:
23fb93a4
DA
2477 if (ret)
2478 dst_hold(&ret->dst);
2479 else
2480 ret = ip6_create_rt_rcu(rt);
b55b76b2 2481
66f5d6ce 2482 rcu_read_unlock();
b55b76b2 2483
23fb93a4
DA
2484 trace_fib6_table_lookup(net, ret, table, fl6);
2485 return ret;
b55b76b2
DJ
2486};
2487
2488static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2489 const struct flowi6 *fl6,
2490 const struct sk_buff *skb,
2491 const struct in6_addr *gateway)
b55b76b2
DJ
2492{
2493 int flags = RT6_LOOKUP_F_HAS_SADDR;
2494 struct ip6rd_flowi rdfl;
2495
2496 rdfl.fl6 = *fl6;
2497 rdfl.gateway = *gateway;
2498
b75cc8f9 2499 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2500 flags, __ip6_route_redirect);
2501}
2502
e2d118a1
LC
2503void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2504 kuid_t uid)
3a5ad2ee
DM
2505{
2506 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2507 struct dst_entry *dst;
2508 struct flowi6 fl6;
2509
2510 memset(&fl6, 0, sizeof(fl6));
e374c618 2511 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2512 fl6.flowi6_oif = oif;
2513 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2514 fl6.daddr = iph->daddr;
2515 fl6.saddr = iph->saddr;
6502ca52 2516 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2517 fl6.flowi6_uid = uid;
3a5ad2ee 2518
b75cc8f9 2519 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2520 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2521 dst_release(dst);
2522}
2523EXPORT_SYMBOL_GPL(ip6_redirect);
2524
c92a59ec
DJ
2525void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2526 u32 mark)
2527{
2528 const struct ipv6hdr *iph = ipv6_hdr(skb);
2529 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2530 struct dst_entry *dst;
2531 struct flowi6 fl6;
2532
2533 memset(&fl6, 0, sizeof(fl6));
e374c618 2534 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2535 fl6.flowi6_oif = oif;
2536 fl6.flowi6_mark = mark;
c92a59ec
DJ
2537 fl6.daddr = msg->dest;
2538 fl6.saddr = iph->daddr;
e2d118a1 2539 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2540
b75cc8f9 2541 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2542 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2543 dst_release(dst);
2544}
2545
3a5ad2ee
DM
2546void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2547{
e2d118a1
LC
2548 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2549 sk->sk_uid);
3a5ad2ee
DM
2550}
2551EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2552
0dbaee3b 2553static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2554{
0dbaee3b
DM
2555 struct net_device *dev = dst->dev;
2556 unsigned int mtu = dst_mtu(dst);
2557 struct net *net = dev_net(dev);
2558
1da177e4
LT
2559 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2560
5578689a
DL
2561 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2562 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2563
2564 /*
1ab1457c
YH
2565 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2566 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2567 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2568 * rely only on pmtu discovery"
2569 */
2570 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2571 mtu = IPV6_MAXPLEN;
2572 return mtu;
2573}
2574
ebb762f2 2575static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2576{
d33e4553 2577 struct inet6_dev *idev;
d4ead6b3 2578 unsigned int mtu;
4b32b5ad
MKL
2579
2580 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2581 if (mtu)
30f78d8e 2582 goto out;
618f9bc7
SK
2583
2584 mtu = IPV6_MIN_MTU;
d33e4553
DM
2585
2586 rcu_read_lock();
2587 idev = __in6_dev_get(dst->dev);
2588 if (idev)
2589 mtu = idev->cnf.mtu6;
2590 rcu_read_unlock();
2591
30f78d8e 2592out:
14972cbd
RP
2593 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2594
2595 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2596}
2597
3b00944c 2598struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2599 struct flowi6 *fl6)
1da177e4 2600{
87a11578 2601 struct dst_entry *dst;
1da177e4
LT
2602 struct rt6_info *rt;
2603 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2604 struct net *net = dev_net(dev);
1da177e4 2605
38308473 2606 if (unlikely(!idev))
122bdf67 2607 return ERR_PTR(-ENODEV);
1da177e4 2608
ad706862 2609 rt = ip6_dst_alloc(net, dev, 0);
38308473 2610 if (unlikely(!rt)) {
1da177e4 2611 in6_dev_put(idev);
87a11578 2612 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2613 goto out;
2614 }
2615
8e2ec639 2616 rt->dst.flags |= DST_HOST;
588753f1 2617 rt->dst.input = ip6_input;
8e2ec639 2618 rt->dst.output = ip6_output;
550bab42 2619 rt->rt6i_gateway = fl6->daddr;
87a11578 2620 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2621 rt->rt6i_dst.plen = 128;
2622 rt->rt6i_idev = idev;
14edd87d 2623 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2624
4c981e28 2625 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2626 * do proper release of the net_device
2627 */
2628 rt6_uncached_list_add(rt);
81eb8447 2629 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2630
87a11578
DM
2631 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2632
1da177e4 2633out:
87a11578 2634 return dst;
1da177e4
LT
2635}
2636
569d3645 2637static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2638{
86393e52 2639 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2640 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2641 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2642 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2643 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2644 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2645 int entries;
7019b78e 2646
fc66f95c 2647 entries = dst_entries_get_fast(ops);
49a18d86 2648 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2649 entries <= rt_max_size)
1da177e4
LT
2650 goto out;
2651
6891a346 2652 net->ipv6.ip6_rt_gc_expire++;
14956643 2653 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2654 entries = dst_entries_get_slow(ops);
2655 if (entries < ops->gc_thresh)
7019b78e 2656 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2657out:
7019b78e 2658 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2659 return entries > rt_max_size;
1da177e4
LT
2660}
2661
d4ead6b3
DA
2662static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2663 struct fib6_config *cfg)
e715b6d3 2664{
d4ead6b3 2665 int err = 0;
e715b6d3 2666
d4ead6b3
DA
2667 if (cfg->fc_mx) {
2668 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2669 GFP_KERNEL);
2670 if (unlikely(!rt->fib6_metrics))
2671 return -ENOMEM;
ea697639 2672
d4ead6b3 2673 refcount_set(&rt->fib6_metrics->refcnt, 1);
e715b6d3 2674
d4ead6b3
DA
2675 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2676 rt->fib6_metrics->metrics);
c3a8d947 2677 }
e715b6d3 2678
d4ead6b3 2679 return err;
e715b6d3 2680}
1da177e4 2681
8c14586f
DA
2682static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2683 struct fib6_config *cfg,
f4797b33
DA
2684 const struct in6_addr *gw_addr,
2685 u32 tbid, int flags)
8c14586f
DA
2686{
2687 struct flowi6 fl6 = {
2688 .flowi6_oif = cfg->fc_ifindex,
2689 .daddr = *gw_addr,
2690 .saddr = cfg->fc_prefsrc,
2691 };
2692 struct fib6_table *table;
2693 struct rt6_info *rt;
8c14586f 2694
f4797b33 2695 table = fib6_get_table(net, tbid);
8c14586f
DA
2696 if (!table)
2697 return NULL;
2698
2699 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2700 flags |= RT6_LOOKUP_F_HAS_SADDR;
2701
f4797b33 2702 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2703 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2704
2705 /* if table lookup failed, fall back to full lookup */
2706 if (rt == net->ipv6.ip6_null_entry) {
2707 ip6_rt_put(rt);
2708 rt = NULL;
2709 }
2710
2711 return rt;
2712}
2713
fc1e64e1
DA
2714static int ip6_route_check_nh_onlink(struct net *net,
2715 struct fib6_config *cfg,
9fbb704c 2716 const struct net_device *dev,
fc1e64e1
DA
2717 struct netlink_ext_ack *extack)
2718{
44750f84 2719 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2720 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2721 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2722 struct rt6_info *grt;
2723 int err;
2724
2725 err = 0;
2726 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2727 if (grt) {
58e354c0
DA
2728 if (!grt->dst.error &&
2729 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2730 NL_SET_ERR_MSG(extack,
2731 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2732 err = -EINVAL;
2733 }
2734
2735 ip6_rt_put(grt);
2736 }
2737
2738 return err;
2739}
2740
1edce99f
DA
2741static int ip6_route_check_nh(struct net *net,
2742 struct fib6_config *cfg,
2743 struct net_device **_dev,
2744 struct inet6_dev **idev)
2745{
2746 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2747 struct net_device *dev = _dev ? *_dev : NULL;
2748 struct rt6_info *grt = NULL;
2749 int err = -EHOSTUNREACH;
2750
2751 if (cfg->fc_table) {
f4797b33
DA
2752 int flags = RT6_LOOKUP_F_IFACE;
2753
2754 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2755 cfg->fc_table, flags);
1edce99f
DA
2756 if (grt) {
2757 if (grt->rt6i_flags & RTF_GATEWAY ||
2758 (dev && dev != grt->dst.dev)) {
2759 ip6_rt_put(grt);
2760 grt = NULL;
2761 }
2762 }
2763 }
2764
2765 if (!grt)
b75cc8f9 2766 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2767
2768 if (!grt)
2769 goto out;
2770
2771 if (dev) {
2772 if (dev != grt->dst.dev) {
2773 ip6_rt_put(grt);
2774 goto out;
2775 }
2776 } else {
2777 *_dev = dev = grt->dst.dev;
2778 *idev = grt->rt6i_idev;
2779 dev_hold(dev);
2780 in6_dev_hold(grt->rt6i_idev);
2781 }
2782
2783 if (!(grt->rt6i_flags & RTF_GATEWAY))
2784 err = 0;
2785
2786 ip6_rt_put(grt);
2787
2788out:
2789 return err;
2790}
2791
9fbb704c
DA
2792static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2793 struct net_device **_dev, struct inet6_dev **idev,
2794 struct netlink_ext_ack *extack)
2795{
2796 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2797 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2798 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2799 const struct net_device *dev = *_dev;
232378e8 2800 bool need_addr_check = !dev;
9fbb704c
DA
2801 int err = -EINVAL;
2802
2803 /* if gw_addr is local we will fail to detect this in case
2804 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2805 * will return already-added prefix route via interface that
2806 * prefix route was assigned to, which might be non-loopback.
2807 */
232378e8
DA
2808 if (dev &&
2809 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2810 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2811 goto out;
2812 }
2813
2814 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2815 /* IPv6 strictly inhibits using not link-local
2816 * addresses as nexthop address.
2817 * Otherwise, router will not able to send redirects.
2818 * It is very good, but in some (rare!) circumstances
2819 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2820 * some exceptions. --ANK
2821 * We allow IPv4-mapped nexthops to support RFC4798-type
2822 * addressing
2823 */
2824 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2825 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2826 goto out;
2827 }
2828
2829 if (cfg->fc_flags & RTNH_F_ONLINK)
2830 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2831 else
2832 err = ip6_route_check_nh(net, cfg, _dev, idev);
2833
2834 if (err)
2835 goto out;
2836 }
2837
2838 /* reload in case device was changed */
2839 dev = *_dev;
2840
2841 err = -EINVAL;
2842 if (!dev) {
2843 NL_SET_ERR_MSG(extack, "Egress device not specified");
2844 goto out;
2845 } else if (dev->flags & IFF_LOOPBACK) {
2846 NL_SET_ERR_MSG(extack,
2847 "Egress device can not be loopback device for this route");
2848 goto out;
2849 }
232378e8
DA
2850
2851 /* if we did not check gw_addr above, do so now that the
2852 * egress device has been resolved.
2853 */
2854 if (need_addr_check &&
2855 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2856 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2857 goto out;
2858 }
2859
9fbb704c
DA
2860 err = 0;
2861out:
2862 return err;
2863}
2864
333c4301 2865static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2866 gfp_t gfp_flags,
333c4301 2867 struct netlink_ext_ack *extack)
1da177e4 2868{
5578689a 2869 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2870 struct rt6_info *rt = NULL;
2871 struct net_device *dev = NULL;
2872 struct inet6_dev *idev = NULL;
c71099ac 2873 struct fib6_table *table;
1da177e4 2874 int addr_type;
8c5b83f0 2875 int err = -EINVAL;
1da177e4 2876
557c44be 2877 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2878 if (cfg->fc_flags & RTF_PCPU) {
2879 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2880 goto out;
d5d531cb 2881 }
557c44be 2882
2ea2352e
WW
2883 /* RTF_CACHE is an internal flag; can not be set by userspace */
2884 if (cfg->fc_flags & RTF_CACHE) {
2885 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2886 goto out;
2887 }
2888
e8478e80
DA
2889 if (cfg->fc_type > RTN_MAX) {
2890 NL_SET_ERR_MSG(extack, "Invalid route type");
2891 goto out;
2892 }
2893
d5d531cb
DA
2894 if (cfg->fc_dst_len > 128) {
2895 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2896 goto out;
2897 }
2898 if (cfg->fc_src_len > 128) {
2899 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2900 goto out;
d5d531cb 2901 }
1da177e4 2902#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2903 if (cfg->fc_src_len) {
2904 NL_SET_ERR_MSG(extack,
2905 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2906 goto out;
d5d531cb 2907 }
1da177e4 2908#endif
86872cb5 2909 if (cfg->fc_ifindex) {
1da177e4 2910 err = -ENODEV;
5578689a 2911 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2912 if (!dev)
2913 goto out;
2914 idev = in6_dev_get(dev);
2915 if (!idev)
2916 goto out;
2917 }
2918
86872cb5
TG
2919 if (cfg->fc_metric == 0)
2920 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2921
fc1e64e1
DA
2922 if (cfg->fc_flags & RTNH_F_ONLINK) {
2923 if (!dev) {
2924 NL_SET_ERR_MSG(extack,
2925 "Nexthop device required for onlink");
2926 err = -ENODEV;
2927 goto out;
2928 }
2929
2930 if (!(dev->flags & IFF_UP)) {
2931 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2932 err = -ENETDOWN;
2933 goto out;
2934 }
2935 }
2936
d71314b4 2937 err = -ENOBUFS;
38308473
DM
2938 if (cfg->fc_nlinfo.nlh &&
2939 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2940 table = fib6_get_table(net, cfg->fc_table);
38308473 2941 if (!table) {
f3213831 2942 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2943 table = fib6_new_table(net, cfg->fc_table);
2944 }
2945 } else {
2946 table = fib6_new_table(net, cfg->fc_table);
2947 }
38308473
DM
2948
2949 if (!table)
c71099ac 2950 goto out;
c71099ac 2951
ad706862
MKL
2952 rt = ip6_dst_alloc(net, NULL,
2953 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2954
38308473 2955 if (!rt) {
1da177e4
LT
2956 err = -ENOMEM;
2957 goto out;
2958 }
2959
d4ead6b3
DA
2960 err = ip6_convert_metrics(net, rt, cfg);
2961 if (err < 0)
2962 goto out;
2963
1716a961 2964 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2965 fib6_set_expires(rt, jiffies +
1716a961
G
2966 clock_t_to_jiffies(cfg->fc_expires));
2967 else
14895687 2968 fib6_clean_expires(rt);
1da177e4 2969
86872cb5
TG
2970 if (cfg->fc_protocol == RTPROT_UNSPEC)
2971 cfg->fc_protocol = RTPROT_BOOT;
2972 rt->rt6i_protocol = cfg->fc_protocol;
2973
2974 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2975
19e42e45
RP
2976 if (cfg->fc_encap) {
2977 struct lwtunnel_state *lwtstate;
2978
30357d7d 2979 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2980 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2981 &lwtstate, extack);
19e42e45
RP
2982 if (err)
2983 goto out;
5e670d84 2984 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2985 }
2986
86872cb5
TG
2987 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2988 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2989 if (rt->rt6i_dst.plen == 128)
3b6761d1 2990 rt->dst_host = true;
e5fd387a 2991
1da177e4 2992#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2993 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2994 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2995#endif
2996
86872cb5 2997 rt->rt6i_metric = cfg->fc_metric;
5e670d84 2998 rt->fib6_nh.nh_weight = 1;
1da177e4 2999
e8478e80
DA
3000 rt->fib6_type = cfg->fc_type;
3001
1da177e4
LT
3002 /* We cannot add true routes via loopback here,
3003 they would result in kernel looping; promote them to reject routes
3004 */
86872cb5 3005 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3006 (dev && (dev->flags & IFF_LOOPBACK) &&
3007 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3008 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3009 /* hold loopback dev/idev if we haven't done so. */
5578689a 3010 if (dev != net->loopback_dev) {
1da177e4
LT
3011 if (dev) {
3012 dev_put(dev);
3013 in6_dev_put(idev);
3014 }
5578689a 3015 dev = net->loopback_dev;
1da177e4
LT
3016 dev_hold(dev);
3017 idev = in6_dev_get(dev);
3018 if (!idev) {
3019 err = -ENODEV;
3020 goto out;
3021 }
3022 }
1da177e4
LT
3023 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
3024 goto install_route;
3025 }
3026
86872cb5 3027 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3028 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3029 if (err)
48ed7b26 3030 goto out;
1da177e4 3031
5e670d84 3032 rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
3033 }
3034
3035 err = -ENODEV;
38308473 3036 if (!dev)
1da177e4
LT
3037 goto out;
3038
428604fb
LB
3039 if (idev->cnf.disable_ipv6) {
3040 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3041 err = -EACCES;
3042 goto out;
3043 }
3044
955ec4cb
DA
3045 if (!(dev->flags & IFF_UP)) {
3046 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3047 err = -ENETDOWN;
3048 goto out;
3049 }
3050
c3968a85
DW
3051 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3052 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3053 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3054 err = -EINVAL;
3055 goto out;
3056 }
4e3fd7a0 3057 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
3058 rt->rt6i_prefsrc.plen = 128;
3059 } else
3060 rt->rt6i_prefsrc.plen = 0;
3061
86872cb5 3062 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
3063
3064install_route:
5609b80a
IS
3065 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3066 !netif_carrier_ok(dev))
5e670d84
DA
3067 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3068 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3069 rt->fib6_nh.nh_dev = rt->dst.dev = dev;
1da177e4 3070 rt->rt6i_idev = idev;
c71099ac 3071 rt->rt6i_table = table;
63152fc0 3072
c346dca1 3073 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3074
8c5b83f0 3075 return rt;
6b9ea5a6
RP
3076out:
3077 if (dev)
3078 dev_put(dev);
3079 if (idev)
3080 in6_dev_put(idev);
587fea74
WW
3081 if (rt)
3082 dst_release_immediate(&rt->dst);
6b9ea5a6 3083
8c5b83f0 3084 return ERR_PTR(err);
6b9ea5a6
RP
3085}
3086
acb54e3c
DA
3087int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3088 struct netlink_ext_ack *extack)
6b9ea5a6 3089{
8c5b83f0 3090 struct rt6_info *rt;
6b9ea5a6
RP
3091 int err;
3092
acb54e3c 3093 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3094 if (IS_ERR(rt))
3095 return PTR_ERR(rt);
6b9ea5a6 3096
d4ead6b3 3097 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
6b9ea5a6 3098
1da177e4
LT
3099 return err;
3100}
3101
86872cb5 3102static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4 3103{
afb1d4b5 3104 struct net *net = info->nl_net;
c71099ac 3105 struct fib6_table *table;
afb1d4b5 3106 int err;
1da177e4 3107
421842ed 3108 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3109 err = -ENOENT;
3110 goto out;
3111 }
6c813a72 3112
c71099ac 3113 table = rt->rt6i_table;
66f5d6ce 3114 spin_lock_bh(&table->tb6_lock);
86872cb5 3115 err = fib6_del(rt, info);
66f5d6ce 3116 spin_unlock_bh(&table->tb6_lock);
1da177e4 3117
6825a26c 3118out:
94e187c0 3119 ip6_rt_put(rt);
1da177e4
LT
3120 return err;
3121}
3122
afb1d4b5 3123int ip6_del_rt(struct net *net, struct rt6_info *rt)
e0a1ad73 3124{
afb1d4b5
DA
3125 struct nl_info info = { .nl_net = net };
3126
528c4ceb 3127 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3128}
3129
0ae81335
DA
3130static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3131{
3132 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3133 struct net *net = info->nl_net;
16a16cd3 3134 struct sk_buff *skb = NULL;
0ae81335 3135 struct fib6_table *table;
e3330039 3136 int err = -ENOENT;
0ae81335 3137
421842ed 3138 if (rt == net->ipv6.fib6_null_entry)
e3330039 3139 goto out_put;
0ae81335 3140 table = rt->rt6i_table;
66f5d6ce 3141 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3142
3143 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3144 struct rt6_info *sibling, *next_sibling;
3145
16a16cd3
DA
3146 /* prefer to send a single notification with all hops */
3147 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3148 if (skb) {
3149 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3150
d4ead6b3 3151 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3152 NULL, NULL, 0, RTM_DELROUTE,
3153 info->portid, seq, 0) < 0) {
3154 kfree_skb(skb);
3155 skb = NULL;
3156 } else
3157 info->skip_notify = 1;
3158 }
3159
0ae81335
DA
3160 list_for_each_entry_safe(sibling, next_sibling,
3161 &rt->rt6i_siblings,
3162 rt6i_siblings) {
3163 err = fib6_del(sibling, info);
3164 if (err)
e3330039 3165 goto out_unlock;
0ae81335
DA
3166 }
3167 }
3168
3169 err = fib6_del(rt, info);
e3330039 3170out_unlock:
66f5d6ce 3171 spin_unlock_bh(&table->tb6_lock);
e3330039 3172out_put:
0ae81335 3173 ip6_rt_put(rt);
16a16cd3
DA
3174
3175 if (skb) {
e3330039 3176 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3177 info->nlh, gfp_any());
3178 }
0ae81335
DA
3179 return err;
3180}
3181
23fb93a4
DA
3182static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3183{
3184 int rc = -ESRCH;
3185
3186 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3187 goto out;
3188
3189 if (cfg->fc_flags & RTF_GATEWAY &&
3190 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3191 goto out;
3192 if (dst_hold_safe(&rt->dst))
3193 rc = rt6_remove_exception_rt(rt);
3194out:
3195 return rc;
3196}
3197
333c4301
DA
3198static int ip6_route_del(struct fib6_config *cfg,
3199 struct netlink_ext_ack *extack)
1da177e4 3200{
2b760fcf 3201 struct rt6_info *rt, *rt_cache;
c71099ac 3202 struct fib6_table *table;
1da177e4 3203 struct fib6_node *fn;
1da177e4
LT
3204 int err = -ESRCH;
3205
5578689a 3206 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3207 if (!table) {
3208 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3209 return err;
d5d531cb 3210 }
c71099ac 3211
66f5d6ce 3212 rcu_read_lock();
1da177e4 3213
c71099ac 3214 fn = fib6_locate(&table->tb6_root,
86872cb5 3215 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3216 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3217 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3218
1da177e4 3219 if (fn) {
66f5d6ce 3220 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3221 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3222 int rc;
3223
2b760fcf
WW
3224 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3225 &cfg->fc_src);
23fb93a4
DA
3226 if (rt_cache) {
3227 rc = ip6_del_cached_rt(rt_cache, cfg);
3228 if (rc != -ESRCH)
3229 return rc;
3230 }
3231 continue;
2b760fcf 3232 }
86872cb5 3233 if (cfg->fc_ifindex &&
5e670d84
DA
3234 (!rt->fib6_nh.nh_dev ||
3235 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3236 continue;
86872cb5 3237 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3238 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3239 continue;
86872cb5 3240 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3241 continue;
c2ed1880
M
3242 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3243 continue;
d3843fe5
WW
3244 if (!dst_hold_safe(&rt->dst))
3245 break;
66f5d6ce 3246 rcu_read_unlock();
1da177e4 3247
0ae81335
DA
3248 /* if gateway was specified only delete the one hop */
3249 if (cfg->fc_flags & RTF_GATEWAY)
3250 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3251
3252 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3253 }
3254 }
66f5d6ce 3255 rcu_read_unlock();
1da177e4
LT
3256
3257 return err;
3258}
3259
6700c270 3260static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3261{
a6279458 3262 struct netevent_redirect netevent;
e8599ff4 3263 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3264 struct ndisc_options ndopts;
3265 struct inet6_dev *in6_dev;
3266 struct neighbour *neigh;
71bcdba0 3267 struct rd_msg *msg;
6e157b6a
DM
3268 int optlen, on_link;
3269 u8 *lladdr;
e8599ff4 3270
29a3cad5 3271 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3272 optlen -= sizeof(*msg);
e8599ff4
DM
3273
3274 if (optlen < 0) {
6e157b6a 3275 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3276 return;
3277 }
3278
71bcdba0 3279 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3280
71bcdba0 3281 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3282 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3283 return;
3284 }
3285
6e157b6a 3286 on_link = 0;
71bcdba0 3287 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3288 on_link = 1;
71bcdba0 3289 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3290 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3291 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3292 return;
3293 }
3294
3295 in6_dev = __in6_dev_get(skb->dev);
3296 if (!in6_dev)
3297 return;
3298 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3299 return;
3300
3301 /* RFC2461 8.1:
3302 * The IP source address of the Redirect MUST be the same as the current
3303 * first-hop router for the specified ICMP Destination Address.
3304 */
3305
f997c55c 3306 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3307 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3308 return;
3309 }
6e157b6a
DM
3310
3311 lladdr = NULL;
e8599ff4
DM
3312 if (ndopts.nd_opts_tgt_lladdr) {
3313 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3314 skb->dev);
3315 if (!lladdr) {
3316 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3317 return;
3318 }
3319 }
3320
6e157b6a 3321 rt = (struct rt6_info *) dst;
ec13ad1d 3322 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3323 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3324 return;
6e157b6a 3325 }
e8599ff4 3326
6e157b6a
DM
3327 /* Redirect received -> path was valid.
3328 * Look, redirects are sent only in response to data packets,
3329 * so that this nexthop apparently is reachable. --ANK
3330 */
0dec879f 3331 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3332
71bcdba0 3333 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3334 if (!neigh)
3335 return;
a6279458 3336
1da177e4
LT
3337 /*
3338 * We have finally decided to accept it.
3339 */
3340
f997c55c 3341 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3342 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3343 NEIGH_UPDATE_F_OVERRIDE|
3344 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3345 NEIGH_UPDATE_F_ISROUTER)),
3346 NDISC_REDIRECT, &ndopts);
1da177e4 3347
23fb93a4 3348 nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
38308473 3349 if (!nrt)
1da177e4
LT
3350 goto out;
3351
3352 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3353 if (on_link)
3354 nrt->rt6i_flags &= ~RTF_GATEWAY;
3355
b91d5329 3356 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3357 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3358
2b760fcf
WW
3359 /* No need to remove rt from the exception table if rt is
3360 * a cached route because rt6_insert_exception() will
3361 * takes care of it
3362 */
d4ead6b3 3363 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3364 dst_release_immediate(&nrt->dst);
3365 goto out;
3366 }
1da177e4 3367
d8d1f30b
CG
3368 netevent.old = &rt->dst;
3369 netevent.new = &nrt->dst;
71bcdba0 3370 netevent.daddr = &msg->dest;
60592833 3371 netevent.neigh = neigh;
8d71740c
TT
3372 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3373
1da177e4 3374out:
e8599ff4 3375 neigh_release(neigh);
6e157b6a
DM
3376}
3377
70ceb4f5 3378#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3379static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3380 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3381 const struct in6_addr *gwaddr,
3382 struct net_device *dev)
70ceb4f5 3383{
830218c1
DA
3384 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3385 int ifindex = dev->ifindex;
70ceb4f5
YH
3386 struct fib6_node *fn;
3387 struct rt6_info *rt = NULL;
c71099ac
TG
3388 struct fib6_table *table;
3389
830218c1 3390 table = fib6_get_table(net, tb_id);
38308473 3391 if (!table)
c71099ac 3392 return NULL;
70ceb4f5 3393
66f5d6ce 3394 rcu_read_lock();
38fbeeee 3395 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3396 if (!fn)
3397 goto out;
3398
66f5d6ce 3399 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3400 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5
YH
3401 continue;
3402 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3403 continue;
5e670d84 3404 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3405 continue;
d3843fe5 3406 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3407 break;
3408 }
3409out:
66f5d6ce 3410 rcu_read_unlock();
70ceb4f5
YH
3411 return rt;
3412}
3413
efa2cea0 3414static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3415 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3416 const struct in6_addr *gwaddr,
3417 struct net_device *dev,
95c96174 3418 unsigned int pref)
70ceb4f5 3419{
86872cb5 3420 struct fib6_config cfg = {
238fc7ea 3421 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3422 .fc_ifindex = dev->ifindex,
86872cb5
TG
3423 .fc_dst_len = prefixlen,
3424 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3425 RTF_UP | RTF_PREF(pref),
b91d5329 3426 .fc_protocol = RTPROT_RA,
e8478e80 3427 .fc_type = RTN_UNICAST,
15e47304 3428 .fc_nlinfo.portid = 0,
efa2cea0
DL
3429 .fc_nlinfo.nlh = NULL,
3430 .fc_nlinfo.nl_net = net,
86872cb5
TG
3431 };
3432
830218c1 3433 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3434 cfg.fc_dst = *prefix;
3435 cfg.fc_gateway = *gwaddr;
70ceb4f5 3436
e317da96
YH
3437 /* We should treat it as a default route if prefix length is 0. */
3438 if (!prefixlen)
86872cb5 3439 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3440
acb54e3c 3441 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3442
830218c1 3443 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3444}
3445#endif
3446
afb1d4b5
DA
3447struct rt6_info *rt6_get_dflt_router(struct net *net,
3448 const struct in6_addr *addr,
3449 struct net_device *dev)
1ab1457c 3450{
830218c1 3451 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3452 struct rt6_info *rt;
c71099ac 3453 struct fib6_table *table;
1da177e4 3454
afb1d4b5 3455 table = fib6_get_table(net, tb_id);
38308473 3456 if (!table)
c71099ac 3457 return NULL;
1da177e4 3458
66f5d6ce
WW
3459 rcu_read_lock();
3460 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3461 if (dev == rt->fib6_nh.nh_dev &&
045927ff 3462 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3463 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3464 break;
3465 }
3466 if (rt)
d3843fe5 3467 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3468 rcu_read_unlock();
1da177e4
LT
3469 return rt;
3470}
3471
afb1d4b5
DA
3472struct rt6_info *rt6_add_dflt_router(struct net *net,
3473 const struct in6_addr *gwaddr,
ebacaaa0
YH
3474 struct net_device *dev,
3475 unsigned int pref)
1da177e4 3476{
86872cb5 3477 struct fib6_config cfg = {
ca254490 3478 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3479 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3480 .fc_ifindex = dev->ifindex,
3481 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3482 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3483 .fc_protocol = RTPROT_RA,
e8478e80 3484 .fc_type = RTN_UNICAST,
15e47304 3485 .fc_nlinfo.portid = 0,
5578689a 3486 .fc_nlinfo.nlh = NULL,
afb1d4b5 3487 .fc_nlinfo.nl_net = net,
86872cb5 3488 };
1da177e4 3489
4e3fd7a0 3490 cfg.fc_gateway = *gwaddr;
1da177e4 3491
acb54e3c 3492 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3493 struct fib6_table *table;
3494
3495 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3496 if (table)
3497 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3498 }
1da177e4 3499
afb1d4b5 3500 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3501}
3502
afb1d4b5
DA
3503static void __rt6_purge_dflt_routers(struct net *net,
3504 struct fib6_table *table)
1da177e4
LT
3505{
3506 struct rt6_info *rt;
3507
3508restart:
66f5d6ce
WW
3509 rcu_read_lock();
3510 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3511 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3512 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3513 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3514 rcu_read_unlock();
afb1d4b5 3515 ip6_del_rt(net, rt);
d3843fe5 3516 } else {
66f5d6ce 3517 rcu_read_unlock();
d3843fe5 3518 }
1da177e4
LT
3519 goto restart;
3520 }
3521 }
66f5d6ce 3522 rcu_read_unlock();
830218c1
DA
3523
3524 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3525}
3526
3527void rt6_purge_dflt_routers(struct net *net)
3528{
3529 struct fib6_table *table;
3530 struct hlist_head *head;
3531 unsigned int h;
3532
3533 rcu_read_lock();
3534
3535 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3536 head = &net->ipv6.fib_table_hash[h];
3537 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3538 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3539 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3540 }
3541 }
3542
3543 rcu_read_unlock();
1da177e4
LT
3544}
3545
5578689a
DL
3546static void rtmsg_to_fib6_config(struct net *net,
3547 struct in6_rtmsg *rtmsg,
86872cb5
TG
3548 struct fib6_config *cfg)
3549{
3550 memset(cfg, 0, sizeof(*cfg));
3551
ca254490
DA
3552 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3553 : RT6_TABLE_MAIN;
86872cb5
TG
3554 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3555 cfg->fc_metric = rtmsg->rtmsg_metric;
3556 cfg->fc_expires = rtmsg->rtmsg_info;
3557 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3558 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3559 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3560 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3561
5578689a 3562 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3563
4e3fd7a0
AD
3564 cfg->fc_dst = rtmsg->rtmsg_dst;
3565 cfg->fc_src = rtmsg->rtmsg_src;
3566 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3567}
3568
5578689a 3569int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3570{
86872cb5 3571 struct fib6_config cfg;
1da177e4
LT
3572 struct in6_rtmsg rtmsg;
3573 int err;
3574
67ba4152 3575 switch (cmd) {
1da177e4
LT
3576 case SIOCADDRT: /* Add a route */
3577 case SIOCDELRT: /* Delete a route */
af31f412 3578 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3579 return -EPERM;
3580 err = copy_from_user(&rtmsg, arg,
3581 sizeof(struct in6_rtmsg));
3582 if (err)
3583 return -EFAULT;
86872cb5 3584
5578689a 3585 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3586
1da177e4
LT
3587 rtnl_lock();
3588 switch (cmd) {
3589 case SIOCADDRT:
acb54e3c 3590 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3591 break;
3592 case SIOCDELRT:
333c4301 3593 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3594 break;
3595 default:
3596 err = -EINVAL;
3597 }
3598 rtnl_unlock();
3599
3600 return err;
3ff50b79 3601 }
1da177e4
LT
3602
3603 return -EINVAL;
3604}
3605
3606/*
3607 * Drop the packet on the floor
3608 */
3609
d5fdd6ba 3610static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3611{
612f09e8 3612 int type;
adf30907 3613 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3614 switch (ipstats_mib_noroutes) {
3615 case IPSTATS_MIB_INNOROUTES:
0660e03f 3616 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3617 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3618 IP6_INC_STATS(dev_net(dst->dev),
3619 __in6_dev_get_safely(skb->dev),
3bd653c8 3620 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3621 break;
3622 }
3623 /* FALLTHROUGH */
3624 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3625 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3626 ipstats_mib_noroutes);
612f09e8
YH
3627 break;
3628 }
3ffe533c 3629 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3630 kfree_skb(skb);
3631 return 0;
3632}
3633
9ce8ade0
TG
3634static int ip6_pkt_discard(struct sk_buff *skb)
3635{
612f09e8 3636 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3637}
3638
ede2059d 3639static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3640{
adf30907 3641 skb->dev = skb_dst(skb)->dev;
612f09e8 3642 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3643}
3644
9ce8ade0
TG
3645static int ip6_pkt_prohibit(struct sk_buff *skb)
3646{
612f09e8 3647 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3648}
3649
ede2059d 3650static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3651{
adf30907 3652 skb->dev = skb_dst(skb)->dev;
612f09e8 3653 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3654}
3655
1da177e4
LT
3656/*
3657 * Allocate a dst for local (unicast / anycast) address.
3658 */
3659
afb1d4b5
DA
3660struct rt6_info *addrconf_dst_alloc(struct net *net,
3661 struct inet6_dev *idev,
1da177e4 3662 const struct in6_addr *addr,
acb54e3c 3663 bool anycast, gfp_t gfp_flags)
1da177e4 3664{
ca254490 3665 u32 tb_id;
4832c30d 3666 struct net_device *dev = idev->dev;
5f02ce24
DA
3667 struct rt6_info *rt;
3668
5f02ce24 3669 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3670 if (!rt)
1da177e4
LT
3671 return ERR_PTR(-ENOMEM);
3672
3b6761d1
DA
3673 rt->dst_nocount = true;
3674
1da177e4 3675 in6_dev_hold(idev);
1da177e4 3676 rt->rt6i_idev = idev;
1da177e4 3677
3b6761d1 3678 rt->dst_host = true;
94b5e0f9 3679 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3680 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80
DA
3681 if (anycast) {
3682 rt->fib6_type = RTN_ANYCAST;
58c4fb86 3683 rt->rt6i_flags |= RTF_ANYCAST;
e8478e80
DA
3684 } else {
3685 rt->fib6_type = RTN_LOCAL;
1da177e4 3686 rt->rt6i_flags |= RTF_LOCAL;
e8478e80 3687 }
1da177e4 3688
5e670d84
DA
3689 rt->fib6_nh.nh_gw = *addr;
3690 rt->fib6_nh.nh_dev = dev;
550bab42 3691 rt->rt6i_gateway = *addr;
4e3fd7a0 3692 rt->rt6i_dst.addr = *addr;
1da177e4 3693 rt->rt6i_dst.plen = 128;
ca254490
DA
3694 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3695 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3696
1da177e4
LT
3697 return rt;
3698}
3699
c3968a85
DW
3700/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() by fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address to drop as prefsrc */
};
3706
3707static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3708{
3709 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3710 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3711 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3712
5e670d84 3713 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3714 rt != net->ipv6.fib6_null_entry &&
c3968a85 3715 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3716 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3717 /* remove prefsrc entry */
3718 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3719 /* need to update cache as well */
3720 rt6_exceptions_remove_prefsrc(rt);
3721 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3722 }
3723 return 0;
3724}
3725
3726void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3727{
3728 struct net *net = dev_net(ifp->idev->dev);
3729 struct arg_dev_net_ip adni = {
3730 .dev = ifp->idev->dev,
3731 .net = net,
3732 .addr = &ifp->addr,
3733 };
0c3584d5 3734 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3735}
3736
be7a010d 3737#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3738
3739/* Remove routers and update dst entries when gateway turn into host. */
3740static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3741{
3742 struct in6_addr *gateway = (struct in6_addr *)arg;
3743
2b760fcf 3744 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3745 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3746 return -1;
3747 }
b16cb459
WW
3748
3749 /* Further clean up cached routes in exception table.
3750 * This is needed because cached route may have a different
3751 * gateway than its 'parent' in the case of an ip redirect.
3752 */
3753 rt6_exceptions_clean_tohost(rt, gateway);
3754
be7a010d
DJ
3755 return 0;
3756}
3757
3758void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3759{
3760 fib6_clean_all(net, fib6_clean_tohost, gateway);
3761}
3762
2127d95a
IS
/*
 * Argument for the fib6_ifup() / fib6_ifdown() walkers.  Only one union
 * member is meaningful per walk: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear on up */
		unsigned long event;	/* NETDEV_* event on down */
	};
};
d7dedee1
IS
3771static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3772{
3773 struct rt6_info *iter;
3774 struct fib6_node *fn;
3775
3776 fn = rcu_dereference_protected(rt->rt6i_node,
3777 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3778 iter = rcu_dereference_protected(fn->leaf,
3779 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3780 while (iter) {
3781 if (iter->rt6i_metric == rt->rt6i_metric &&
3782 rt6_qualify_for_ecmp(iter))
3783 return iter;
3784 iter = rcu_dereference_protected(iter->rt6_next,
3785 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3786 }
3787
3788 return NULL;
3789}
3790
3791static bool rt6_is_dead(const struct rt6_info *rt)
3792{
5e670d84
DA
3793 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3794 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d7dedee1
IS
3795 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3796 return true;
3797
3798 return false;
3799}
3800
3801static int rt6_multipath_total_weight(const struct rt6_info *rt)
3802{
3803 struct rt6_info *iter;
3804 int total = 0;
3805
3806 if (!rt6_is_dead(rt))
5e670d84 3807 total += rt->fib6_nh.nh_weight;
d7dedee1
IS
3808
3809 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3810 if (!rt6_is_dead(iter))
5e670d84 3811 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3812 }
3813
3814 return total;
3815}
3816
3817static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3818{
3819 int upper_bound = -1;
3820
3821 if (!rt6_is_dead(rt)) {
5e670d84 3822 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3823 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3824 total) - 1;
3825 }
5e670d84 3826 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3827}
3828
3829static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3830{
3831 struct rt6_info *iter;
3832 int weight = 0;
3833
3834 rt6_upper_bound_set(rt, &weight, total);
3835
3836 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3837 rt6_upper_bound_set(iter, &weight, total);
3838}
3839
3840void rt6_multipath_rebalance(struct rt6_info *rt)
3841{
3842 struct rt6_info *first;
3843 int total;
3844
3845 /* In case the entire multipath route was marked for flushing,
3846 * then there is no need to rebalance upon the removal of every
3847 * sibling route.
3848 */
3849 if (!rt->rt6i_nsiblings || rt->should_flush)
3850 return;
3851
3852 /* During lookup routes are evaluated in order, so we need to
3853 * make sure upper bounds are assigned from the first sibling
3854 * onwards.
3855 */
3856 first = rt6_multipath_first_sibling(rt);
3857 if (WARN_ON_ONCE(!first))
3858 return;
3859
3860 total = rt6_multipath_total_weight(first);
3861 rt6_multipath_upper_bound_set(first, total);
3862}
3863
2127d95a
IS
3864static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3865{
3866 const struct arg_netdev_event *arg = p_arg;
7aef6859 3867 struct net *net = dev_net(arg->dev);
2127d95a 3868
421842ed 3869 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3870 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3871 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3872 rt6_multipath_rebalance(rt);
1de178ed 3873 }
2127d95a
IS
3874
3875 return 0;
3876}
3877
3878void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3879{
3880 struct arg_netdev_event arg = {
3881 .dev = dev,
6802f3ad
IS
3882 {
3883 .nh_flags = nh_flags,
3884 },
2127d95a
IS
3885 };
3886
3887 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3888 arg.nh_flags |= RTNH_F_LINKDOWN;
3889
3890 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3891}
3892
1de178ed
IS
3893static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3894 const struct net_device *dev)
3895{
3896 struct rt6_info *iter;
3897
5e670d84 3898 if (rt->fib6_nh.nh_dev == dev)
1de178ed
IS
3899 return true;
3900 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84 3901 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3902 return true;
3903
3904 return false;
3905}
3906
3907static void rt6_multipath_flush(struct rt6_info *rt)
3908{
3909 struct rt6_info *iter;
3910
3911 rt->should_flush = 1;
3912 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3913 iter->should_flush = 1;
3914}
3915
3916static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3917 const struct net_device *down_dev)
3918{
3919 struct rt6_info *iter;
3920 unsigned int dead = 0;
3921
5e670d84
DA
3922 if (rt->fib6_nh.nh_dev == down_dev ||
3923 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3924 dead++;
3925 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3926 if (iter->fib6_nh.nh_dev == down_dev ||
3927 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3928 dead++;
3929
3930 return dead;
3931}
3932
3933static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3934 const struct net_device *dev,
3935 unsigned int nh_flags)
3936{
3937 struct rt6_info *iter;
3938
5e670d84
DA
3939 if (rt->fib6_nh.nh_dev == dev)
3940 rt->fib6_nh.nh_flags |= nh_flags;
1de178ed 3941 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3942 if (iter->fib6_nh.nh_dev == dev)
3943 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3944}
3945
a1a22c12 3946/* called with write lock held for table with rt */
4c981e28 3947static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3948{
4c981e28
IS
3949 const struct arg_netdev_event *arg = p_arg;
3950 const struct net_device *dev = arg->dev;
7aef6859 3951 struct net *net = dev_net(dev);
8ed67789 3952
421842ed 3953 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3954 return 0;
3955
3956 switch (arg->event) {
3957 case NETDEV_UNREGISTER:
5e670d84 3958 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3959 case NETDEV_DOWN:
1de178ed 3960 if (rt->should_flush)
27c6fa73 3961 return -1;
1de178ed 3962 if (!rt->rt6i_nsiblings)
5e670d84 3963 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3964 if (rt6_multipath_uses_dev(rt, dev)) {
3965 unsigned int count;
3966
3967 count = rt6_multipath_dead_count(rt, dev);
3968 if (rt->rt6i_nsiblings + 1 == count) {
3969 rt6_multipath_flush(rt);
3970 return -1;
3971 }
3972 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3973 RTNH_F_LINKDOWN);
7aef6859 3974 fib6_update_sernum(net, rt);
d7dedee1 3975 rt6_multipath_rebalance(rt);
1de178ed
IS
3976 }
3977 return -2;
27c6fa73 3978 case NETDEV_CHANGE:
5e670d84 3979 if (rt->fib6_nh.nh_dev != dev ||
1de178ed 3980 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3981 break;
5e670d84 3982 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3983 rt6_multipath_rebalance(rt);
27c6fa73 3984 break;
2b241361 3985 }
c159d30c 3986
1da177e4
LT
3987 return 0;
3988}
3989
27c6fa73 3990void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3991{
4c981e28 3992 struct arg_netdev_event arg = {
8ed67789 3993 .dev = dev,
6802f3ad
IS
3994 {
3995 .event = event,
3996 },
8ed67789
DL
3997 };
3998
4c981e28
IS
3999 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4000}
4001
4002void rt6_disable_ip(struct net_device *dev, unsigned long event)
4003{
4004 rt6_sync_down_dev(dev, event);
4005 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4006 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4007}
4008
/* Argument passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4013
4014static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
4015{
4016 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4017 struct inet6_dev *idev;
4018
4019 /* In IPv6 pmtu discovery is not optional,
4020 so that RTAX_MTU lock cannot disable it.
4021 We still use this lock to block changes
4022 caused by addrconf/ndisc.
4023 */
4024
4025 idev = __in6_dev_get(arg->dev);
38308473 4026 if (!idev)
1da177e4
LT
4027 return 0;
4028
4029 /* For administrative MTU increase, there is no way to discover
4030 IPv6 PMTU increase, so PMTU increase should be updated here.
4031 Since RFC 1981 doesn't include administrative MTU increase
4032 update PMTU increase is a MUST. (i.e. jumbo frame)
4033 */
5e670d84 4034 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4035 !fib6_metric_locked(rt, RTAX_MTU)) {
4036 u32 mtu = rt->fib6_pmtu;
4037
4038 if (mtu >= arg->mtu ||
4039 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4040 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4041
f5bbe7ee 4042 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4043 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4044 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4045 }
1da177e4
LT
4046 return 0;
4047}
4048
95c96174 4049void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4050{
c71099ac
TG
4051 struct rt6_mtu_change_arg arg = {
4052 .dev = dev,
4053 .mtu = mtu,
4054 };
1da177e4 4055
0c3584d5 4056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4057}
4058
ef7c79ed 4059static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4061 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4062 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4063 [RTA_PRIORITY] = { .type = NLA_U32 },
4064 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4065 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4066 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4067 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4068 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4069 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4070 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4071 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4072};
4073
4074static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4075 struct fib6_config *cfg,
4076 struct netlink_ext_ack *extack)
1da177e4 4077{
86872cb5
TG
4078 struct rtmsg *rtm;
4079 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4080 unsigned int pref;
86872cb5 4081 int err;
1da177e4 4082
fceb6435
JB
4083 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4084 NULL);
86872cb5
TG
4085 if (err < 0)
4086 goto errout;
1da177e4 4087
86872cb5
TG
4088 err = -EINVAL;
4089 rtm = nlmsg_data(nlh);
4090 memset(cfg, 0, sizeof(*cfg));
4091
4092 cfg->fc_table = rtm->rtm_table;
4093 cfg->fc_dst_len = rtm->rtm_dst_len;
4094 cfg->fc_src_len = rtm->rtm_src_len;
4095 cfg->fc_flags = RTF_UP;
4096 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4097 cfg->fc_type = rtm->rtm_type;
86872cb5 4098
ef2c7d7b
ND
4099 if (rtm->rtm_type == RTN_UNREACHABLE ||
4100 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4101 rtm->rtm_type == RTN_PROHIBIT ||
4102 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4103 cfg->fc_flags |= RTF_REJECT;
4104
ab79ad14
4105 if (rtm->rtm_type == RTN_LOCAL)
4106 cfg->fc_flags |= RTF_LOCAL;
4107
1f56a01f
MKL
4108 if (rtm->rtm_flags & RTM_F_CLONED)
4109 cfg->fc_flags |= RTF_CACHE;
4110
fc1e64e1
DA
4111 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4112
15e47304 4113 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4114 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4115 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4116
4117 if (tb[RTA_GATEWAY]) {
67b61f6c 4118 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4119 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4120 }
86872cb5
TG
4121
4122 if (tb[RTA_DST]) {
4123 int plen = (rtm->rtm_dst_len + 7) >> 3;
4124
4125 if (nla_len(tb[RTA_DST]) < plen)
4126 goto errout;
4127
4128 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4129 }
86872cb5
TG
4130
4131 if (tb[RTA_SRC]) {
4132 int plen = (rtm->rtm_src_len + 7) >> 3;
4133
4134 if (nla_len(tb[RTA_SRC]) < plen)
4135 goto errout;
4136
4137 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4138 }
86872cb5 4139
c3968a85 4140 if (tb[RTA_PREFSRC])
67b61f6c 4141 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4142
86872cb5
TG
4143 if (tb[RTA_OIF])
4144 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4145
4146 if (tb[RTA_PRIORITY])
4147 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4148
4149 if (tb[RTA_METRICS]) {
4150 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4151 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4152 }
86872cb5
TG
4153
4154 if (tb[RTA_TABLE])
4155 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4156
51ebd318
ND
4157 if (tb[RTA_MULTIPATH]) {
4158 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4159 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4160
4161 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4162 cfg->fc_mp_len, extack);
9ed59592
DA
4163 if (err < 0)
4164 goto errout;
51ebd318
ND
4165 }
4166
c78ba6d6
LR
4167 if (tb[RTA_PREF]) {
4168 pref = nla_get_u8(tb[RTA_PREF]);
4169 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4170 pref != ICMPV6_ROUTER_PREF_HIGH)
4171 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4172 cfg->fc_flags |= RTF_PREF(pref);
4173 }
4174
19e42e45
RP
4175 if (tb[RTA_ENCAP])
4176 cfg->fc_encap = tb[RTA_ENCAP];
4177
9ed59592 4178 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4179 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4180
c255bd68 4181 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4182 if (err < 0)
4183 goto errout;
4184 }
4185
32bc201e
XL
4186 if (tb[RTA_EXPIRES]) {
4187 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4188
4189 if (addrconf_finite_timeout(timeout)) {
4190 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4191 cfg->fc_flags |= RTF_EXPIRES;
4192 }
4193 }
4194
86872cb5
TG
4195 err = 0;
4196errout:
4197 return err;
1da177e4
LT
4198}
4199
6b9ea5a6
RP
4200struct rt6_nh {
4201 struct rt6_info *rt6_info;
4202 struct fib6_config r_cfg;
6b9ea5a6
RP
4203 struct list_head next;
4204};
4205
4206static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4207{
4208 struct rt6_nh *nh;
4209
4210 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4211 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4212 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4213 nh->r_cfg.fc_ifindex);
4214 }
4215}
4216
d4ead6b3
DA
4217static int ip6_route_info_append(struct net *net,
4218 struct list_head *rt6_nh_list,
6b9ea5a6
RP
4219 struct rt6_info *rt, struct fib6_config *r_cfg)
4220{
4221 struct rt6_nh *nh;
6b9ea5a6
RP
4222 int err = -EEXIST;
4223
4224 list_for_each_entry(nh, rt6_nh_list, next) {
4225 /* check if rt6_info already exists */
f06b7549 4226 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4227 return err;
4228 }
4229
4230 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4231 if (!nh)
4232 return -ENOMEM;
4233 nh->rt6_info = rt;
d4ead6b3 4234 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4235 if (err) {
4236 kfree(nh);
4237 return err;
4238 }
4239 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4240 list_add_tail(&nh->next, rt6_nh_list);
4241
4242 return 0;
4243}
4244
3b1137fe
DA
4245static void ip6_route_mpath_notify(struct rt6_info *rt,
4246 struct rt6_info *rt_last,
4247 struct nl_info *info,
4248 __u16 nlflags)
4249{
4250 /* if this is an APPEND route, then rt points to the first route
4251 * inserted and rt_last points to last route inserted. Userspace
4252 * wants a consistent dump of the route which starts at the first
4253 * nexthop. Since sibling routes are always added at the end of
4254 * the list, find the first sibling of the last route appended
4255 */
4256 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4257 rt = list_first_entry(&rt_last->rt6i_siblings,
4258 struct rt6_info,
4259 rt6i_siblings);
4260 }
4261
4262 if (rt)
4263 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4264}
4265
333c4301
DA
4266static int ip6_route_multipath_add(struct fib6_config *cfg,
4267 struct netlink_ext_ack *extack)
51ebd318 4268{
3b1137fe
DA
4269 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4270 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4271 struct fib6_config r_cfg;
4272 struct rtnexthop *rtnh;
6b9ea5a6
RP
4273 struct rt6_info *rt;
4274 struct rt6_nh *err_nh;
4275 struct rt6_nh *nh, *nh_safe;
3b1137fe 4276 __u16 nlflags;
51ebd318
ND
4277 int remaining;
4278 int attrlen;
6b9ea5a6
RP
4279 int err = 1;
4280 int nhn = 0;
4281 int replace = (cfg->fc_nlinfo.nlh &&
4282 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4283 LIST_HEAD(rt6_nh_list);
51ebd318 4284
3b1137fe
DA
4285 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4286 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4287 nlflags |= NLM_F_APPEND;
4288
35f1b4e9 4289 remaining = cfg->fc_mp_len;
51ebd318 4290 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4291
6b9ea5a6
RP
4292 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4293 * rt6_info structs per nexthop
4294 */
51ebd318
ND
4295 while (rtnh_ok(rtnh, remaining)) {
4296 memcpy(&r_cfg, cfg, sizeof(*cfg));
4297 if (rtnh->rtnh_ifindex)
4298 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4299
4300 attrlen = rtnh_attrlen(rtnh);
4301 if (attrlen > 0) {
4302 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4303
4304 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4305 if (nla) {
67b61f6c 4306 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4307 r_cfg.fc_flags |= RTF_GATEWAY;
4308 }
19e42e45
RP
4309 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4310 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4311 if (nla)
4312 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4313 }
6b9ea5a6 4314
68e2ffde 4315 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4316 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4317 if (IS_ERR(rt)) {
4318 err = PTR_ERR(rt);
4319 rt = NULL;
6b9ea5a6 4320 goto cleanup;
8c5b83f0 4321 }
6b9ea5a6 4322
5e670d84 4323 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4324
d4ead6b3
DA
4325 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4326 rt, &r_cfg);
51ebd318 4327 if (err) {
587fea74 4328 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4329 goto cleanup;
4330 }
4331
4332 rtnh = rtnh_next(rtnh, &remaining);
4333 }
4334
3b1137fe
DA
4335 /* for add and replace send one notification with all nexthops.
4336 * Skip the notification in fib6_add_rt2node and send one with
4337 * the full route when done
4338 */
4339 info->skip_notify = 1;
4340
6b9ea5a6
RP
4341 err_nh = NULL;
4342 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4343 rt_last = nh->rt6_info;
d4ead6b3 4344 err = __ip6_ins_rt(nh->rt6_info, info, extack);
3b1137fe
DA
4345 /* save reference to first route for notification */
4346 if (!rt_notif && !err)
4347 rt_notif = nh->rt6_info;
4348
6b9ea5a6
RP
4349 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4350 nh->rt6_info = NULL;
4351 if (err) {
4352 if (replace && nhn)
4353 ip6_print_replace_route_err(&rt6_nh_list);
4354 err_nh = nh;
4355 goto add_errout;
51ebd318 4356 }
6b9ea5a6 4357
1a72418b 4358 /* Because each route is added like a single route we remove
27596472
MK
4359 * these flags after the first nexthop: if there is a collision,
4360 * we have already failed to add the first nexthop:
4361 * fib6_add_rt2node() has rejected it; when replacing, old
4362 * nexthops have been replaced by first new, the rest should
4363 * be added to it.
1a72418b 4364 */
27596472
MK
4365 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4366 NLM_F_REPLACE);
6b9ea5a6
RP
4367 nhn++;
4368 }
4369
3b1137fe
DA
4370 /* success ... tell user about new route */
4371 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4372 goto cleanup;
4373
4374add_errout:
3b1137fe
DA
4375 /* send notification for routes that were added so that
4376 * the delete notifications sent by ip6_route_del are
4377 * coherent
4378 */
4379 if (rt_notif)
4380 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4381
6b9ea5a6
RP
4382 /* Delete routes that were already added */
4383 list_for_each_entry(nh, &rt6_nh_list, next) {
4384 if (err_nh == nh)
4385 break;
333c4301 4386 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4387 }
4388
4389cleanup:
4390 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4391 if (nh->rt6_info)
4392 dst_release_immediate(&nh->rt6_info->dst);
6b9ea5a6
RP
4393 list_del(&nh->next);
4394 kfree(nh);
4395 }
4396
4397 return err;
4398}
4399
333c4301
DA
4400static int ip6_route_multipath_del(struct fib6_config *cfg,
4401 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4402{
4403 struct fib6_config r_cfg;
4404 struct rtnexthop *rtnh;
4405 int remaining;
4406 int attrlen;
4407 int err = 1, last_err = 0;
4408
4409 remaining = cfg->fc_mp_len;
4410 rtnh = (struct rtnexthop *)cfg->fc_mp;
4411
4412 /* Parse a Multipath Entry */
4413 while (rtnh_ok(rtnh, remaining)) {
4414 memcpy(&r_cfg, cfg, sizeof(*cfg));
4415 if (rtnh->rtnh_ifindex)
4416 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4417
4418 attrlen = rtnh_attrlen(rtnh);
4419 if (attrlen > 0) {
4420 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4421
4422 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4423 if (nla) {
4424 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4425 r_cfg.fc_flags |= RTF_GATEWAY;
4426 }
4427 }
333c4301 4428 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4429 if (err)
4430 last_err = err;
4431
51ebd318
ND
4432 rtnh = rtnh_next(rtnh, &remaining);
4433 }
4434
4435 return last_err;
4436}
4437
c21ef3e3
DA
4438static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4439 struct netlink_ext_ack *extack)
1da177e4 4440{
86872cb5
TG
4441 struct fib6_config cfg;
4442 int err;
1da177e4 4443
333c4301 4444 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4445 if (err < 0)
4446 return err;
4447
51ebd318 4448 if (cfg.fc_mp)
333c4301 4449 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4450 else {
4451 cfg.fc_delete_all_nh = 1;
333c4301 4452 return ip6_route_del(&cfg, extack);
0ae81335 4453 }
1da177e4
LT
4454}
4455
c21ef3e3
DA
4456static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4457 struct netlink_ext_ack *extack)
1da177e4 4458{
86872cb5
TG
4459 struct fib6_config cfg;
4460 int err;
1da177e4 4461
333c4301 4462 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4463 if (err < 0)
4464 return err;
4465
51ebd318 4466 if (cfg.fc_mp)
333c4301 4467 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4468 else
acb54e3c 4469 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4470}
4471
beb1afac 4472static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4473{
beb1afac
DA
4474 int nexthop_len = 0;
4475
4476 if (rt->rt6i_nsiblings) {
4477 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4478 + NLA_ALIGN(sizeof(struct rtnexthop))
4479 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4480 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac
DA
4481
4482 nexthop_len *= rt->rt6i_nsiblings;
4483 }
4484
339bf98f
TG
4485 return NLMSG_ALIGN(sizeof(struct rtmsg))
4486 + nla_total_size(16) /* RTA_SRC */
4487 + nla_total_size(16) /* RTA_DST */
4488 + nla_total_size(16) /* RTA_GATEWAY */
4489 + nla_total_size(16) /* RTA_PREFSRC */
4490 + nla_total_size(4) /* RTA_TABLE */
4491 + nla_total_size(4) /* RTA_IIF */
4492 + nla_total_size(4) /* RTA_OIF */
4493 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4494 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4495 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4496 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4497 + nla_total_size(1) /* RTA_PREF */
5e670d84 4498 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4499 + nexthop_len;
4500}
4501
4502static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4503 unsigned int *flags, bool skip_oif)
beb1afac 4504{
5e670d84 4505 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4506 *flags |= RTNH_F_DEAD;
4507
5e670d84 4508 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4509 *flags |= RTNH_F_LINKDOWN;
4510 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4511 *flags |= RTNH_F_DEAD;
4512 }
4513
4514 if (rt->rt6i_flags & RTF_GATEWAY) {
5e670d84 4515 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4516 goto nla_put_failure;
4517 }
4518
5e670d84
DA
4519 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4520 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4521 *flags |= RTNH_F_OFFLOAD;
4522
5be083ce 4523 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4524 if (!skip_oif && rt->fib6_nh.nh_dev &&
4525 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4526 goto nla_put_failure;
4527
5e670d84
DA
4528 if (rt->fib6_nh.nh_lwtstate &&
4529 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4530 goto nla_put_failure;
4531
4532 return 0;
4533
4534nla_put_failure:
4535 return -EMSGSIZE;
4536}
4537
5be083ce 4538/* add multipath next hop */
beb1afac
DA
4539static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4540{
5e670d84 4541 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4542 struct rtnexthop *rtnh;
4543 unsigned int flags = 0;
4544
4545 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4546 if (!rtnh)
4547 goto nla_put_failure;
4548
5e670d84
DA
4549 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4550 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4551
5be083ce 4552 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4553 goto nla_put_failure;
4554
4555 rtnh->rtnh_flags = flags;
4556
4557 /* length of rtnetlink header + attributes */
4558 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4559
4560 return 0;
4561
4562nla_put_failure:
4563 return -EMSGSIZE;
339bf98f
TG
4564}
4565
d4ead6b3
DA
4566static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4567 struct rt6_info *rt, struct dst_entry *dst,
4568 struct in6_addr *dest, struct in6_addr *src,
15e47304 4569 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4570 unsigned int flags)
1da177e4
LT
4571{
4572 struct rtmsg *rtm;
2d7202bf 4573 struct nlmsghdr *nlh;
d4ead6b3
DA
4574 long expires = 0;
4575 u32 *pmetrics;
9e762a4a 4576 u32 table;
1da177e4 4577
15e47304 4578 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4579 if (!nlh)
26932566 4580 return -EMSGSIZE;
2d7202bf
TG
4581
4582 rtm = nlmsg_data(nlh);
1da177e4
LT
4583 rtm->rtm_family = AF_INET6;
4584 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4585 rtm->rtm_src_len = rt->rt6i_src.plen;
4586 rtm->rtm_tos = 0;
c71099ac 4587 if (rt->rt6i_table)
9e762a4a 4588 table = rt->rt6i_table->tb6_id;
c71099ac 4589 else
9e762a4a
PM
4590 table = RT6_TABLE_UNSPEC;
4591 rtm->rtm_table = table;
c78679e8
DM
4592 if (nla_put_u32(skb, RTA_TABLE, table))
4593 goto nla_put_failure;
e8478e80
DA
4594
4595 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4596 rtm->rtm_flags = 0;
4597 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4598 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4599
38308473 4600 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4601 rtm->rtm_flags |= RTM_F_CLONED;
4602
d4ead6b3
DA
4603 if (dest) {
4604 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4605 goto nla_put_failure;
1ab1457c 4606 rtm->rtm_dst_len = 128;
1da177e4 4607 } else if (rtm->rtm_dst_len)
930345ea 4608 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4609 goto nla_put_failure;
1da177e4
LT
4610#ifdef CONFIG_IPV6_SUBTREES
4611 if (src) {
930345ea 4612 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4613 goto nla_put_failure;
1ab1457c 4614 rtm->rtm_src_len = 128;
c78679e8 4615 } else if (rtm->rtm_src_len &&
930345ea 4616 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4617 goto nla_put_failure;
1da177e4 4618#endif
7bc570c8
YH
4619 if (iif) {
4620#ifdef CONFIG_IPV6_MROUTE
4621 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4622 int err = ip6mr_get_route(net, skb, rtm, portid);
4623
4624 if (err == 0)
4625 return 0;
4626 if (err < 0)
4627 goto nla_put_failure;
7bc570c8
YH
4628 } else
4629#endif
c78679e8
DM
4630 if (nla_put_u32(skb, RTA_IIF, iif))
4631 goto nla_put_failure;
d4ead6b3 4632 } else if (dest) {
1da177e4 4633 struct in6_addr saddr_buf;
d4ead6b3 4634 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4635 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4636 goto nla_put_failure;
1da177e4 4637 }
2d7202bf 4638
c3968a85
DW
4639 if (rt->rt6i_prefsrc.plen) {
4640 struct in6_addr saddr_buf;
4e3fd7a0 4641 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4642 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4643 goto nla_put_failure;
c3968a85
DW
4644 }
4645
d4ead6b3
DA
4646 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4647 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4648 goto nla_put_failure;
4649
c78679e8
DM
4650 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4651 goto nla_put_failure;
8253947e 4652
beb1afac
DA
4653 /* For multipath routes, walk the siblings list and add
4654 * each as a nexthop within RTA_MULTIPATH.
4655 */
4656 if (rt->rt6i_nsiblings) {
4657 struct rt6_info *sibling, *next_sibling;
4658 struct nlattr *mp;
4659
4660 mp = nla_nest_start(skb, RTA_MULTIPATH);
4661 if (!mp)
4662 goto nla_put_failure;
4663
4664 if (rt6_add_nexthop(skb, rt) < 0)
4665 goto nla_put_failure;
4666
4667 list_for_each_entry_safe(sibling, next_sibling,
4668 &rt->rt6i_siblings, rt6i_siblings) {
4669 if (rt6_add_nexthop(skb, sibling) < 0)
4670 goto nla_put_failure;
4671 }
4672
4673 nla_nest_end(skb, mp);
4674 } else {
5be083ce 4675 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4676 goto nla_put_failure;
4677 }
4678
14895687
DA
4679 if (rt->rt6i_flags & RTF_EXPIRES) {
4680 expires = dst ? dst->expires : rt->expires;
4681 expires -= jiffies;
4682 }
69cdf8f9 4683
d4ead6b3 4684 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4685 goto nla_put_failure;
2d7202bf 4686
c78ba6d6
LR
4687 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4688 goto nla_put_failure;
4689
19e42e45 4690
053c095a
JB
4691 nlmsg_end(skb, nlh);
4692 return 0;
2d7202bf
TG
4693
4694nla_put_failure:
26932566
PM
4695 nlmsg_cancel(skb, nlh);
4696 return -EMSGSIZE;
1da177e4
LT
4697}
4698
1b43af54 4699int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4700{
4701 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4702 struct net *net = arg->net;
4703
421842ed 4704 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4705 return 0;
1da177e4 4706
2d7202bf
TG
4707 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4708 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4709
4710 /* user wants prefix routes only */
4711 if (rtm->rtm_flags & RTM_F_PREFIX &&
4712 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4713 /* success since this is not a prefix route */
4714 return 1;
4715 }
4716 }
1da177e4 4717
d4ead6b3
DA
4718 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4719 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4720 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4721}
4722
c21ef3e3
DA
4723static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4724 struct netlink_ext_ack *extack)
1da177e4 4725{
3b1e0a65 4726 struct net *net = sock_net(in_skb->sk);
ab364a6f 4727 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4728 int err, iif = 0, oif = 0;
4729 struct dst_entry *dst;
ab364a6f 4730 struct rt6_info *rt;
1da177e4 4731 struct sk_buff *skb;
ab364a6f 4732 struct rtmsg *rtm;
4c9483b2 4733 struct flowi6 fl6;
18c3a61c 4734 bool fibmatch;
1da177e4 4735
fceb6435 4736 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4737 extack);
ab364a6f
TG
4738 if (err < 0)
4739 goto errout;
1da177e4 4740
ab364a6f 4741 err = -EINVAL;
4c9483b2 4742 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4743 rtm = nlmsg_data(nlh);
4744 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4745 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4746
ab364a6f
TG
4747 if (tb[RTA_SRC]) {
4748 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4749 goto errout;
4750
4e3fd7a0 4751 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4752 }
4753
4754 if (tb[RTA_DST]) {
4755 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4756 goto errout;
4757
4e3fd7a0 4758 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4759 }
4760
4761 if (tb[RTA_IIF])
4762 iif = nla_get_u32(tb[RTA_IIF]);
4763
4764 if (tb[RTA_OIF])
72331bc0 4765 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4766
2e47b291
LC
4767 if (tb[RTA_MARK])
4768 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4769
622ec2c9
LC
4770 if (tb[RTA_UID])
4771 fl6.flowi6_uid = make_kuid(current_user_ns(),
4772 nla_get_u32(tb[RTA_UID]));
4773 else
4774 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4775
1da177e4
LT
4776 if (iif) {
4777 struct net_device *dev;
72331bc0
SL
4778 int flags = 0;
4779
121622db
FW
4780 rcu_read_lock();
4781
4782 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4783 if (!dev) {
121622db 4784 rcu_read_unlock();
1da177e4 4785 err = -ENODEV;
ab364a6f 4786 goto errout;
1da177e4 4787 }
72331bc0
SL
4788
4789 fl6.flowi6_iif = iif;
4790
4791 if (!ipv6_addr_any(&fl6.saddr))
4792 flags |= RT6_LOOKUP_F_HAS_SADDR;
4793
b75cc8f9 4794 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4795
4796 rcu_read_unlock();
72331bc0
SL
4797 } else {
4798 fl6.flowi6_oif = oif;
4799
58acfd71 4800 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4801 }
4802
18c3a61c
RP
4803
4804 rt = container_of(dst, struct rt6_info, dst);
4805 if (rt->dst.error) {
4806 err = rt->dst.error;
4807 ip6_rt_put(rt);
4808 goto errout;
1da177e4
LT
4809 }
4810
9d6acb3b
WC
4811 if (rt == net->ipv6.ip6_null_entry) {
4812 err = rt->dst.error;
4813 ip6_rt_put(rt);
4814 goto errout;
4815 }
4816
fba961ab
DM
4817 if (fibmatch && rt->from) {
4818 struct rt6_info *ort = rt->from;
58acfd71
IS
4819
4820 dst_hold(&ort->dst);
4821 ip6_rt_put(rt);
4822 rt = ort;
4823 }
4824
ab364a6f 4825 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4826 if (!skb) {
94e187c0 4827 ip6_rt_put(rt);
ab364a6f
TG
4828 err = -ENOBUFS;
4829 goto errout;
4830 }
1da177e4 4831
d8d1f30b 4832 skb_dst_set(skb, &rt->dst);
18c3a61c 4833 if (fibmatch)
d4ead6b3 4834 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
18c3a61c
RP
4835 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4836 nlh->nlmsg_seq, 0);
4837 else
d4ead6b3
DA
4838 err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
4839 iif, RTM_NEWROUTE,
4840 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4841 0);
1da177e4 4842 if (err < 0) {
ab364a6f
TG
4843 kfree_skb(skb);
4844 goto errout;
1da177e4
LT
4845 }
4846
15e47304 4847 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4848errout:
1da177e4 4849 return err;
1da177e4
LT
4850}
4851
37a1d361
RP
4852void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4853 unsigned int nlm_flags)
1da177e4
LT
4854{
4855 struct sk_buff *skb;
5578689a 4856 struct net *net = info->nl_net;
528c4ceb
DL
4857 u32 seq;
4858 int err;
4859
4860 err = -ENOBUFS;
38308473 4861 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4862
19e42e45 4863 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4864 if (!skb)
21713ebc
TG
4865 goto errout;
4866
d4ead6b3
DA
4867 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4868 event, info->portid, seq, nlm_flags);
26932566
PM
4869 if (err < 0) {
4870 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4871 WARN_ON(err == -EMSGSIZE);
4872 kfree_skb(skb);
4873 goto errout;
4874 }
15e47304 4875 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4876 info->nlh, gfp_any());
4877 return;
21713ebc
TG
4878errout:
4879 if (err < 0)
5578689a 4880 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4881}
4882
8ed67789 4883static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4884 unsigned long event, void *ptr)
8ed67789 4885{
351638e7 4886 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4887 struct net *net = dev_net(dev);
8ed67789 4888
242d3a49
WC
4889 if (!(dev->flags & IFF_LOOPBACK))
4890 return NOTIFY_OK;
4891
4892 if (event == NETDEV_REGISTER) {
421842ed
DA
4893 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4894 net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4895 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4896 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4897#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4898 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4899 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4900 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4901 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4902#endif
76da0704
WC
4903 } else if (event == NETDEV_UNREGISTER &&
4904 dev->reg_state != NETREG_UNREGISTERED) {
4905 /* NETDEV_UNREGISTER could be fired for multiple times by
4906 * netdev_wait_allrefs(). Make sure we only call this once.
4907 */
421842ed 4908 in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
12d94a80 4909 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4910#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4911 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4912 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4913#endif
4914 }
4915
4916 return NOTIFY_OK;
4917}
4918
1da177e4
LT
4919/*
4920 * /proc
4921 */
4922
4923#ifdef CONFIG_PROC_FS
4924
33120b30 4925static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4926 .open = ipv6_route_open,
4927 .read = seq_read,
4928 .llseek = seq_lseek,
8d2ca1d7 4929 .release = seq_release_net,
33120b30
AD
4930};
4931
1da177e4
LT
4932static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4933{
69ddb805 4934 struct net *net = (struct net *)seq->private;
1da177e4 4935 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4936 net->ipv6.rt6_stats->fib_nodes,
4937 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4938 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4939 net->ipv6.rt6_stats->fib_rt_entries,
4940 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4941 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4942 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4943
4944 return 0;
4945}
4946
4947static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4948{
de05c557 4949 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4950}
4951
9a32144e 4952static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4953 .open = rt6_stats_seq_open,
4954 .read = seq_read,
4955 .llseek = seq_lseek,
b6fcbdb4 4956 .release = single_release_net,
1da177e4
LT
4957};
4958#endif /* CONFIG_PROC_FS */
4959
4960#ifdef CONFIG_SYSCTL
4961
1da177e4 4962static
fe2c6338 4963int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4964 void __user *buffer, size_t *lenp, loff_t *ppos)
4965{
c486da34
LAG
4966 struct net *net;
4967 int delay;
4968 if (!write)
1da177e4 4969 return -EINVAL;
c486da34
LAG
4970
4971 net = (struct net *)ctl->extra1;
4972 delay = net->ipv6.sysctl.flush_delay;
4973 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4974 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4975 return 0;
1da177e4
LT
4976}
4977
fe2c6338 4978struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4979 {
1da177e4 4980 .procname = "flush",
4990509f 4981 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4982 .maxlen = sizeof(int),
89c8b3a1 4983 .mode = 0200,
6d9f239a 4984 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4985 },
4986 {
1da177e4 4987 .procname = "gc_thresh",
9a7ec3a9 4988 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4989 .maxlen = sizeof(int),
4990 .mode = 0644,
6d9f239a 4991 .proc_handler = proc_dointvec,
1da177e4
LT
4992 },
4993 {
1da177e4 4994 .procname = "max_size",
4990509f 4995 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4996 .maxlen = sizeof(int),
4997 .mode = 0644,
6d9f239a 4998 .proc_handler = proc_dointvec,
1da177e4
LT
4999 },
5000 {
1da177e4 5001 .procname = "gc_min_interval",
4990509f 5002 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5003 .maxlen = sizeof(int),
5004 .mode = 0644,
6d9f239a 5005 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5006 },
5007 {
1da177e4 5008 .procname = "gc_timeout",
4990509f 5009 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5010 .maxlen = sizeof(int),
5011 .mode = 0644,
6d9f239a 5012 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5013 },
5014 {
1da177e4 5015 .procname = "gc_interval",
4990509f 5016 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5017 .maxlen = sizeof(int),
5018 .mode = 0644,
6d9f239a 5019 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5020 },
5021 {
1da177e4 5022 .procname = "gc_elasticity",
4990509f 5023 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5024 .maxlen = sizeof(int),
5025 .mode = 0644,
f3d3f616 5026 .proc_handler = proc_dointvec,
1da177e4
LT
5027 },
5028 {
1da177e4 5029 .procname = "mtu_expires",
4990509f 5030 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5031 .maxlen = sizeof(int),
5032 .mode = 0644,
6d9f239a 5033 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5034 },
5035 {
1da177e4 5036 .procname = "min_adv_mss",
4990509f 5037 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5038 .maxlen = sizeof(int),
5039 .mode = 0644,
f3d3f616 5040 .proc_handler = proc_dointvec,
1da177e4
LT
5041 },
5042 {
1da177e4 5043 .procname = "gc_min_interval_ms",
4990509f 5044 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5045 .maxlen = sizeof(int),
5046 .mode = 0644,
6d9f239a 5047 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5048 },
f8572d8f 5049 { }
1da177e4
LT
5050};
5051
2c8c1e72 5052struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5053{
5054 struct ctl_table *table;
5055
5056 table = kmemdup(ipv6_route_table_template,
5057 sizeof(ipv6_route_table_template),
5058 GFP_KERNEL);
5ee09105
YH
5059
5060 if (table) {
5061 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5062 table[0].extra1 = net;
86393e52 5063 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5064 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5065 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5066 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5067 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5068 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5069 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5070 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5071 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5072
5073 /* Don't export sysctls to unprivileged users */
5074 if (net->user_ns != &init_user_ns)
5075 table[0].procname = NULL;
5ee09105
YH
5076 }
5077
760f2d01
DL
5078 return table;
5079}
1da177e4
LT
5080#endif
5081
2c8c1e72 5082static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5083{
633d424b 5084 int ret = -ENOMEM;
8ed67789 5085
86393e52
AD
5086 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5087 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5088
fc66f95c
ED
5089 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5090 goto out_ip6_dst_ops;
5091
421842ed
DA
5092 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5093 sizeof(*net->ipv6.fib6_null_entry),
5094 GFP_KERNEL);
5095 if (!net->ipv6.fib6_null_entry)
5096 goto out_ip6_dst_entries;
5097
8ed67789
DL
5098 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5099 sizeof(*net->ipv6.ip6_null_entry),
5100 GFP_KERNEL);
5101 if (!net->ipv6.ip6_null_entry)
421842ed 5102 goto out_fib6_null_entry;
d8d1f30b 5103 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5104 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5105 ip6_template_metrics, true);
8ed67789
DL
5106
5107#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5108 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5109 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5110 sizeof(*net->ipv6.ip6_prohibit_entry),
5111 GFP_KERNEL);
68fffc67
PZ
5112 if (!net->ipv6.ip6_prohibit_entry)
5113 goto out_ip6_null_entry;
d8d1f30b 5114 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5115 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5116 ip6_template_metrics, true);
8ed67789
DL
5117
5118 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5119 sizeof(*net->ipv6.ip6_blk_hole_entry),
5120 GFP_KERNEL);
68fffc67
PZ
5121 if (!net->ipv6.ip6_blk_hole_entry)
5122 goto out_ip6_prohibit_entry;
d8d1f30b 5123 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5124 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5125 ip6_template_metrics, true);
8ed67789
DL
5126#endif
5127
b339a47c
PZ
5128 net->ipv6.sysctl.flush_delay = 0;
5129 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5130 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5131 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5132 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5133 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5134 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5135 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5136
6891a346
BT
5137 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5138
8ed67789
DL
5139 ret = 0;
5140out:
5141 return ret;
f2fc6a54 5142
68fffc67
PZ
5143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5144out_ip6_prohibit_entry:
5145 kfree(net->ipv6.ip6_prohibit_entry);
5146out_ip6_null_entry:
5147 kfree(net->ipv6.ip6_null_entry);
5148#endif
421842ed
DA
5149out_fib6_null_entry:
5150 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5151out_ip6_dst_entries:
5152 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5153out_ip6_dst_ops:
f2fc6a54 5154 goto out;
cdb18761
DL
5155}
5156
2c8c1e72 5157static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5158{
421842ed 5159 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5160 kfree(net->ipv6.ip6_null_entry);
5161#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5162 kfree(net->ipv6.ip6_prohibit_entry);
5163 kfree(net->ipv6.ip6_blk_hole_entry);
5164#endif
41bb78b4 5165 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5166}
5167
d189634e
TG
5168static int __net_init ip6_route_net_init_late(struct net *net)
5169{
5170#ifdef CONFIG_PROC_FS
d4beaa66 5171 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5172 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5173#endif
5174 return 0;
5175}
5176
5177static void __net_exit ip6_route_net_exit_late(struct net *net)
5178{
5179#ifdef CONFIG_PROC_FS
ece31ffd
G
5180 remove_proc_entry("ipv6_route", net->proc_net);
5181 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5182#endif
5183}
5184
cdb18761
DL
5185static struct pernet_operations ip6_route_net_ops = {
5186 .init = ip6_route_net_init,
5187 .exit = ip6_route_net_exit,
5188};
5189
c3426b47
DM
5190static int __net_init ipv6_inetpeer_init(struct net *net)
5191{
5192 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5193
5194 if (!bp)
5195 return -ENOMEM;
5196 inet_peer_base_init(bp);
5197 net->ipv6.peers = bp;
5198 return 0;
5199}
5200
5201static void __net_exit ipv6_inetpeer_exit(struct net *net)
5202{
5203 struct inet_peer_base *bp = net->ipv6.peers;
5204
5205 net->ipv6.peers = NULL;
56a6b248 5206 inetpeer_invalidate_tree(bp);
c3426b47
DM
5207 kfree(bp);
5208}
5209
2b823f72 5210static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5211 .init = ipv6_inetpeer_init,
5212 .exit = ipv6_inetpeer_exit,
5213};
5214
d189634e
TG
5215static struct pernet_operations ip6_route_net_late_ops = {
5216 .init = ip6_route_net_init_late,
5217 .exit = ip6_route_net_exit_late,
5218};
5219
8ed67789
DL
5220static struct notifier_block ip6_route_dev_notifier = {
5221 .notifier_call = ip6_route_dev_notify,
242d3a49 5222 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5223};
5224
2f460933
WC
5225void __init ip6_route_init_special_entries(void)
5226{
5227 /* Registering of the loopback is done before this portion of code,
5228 * the loopback reference in rt6_info will not be taken, do it
5229 * manually for init_net */
421842ed
DA
5230 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5231 init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2f460933
WC
5232 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5233 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5234 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5235 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5236 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5237 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5238 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5239 #endif
5240}
5241
433d49c3 5242int __init ip6_route_init(void)
1da177e4 5243{
433d49c3 5244 int ret;
8d0b94af 5245 int cpu;
433d49c3 5246
9a7ec3a9
DL
5247 ret = -ENOMEM;
5248 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5249 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5250 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5251 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5252 goto out;
14e50e57 5253
fc66f95c 5254 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5255 if (ret)
bdb3289f 5256 goto out_kmem_cache;
bdb3289f 5257
c3426b47
DM
5258 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5259 if (ret)
e8803b6c 5260 goto out_dst_entries;
2a0c451a 5261
7e52b33b
DM
5262 ret = register_pernet_subsys(&ip6_route_net_ops);
5263 if (ret)
5264 goto out_register_inetpeer;
c3426b47 5265
5dc121e9
AE
5266 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5267
e8803b6c 5268 ret = fib6_init();
433d49c3 5269 if (ret)
8ed67789 5270 goto out_register_subsys;
433d49c3 5271
433d49c3
DL
5272 ret = xfrm6_init();
5273 if (ret)
e8803b6c 5274 goto out_fib6_init;
c35b7e72 5275
433d49c3
DL
5276 ret = fib6_rules_init();
5277 if (ret)
5278 goto xfrm6_init;
7e5449c2 5279
d189634e
TG
5280 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5281 if (ret)
5282 goto fib6_rules_init;
5283
16feebcf
FW
5284 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5285 inet6_rtm_newroute, NULL, 0);
5286 if (ret < 0)
5287 goto out_register_late_subsys;
5288
5289 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5290 inet6_rtm_delroute, NULL, 0);
5291 if (ret < 0)
5292 goto out_register_late_subsys;
5293
5294 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5295 inet6_rtm_getroute, NULL,
5296 RTNL_FLAG_DOIT_UNLOCKED);
5297 if (ret < 0)
d189634e 5298 goto out_register_late_subsys;
c127ea2c 5299
8ed67789 5300 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5301 if (ret)
d189634e 5302 goto out_register_late_subsys;
8ed67789 5303
8d0b94af
MKL
5304 for_each_possible_cpu(cpu) {
5305 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5306
5307 INIT_LIST_HEAD(&ul->head);
5308 spin_lock_init(&ul->lock);
5309 }
5310
433d49c3
DL
5311out:
5312 return ret;
5313
d189634e 5314out_register_late_subsys:
16feebcf 5315 rtnl_unregister_all(PF_INET6);
d189634e 5316 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5317fib6_rules_init:
433d49c3
DL
5318 fib6_rules_cleanup();
5319xfrm6_init:
433d49c3 5320 xfrm6_fini();
2a0c451a
TG
5321out_fib6_init:
5322 fib6_gc_cleanup();
8ed67789
DL
5323out_register_subsys:
5324 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5325out_register_inetpeer:
5326 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5327out_dst_entries:
5328 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5329out_kmem_cache:
f2fc6a54 5330 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5331 goto out;
1da177e4
LT
5332}
5333
5334void ip6_route_cleanup(void)
5335{
8ed67789 5336 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5337 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5338 fib6_rules_cleanup();
1da177e4 5339 xfrm6_fini();
1da177e4 5340 fib6_gc_cleanup();
c3426b47 5341 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5342 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5343 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5344 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5345}