net/ipv6: separate handling of FIB entries from dst based routes
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result codes produced by the neighbour reachability check and passed
 * through rt6_score_route(). Negative values are failures.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
52bd4c0c 99static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3 100static size_t rt6_nlmsg_size(struct rt6_info *rt);
d4ead6b3
DA
101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 struct rt6_info *rt, struct dst_entry *dst,
103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
35732d01
WW
106static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 111static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
efa2cea0 116static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
421842ed
DA
286static const struct rt6_info fib6_null_entry_template = {
287 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .rt6i_protocol = RTPROT_KERNEL,
289 .rt6i_metric = ~(u32)0,
290 .rt6i_ref = ATOMIC_INIT(1),
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 305 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
306 .rt6i_metric = ~(u32) 0,
307 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 308 .fib6_type = RTN_UNREACHABLE,
1da177e4
LT
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 326 .fib6_type = RTN_PROHIBIT,
101367c2
TG
327};
328
fb0af4c7 329static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
330 .dst = {
331 .__refcnt = ATOMIC_INIT(1),
332 .__use = 1,
2c20cbd7 333 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 334 .error = -EINVAL,
d8d1f30b 335 .input = dst_discard,
ede2059d 336 .output = dst_discard_out,
101367c2
TG
337 },
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 339 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
340 .rt6i_metric = ~(u32) 0,
341 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 342 .fib6_type = RTN_BLACKHOLE,
101367c2
TG
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
93531c67
DA
357struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
358 int flags)
1da177e4 359{
97bab73f 360 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 361 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 362
81eb8447 363 if (rt) {
ebfa45f0 364 rt6_info_init(rt);
81eb8447
WW
365 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
366 }
8104891b 367
cf911662 368 return rt;
1da177e4 369}
9ab179d8 370EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 371
1da177e4
LT
372static void ip6_dst_destroy(struct dst_entry *dst)
373{
374 struct rt6_info *rt = (struct rt6_info *)dst;
3a2232e9 375 struct rt6_info *from = rt->from;
8d0b94af 376 struct inet6_dev *idev;
1da177e4 377
4b32b5ad 378 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
379 rt6_uncached_list_del(rt);
380
381 idev = rt->rt6i_idev;
38308473 382 if (idev) {
1da177e4
LT
383 rt->rt6i_idev = NULL;
384 in6_dev_put(idev);
1ab1457c 385 }
d4ead6b3 386
3a2232e9 387 rt->from = NULL;
93531c67 388 fib6_info_release(from);
b3419363
DM
389}
390
1da177e4
LT
391static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392 int how)
393{
394 struct rt6_info *rt = (struct rt6_info *)dst;
395 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 396 struct net_device *loopback_dev =
c346dca1 397 dev_net(dev)->loopback_dev;
1da177e4 398
e5645f51
WW
399 if (idev && idev->dev != loopback_dev) {
400 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
401 if (loopback_idev) {
402 rt->rt6i_idev = loopback_idev;
403 in6_dev_put(idev);
97cac082 404 }
1da177e4
LT
405 }
406}
407
5973fb1e
MKL
408static bool __rt6_check_expired(const struct rt6_info *rt)
409{
410 if (rt->rt6i_flags & RTF_EXPIRES)
411 return time_after(jiffies, rt->dst.expires);
412 else
413 return false;
414}
415
a50feda5 416static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 417{
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
3a2232e9 421 } else if (rt->from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
14895687 423 fib6_check_expired(rt->from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
b4bac172
DA
428static struct rt6_info *rt6_multipath_select(const struct net *net,
429 struct rt6_info *match,
52bd4c0c 430 struct flowi6 *fl6, int oif,
b75cc8f9 431 const struct sk_buff *skb,
52bd4c0c 432 int strict)
51ebd318
ND
433{
434 struct rt6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
445 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
446 rt6i_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8ed67789
DL
465static inline struct rt6_info *rt6_device_match(struct net *net,
466 struct rt6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4
LT
470{
471 struct rt6_info *local = NULL;
472 struct rt6_info *sprt;
473
5e670d84
DA
474 if (!oif && ipv6_addr_any(saddr) &&
475 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 476 return rt;
dd3abc4e 477
071fb37e 478 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 479 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 480
5e670d84 481 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
482 continue;
483
dd3abc4e 484 if (oif) {
1da177e4
LT
485 if (dev->ifindex == oif)
486 return sprt;
487 if (dev->flags & IFF_LOOPBACK) {
38308473 488 if (!sprt->rt6i_idev ||
1da177e4 489 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 490 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 491 continue;
17fb0b2b
DA
492 if (local &&
493 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
494 continue;
495 }
496 local = sprt;
497 }
dd3abc4e
YH
498 } else {
499 if (ipv6_chk_addr(net, saddr, dev,
500 flags & RT6_LOOKUP_F_IFACE))
501 return sprt;
1da177e4 502 }
dd3abc4e 503 }
1da177e4 504
dd3abc4e 505 if (oif) {
1da177e4
LT
506 if (local)
507 return local;
508
d420895e 509 if (flags & RT6_LOOKUP_F_IFACE)
421842ed 510 return net->ipv6.fib6_null_entry;
1da177e4 511 }
8067bb8c 512
421842ed 513 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
514}
515
27097255 516#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
517struct __rt6_probe_work {
518 struct work_struct work;
519 struct in6_addr target;
520 struct net_device *dev;
521};
522
523static void rt6_probe_deferred(struct work_struct *w)
524{
525 struct in6_addr mcaddr;
526 struct __rt6_probe_work *work =
527 container_of(w, struct __rt6_probe_work, work);
528
529 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 530 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 531 dev_put(work->dev);
662f5533 532 kfree(work);
c2f17e82
HFS
533}
534
27097255
YH
535static void rt6_probe(struct rt6_info *rt)
536{
990edb42 537 struct __rt6_probe_work *work;
5e670d84 538 const struct in6_addr *nh_gw;
f2c31e32 539 struct neighbour *neigh;
5e670d84
DA
540 struct net_device *dev;
541
27097255
YH
542 /*
543 * Okay, this does not seem to be appropriate
544 * for now, however, we need to check if it
545 * is really so; aka Router Reachability Probing.
546 *
547 * Router Reachability Probe MUST be rate-limited
548 * to no more than one per minute.
549 */
2152caea 550 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 551 return;
5e670d84
DA
552
553 nh_gw = &rt->fib6_nh.nh_gw;
554 dev = rt->fib6_nh.nh_dev;
2152caea 555 rcu_read_lock_bh();
5e670d84 556 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 557 if (neigh) {
8d6c31bf
MKL
558 if (neigh->nud_state & NUD_VALID)
559 goto out;
560
990edb42 561 work = NULL;
2152caea 562 write_lock(&neigh->lock);
990edb42
MKL
563 if (!(neigh->nud_state & NUD_VALID) &&
564 time_after(jiffies,
565 neigh->updated +
566 rt->rt6i_idev->cnf.rtr_probe_interval)) {
567 work = kmalloc(sizeof(*work), GFP_ATOMIC);
568 if (work)
569 __neigh_set_probe_once(neigh);
c2f17e82 570 }
2152caea 571 write_unlock(&neigh->lock);
990edb42
MKL
572 } else {
573 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 574 }
990edb42
MKL
575
576 if (work) {
577 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
578 work->target = *nh_gw;
579 dev_hold(dev);
580 work->dev = dev;
990edb42
MKL
581 schedule_work(&work->work);
582 }
583
8d6c31bf 584out:
2152caea 585 rcu_read_unlock_bh();
27097255
YH
586}
587#else
/* Router probing is compiled out in this configuration. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
591#endif
592
1da177e4 593/*
554cfb7e 594 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 595 */
b6f99a21 596static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 597{
5e670d84
DA
598 const struct net_device *dev = rt->fib6_nh.nh_dev;
599
161980f4 600 if (!oif || dev->ifindex == oif)
554cfb7e 601 return 2;
161980f4
DM
602 if ((dev->flags & IFF_LOOPBACK) &&
603 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
604 return 1;
605 return 0;
554cfb7e 606}
1da177e4 607
afc154e9 608static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 609{
afc154e9 610 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 611 struct neighbour *neigh;
f2c31e32 612
4d0c5911
YH
613 if (rt->rt6i_flags & RTF_NONEXTHOP ||
614 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 615 return RT6_NUD_SUCCEED;
145a3621
YH
616
617 rcu_read_lock_bh();
5e670d84
DA
618 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
619 &rt->fib6_nh.nh_gw);
145a3621
YH
620 if (neigh) {
621 read_lock(&neigh->lock);
554cfb7e 622 if (neigh->nud_state & NUD_VALID)
afc154e9 623 ret = RT6_NUD_SUCCEED;
398bcbeb 624#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 625 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 626 ret = RT6_NUD_SUCCEED;
7e980569
JB
627 else
628 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 629#endif
145a3621 630 read_unlock(&neigh->lock);
afc154e9
HFS
631 } else {
632 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 633 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 634 }
145a3621
YH
635 rcu_read_unlock_bh();
636
a5a81f0b 637 return ret;
1da177e4
LT
638}
639
554cfb7e
YH
640static int rt6_score_route(struct rt6_info *rt, int oif,
641 int strict)
1da177e4 642{
a5a81f0b 643 int m;
1ab1457c 644
4d0c5911 645 m = rt6_check_dev(rt, oif);
77d16f45 646 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 647 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
648#ifdef CONFIG_IPV6_ROUTER_PREF
649 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
650#endif
afc154e9
HFS
651 if (strict & RT6_LOOKUP_F_REACHABLE) {
652 int n = rt6_check_neigh(rt);
653 if (n < 0)
654 return n;
655 }
554cfb7e
YH
656 return m;
657}
658
f11e6659 659static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
660 int *mpri, struct rt6_info *match,
661 bool *do_rr)
554cfb7e 662{
f11e6659 663 int m;
afc154e9 664 bool match_do_rr = false;
35103d11 665 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 666
5e670d84 667 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
668 goto out;
669
14c5206c 670 if (idev->cnf.ignore_routes_with_linkdown &&
5e670d84 671 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 672 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 673 goto out;
f11e6659 674
14895687 675 if (fib6_check_expired(rt))
f11e6659
DM
676 goto out;
677
678 m = rt6_score_route(rt, oif, strict);
7e980569 679 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
680 match_do_rr = true;
681 m = 0; /* lowest valid score */
7e980569 682 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 683 goto out;
afc154e9
HFS
684 }
685
686 if (strict & RT6_LOOKUP_F_REACHABLE)
687 rt6_probe(rt);
f11e6659 688
7e980569 689 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 690 if (m > *mpri) {
afc154e9 691 *do_rr = match_do_rr;
f11e6659
DM
692 *mpri = m;
693 match = rt;
f11e6659 694 }
f11e6659
DM
695out:
696 return match;
697}
698
699static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 700 struct rt6_info *leaf,
f11e6659 701 struct rt6_info *rr_head,
afc154e9
HFS
702 u32 metric, int oif, int strict,
703 bool *do_rr)
f11e6659 704{
9fbdcfaf 705 struct rt6_info *rt, *match, *cont;
554cfb7e 706 int mpri = -1;
1da177e4 707
f11e6659 708 match = NULL;
9fbdcfaf 709 cont = NULL;
071fb37e 710 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
711 if (rt->rt6i_metric != metric) {
712 cont = rt;
713 break;
714 }
715
716 match = find_match(rt, oif, strict, &mpri, match, do_rr);
717 }
718
66f5d6ce 719 for (rt = leaf; rt && rt != rr_head;
071fb37e 720 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
721 if (rt->rt6i_metric != metric) {
722 cont = rt;
723 break;
724 }
725
afc154e9 726 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
727 }
728
729 if (match || !cont)
730 return match;
731
071fb37e 732 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 733 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 734
f11e6659
DM
735 return match;
736}
1da177e4 737
8d1040e8
WW
738static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
739 int oif, int strict)
f11e6659 740{
66f5d6ce 741 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 742 struct rt6_info *match, *rt0;
afc154e9 743 bool do_rr = false;
17ecf590 744 int key_plen;
1da177e4 745
421842ed
DA
746 if (!leaf || leaf == net->ipv6.fib6_null_entry)
747 return net->ipv6.fib6_null_entry;
8d1040e8 748
66f5d6ce 749 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 750 if (!rt0)
66f5d6ce 751 rt0 = leaf;
1da177e4 752
17ecf590
WW
753 /* Double check to make sure fn is not an intermediate node
754 * and fn->leaf does not points to its child's leaf
755 * (This might happen if all routes under fn are deleted from
756 * the tree and fib6_repair_tree() is called on the node.)
757 */
758 key_plen = rt0->rt6i_dst.plen;
759#ifdef CONFIG_IPV6_SUBTREES
760 if (rt0->rt6i_src.plen)
761 key_plen = rt0->rt6i_src.plen;
762#endif
763 if (fn->fn_bit != key_plen)
421842ed 764 return net->ipv6.fib6_null_entry;
17ecf590 765
8d1040e8 766 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 767 &do_rr);
1da177e4 768
afc154e9 769 if (do_rr) {
071fb37e 770 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 771
554cfb7e 772 /* no entries matched; do round-robin */
f11e6659 773 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 774 next = leaf;
f11e6659 775
66f5d6ce
WW
776 if (next != rt0) {
777 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
778 /* make sure next is not being deleted from the tree */
779 if (next->rt6i_node)
780 rcu_assign_pointer(fn->rr_ptr, next);
781 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
782 }
1da177e4 783 }
1da177e4 784
421842ed 785 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
786}
787
8b9df265
MKL
788static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
789{
790 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
791}
792
70ceb4f5
YH
793#ifdef CONFIG_IPV6_ROUTE_INFO
794int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 795 const struct in6_addr *gwaddr)
70ceb4f5 796{
c346dca1 797 struct net *net = dev_net(dev);
70ceb4f5
YH
798 struct route_info *rinfo = (struct route_info *) opt;
799 struct in6_addr prefix_buf, *prefix;
800 unsigned int pref;
4bed72e4 801 unsigned long lifetime;
70ceb4f5
YH
802 struct rt6_info *rt;
803
804 if (len < sizeof(struct route_info)) {
805 return -EINVAL;
806 }
807
808 /* Sanity check for prefix_len and length */
809 if (rinfo->length > 3) {
810 return -EINVAL;
811 } else if (rinfo->prefix_len > 128) {
812 return -EINVAL;
813 } else if (rinfo->prefix_len > 64) {
814 if (rinfo->length < 2) {
815 return -EINVAL;
816 }
817 } else if (rinfo->prefix_len > 0) {
818 if (rinfo->length < 1) {
819 return -EINVAL;
820 }
821 }
822
823 pref = rinfo->route_pref;
824 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 825 return -EINVAL;
70ceb4f5 826
4bed72e4 827 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
828
829 if (rinfo->length == 3)
830 prefix = (struct in6_addr *)rinfo->prefix;
831 else {
832 /* this function is safe */
833 ipv6_addr_prefix(&prefix_buf,
834 (struct in6_addr *)rinfo->prefix,
835 rinfo->prefix_len);
836 prefix = &prefix_buf;
837 }
838
f104a567 839 if (rinfo->prefix_len == 0)
afb1d4b5 840 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
841 else
842 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 843 gwaddr, dev);
70ceb4f5
YH
844
845 if (rt && !lifetime) {
afb1d4b5 846 ip6_del_rt(net, rt);
70ceb4f5
YH
847 rt = NULL;
848 }
849
850 if (!rt && lifetime)
830218c1
DA
851 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
852 dev, pref);
70ceb4f5
YH
853 else if (rt)
854 rt->rt6i_flags = RTF_ROUTEINFO |
855 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
856
857 if (rt) {
1716a961 858 if (!addrconf_finite_timeout(lifetime))
14895687 859 fib6_clean_expires(rt);
1716a961 860 else
14895687 861 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 862
93531c67 863 fib6_info_release(rt);
70ceb4f5
YH
864 }
865 return 0;
866}
867#endif
868
ae90d867
DA
869/*
870 * Misc support functions
871 */
872
873/* called with rcu_lock held */
874static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
875{
5e670d84 876 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867
DA
877
878 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
879 /* for copies of local routes, dst->dev needs to be the
880 * device if it is a master device, the master device if
881 * device is enslaved, and the loopback as the default
882 */
883 if (netif_is_l3_slave(dev) &&
884 !rt6_need_strict(&rt->rt6i_dst.addr))
885 dev = l3mdev_master_dev_rcu(dev);
886 else if (!netif_is_l3_master(dev))
887 dev = dev_net(dev)->loopback_dev;
888 /* last case is netif_is_l3_master(dev) is true in which
889 * case we want dev returned to be dev
890 */
891 }
892
893 return dev;
894}
895
6edb3c96
DA
896static const int fib6_prop[RTN_MAX + 1] = {
897 [RTN_UNSPEC] = 0,
898 [RTN_UNICAST] = 0,
899 [RTN_LOCAL] = 0,
900 [RTN_BROADCAST] = 0,
901 [RTN_ANYCAST] = 0,
902 [RTN_MULTICAST] = 0,
903 [RTN_BLACKHOLE] = -EINVAL,
904 [RTN_UNREACHABLE] = -EHOSTUNREACH,
905 [RTN_PROHIBIT] = -EACCES,
906 [RTN_THROW] = -EAGAIN,
907 [RTN_NAT] = -EINVAL,
908 [RTN_XRESOLVE] = -EINVAL,
909};
910
911static int ip6_rt_type_to_error(u8 fib6_type)
912{
913 return fib6_prop[fib6_type];
914}
915
3b6761d1
DA
916static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
917{
918 unsigned short flags = 0;
919
920 if (rt->dst_nocount)
921 flags |= DST_NOCOUNT;
922 if (rt->dst_nopolicy)
923 flags |= DST_NOPOLICY;
924 if (rt->dst_host)
925 flags |= DST_HOST;
926
927 return flags;
928}
929
6edb3c96
DA
930static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
931{
932 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
933
934 switch (ort->fib6_type) {
935 case RTN_BLACKHOLE:
936 rt->dst.output = dst_discard_out;
937 rt->dst.input = dst_discard;
938 break;
939 case RTN_PROHIBIT:
940 rt->dst.output = ip6_pkt_prohibit_out;
941 rt->dst.input = ip6_pkt_prohibit;
942 break;
943 case RTN_THROW:
944 case RTN_UNREACHABLE:
945 default:
946 rt->dst.output = ip6_pkt_discard_out;
947 rt->dst.input = ip6_pkt_discard;
948 break;
949 }
950}
951
952static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
953{
3b6761d1
DA
954 rt->dst.flags |= fib6_info_dst_flags(ort);
955
6edb3c96
DA
956 if (ort->rt6i_flags & RTF_REJECT) {
957 ip6_rt_init_dst_reject(rt, ort);
958 return;
959 }
960
961 rt->dst.error = 0;
962 rt->dst.output = ip6_output;
963
964 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96
DA
965 rt->dst.input = ip6_input;
966 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
967 rt->dst.input = ip6_mc_input;
968 } else {
969 rt->dst.input = ip6_forward;
970 }
971
972 if (ort->fib6_nh.nh_lwtstate) {
973 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
974 lwtunnel_set_redirect(&rt->dst);
975 }
976
977 rt->dst.lastuse = jiffies;
978}
979
ae90d867
DA
980static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
981{
ae90d867 982 rt->rt6i_flags &= ~RTF_EXPIRES;
93531c67
DA
983 fib6_info_hold(from);
984 rt->from = from;
d4ead6b3
DA
985 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
986 if (from->fib6_metrics != &dst_default_metrics) {
987 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
988 refcount_inc(&from->fib6_metrics->refcnt);
989 }
ae90d867
DA
990}
991
992static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
993{
6edb3c96
DA
994 ip6_rt_init_dst(rt, ort);
995
ae90d867 996 rt->rt6i_dst = ort->rt6i_dst;
ae90d867
DA
997 rt->rt6i_idev = ort->rt6i_idev;
998 if (rt->rt6i_idev)
999 in6_dev_hold(rt->rt6i_idev);
5e670d84 1000 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
ae90d867
DA
1001 rt->rt6i_flags = ort->rt6i_flags;
1002 rt6_set_from(rt, ort);
1003 rt->rt6i_metric = ort->rt6i_metric;
1004#ifdef CONFIG_IPV6_SUBTREES
1005 rt->rt6i_src = ort->rt6i_src;
1006#endif
1007 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1008 rt->rt6i_table = ort->rt6i_table;
5e670d84 1009 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1010}
1011
a3c00e46
MKL
1012static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1013 struct in6_addr *saddr)
1014{
66f5d6ce 1015 struct fib6_node *pn, *sn;
a3c00e46
MKL
1016 while (1) {
1017 if (fn->fn_flags & RTN_TL_ROOT)
1018 return NULL;
66f5d6ce
WW
1019 pn = rcu_dereference(fn->parent);
1020 sn = FIB6_SUBTREE(pn);
1021 if (sn && sn != fn)
1022 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1023 else
1024 fn = pn;
1025 if (fn->fn_flags & RTN_RTINFO)
1026 return fn;
1027 }
1028}
c71099ac 1029
d3843fe5
WW
1030static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1031 bool null_fallback)
1032{
1033 struct rt6_info *rt = *prt;
1034
1035 if (dst_hold_safe(&rt->dst))
1036 return true;
1037 if (null_fallback) {
1038 rt = net->ipv6.ip6_null_entry;
1039 dst_hold(&rt->dst);
1040 } else {
1041 rt = NULL;
1042 }
1043 *prt = rt;
1044 return false;
1045}
1046
dec9b0e2
DA
1047/* called with rcu_lock held */
1048static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
1049{
3b6761d1 1050 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1051 struct net_device *dev = rt->fib6_nh.nh_dev;
1052 struct rt6_info *nrt;
1053
93531c67 1054 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1055 if (nrt)
1056 ip6_rt_copy_init(nrt, rt);
1057
1058 return nrt;
1059}
1060
8ed67789
DL
1061static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1062 struct fib6_table *table,
b75cc8f9
DA
1063 struct flowi6 *fl6,
1064 const struct sk_buff *skb,
1065 int flags)
1da177e4 1066{
23fb93a4 1067 struct rt6_info *f6i;
1da177e4 1068 struct fib6_node *fn;
23fb93a4 1069 struct rt6_info *rt;
1da177e4 1070
b6cdbc85
DA
1071 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1072 flags &= ~RT6_LOOKUP_F_IFACE;
1073
66f5d6ce 1074 rcu_read_lock();
4c9483b2 1075 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1076restart:
23fb93a4
DA
1077 f6i = rcu_dereference(fn->leaf);
1078 if (!f6i) {
1079 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1080 } else {
23fb93a4 1081 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1082 fl6->flowi6_oif, flags);
23fb93a4
DA
1083 if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
1084 f6i = rt6_multipath_select(net, f6i, fl6,
1085 fl6->flowi6_oif, skb, flags);
66f5d6ce 1086 }
23fb93a4 1087 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1088 fn = fib6_backtrack(fn, &fl6->saddr);
1089 if (fn)
1090 goto restart;
1091 }
23fb93a4 1092
2b760fcf 1093 /* Search through exception table */
23fb93a4
DA
1094 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1095 if (rt) {
dec9b0e2
DA
1096 if (ip6_hold_safe(net, &rt, true))
1097 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1098 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1099 rt = net->ipv6.ip6_null_entry;
1100 dst_hold(&rt->dst);
23fb93a4
DA
1101 } else {
1102 rt = ip6_create_rt_rcu(f6i);
1103 if (!rt) {
1104 rt = net->ipv6.ip6_null_entry;
1105 dst_hold(&rt->dst);
1106 }
dec9b0e2 1107 }
d3843fe5 1108
66f5d6ce 1109 rcu_read_unlock();
b811580d 1110
b65f164d 1111 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1112
c71099ac 1113 return rt;
c71099ac
TG
1114}
1115
67ba4152 1116struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1117 const struct sk_buff *skb, int flags)
ea6e574e 1118{
b75cc8f9 1119 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1120}
1121EXPORT_SYMBOL_GPL(ip6_route_lookup);
1122
9acd9f3a 1123struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1124 const struct in6_addr *saddr, int oif,
1125 const struct sk_buff *skb, int strict)
c71099ac 1126{
4c9483b2
DM
1127 struct flowi6 fl6 = {
1128 .flowi6_oif = oif,
1129 .daddr = *daddr,
c71099ac
TG
1130 };
1131 struct dst_entry *dst;
77d16f45 1132 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1133
adaa70bb 1134 if (saddr) {
4c9483b2 1135 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1136 flags |= RT6_LOOKUP_F_HAS_SADDR;
1137 }
1138
b75cc8f9 1139 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1140 if (dst->error == 0)
1141 return (struct rt6_info *) dst;
1142
1143 dst_release(dst);
1144
1da177e4
LT
1145 return NULL;
1146}
7159039a
YH
1147EXPORT_SYMBOL(rt6_lookup);
1148
c71099ac 1149/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1150 * It takes new route entry, the addition fails by any reason the
1151 * route is released.
1152 * Caller must hold dst before calling it.
1da177e4
LT
1153 */
1154
e5fd387a 1155static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301 1156 struct netlink_ext_ack *extack)
1da177e4
LT
1157{
1158 int err;
c71099ac 1159 struct fib6_table *table;
1da177e4 1160
c71099ac 1161 table = rt->rt6i_table;
66f5d6ce 1162 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1163 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1164 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1165
1166 return err;
1167}
1168
afb1d4b5 1169int ip6_ins_rt(struct net *net, struct rt6_info *rt)
40e22e8f 1170{
afb1d4b5 1171 struct nl_info info = { .nl_net = net, };
e715b6d3 1172
d4ead6b3 1173 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1174}
1175
8b9df265
MKL
1176static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1177 const struct in6_addr *daddr,
1178 const struct in6_addr *saddr)
1da177e4 1179{
4832c30d 1180 struct net_device *dev;
1da177e4
LT
1181 struct rt6_info *rt;
1182
1183 /*
1184 * Clone the route.
1185 */
1186
4832c30d
DA
1187 rcu_read_lock();
1188 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1189 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
4832c30d 1190 rcu_read_unlock();
83a09abd
MKL
1191 if (!rt)
1192 return NULL;
1193
1194 ip6_rt_copy_init(rt, ort);
1195 rt->rt6i_flags |= RTF_CACHE;
1196 rt->rt6i_metric = 0;
1197 rt->dst.flags |= DST_HOST;
1198 rt->rt6i_dst.addr = *daddr;
1199 rt->rt6i_dst.plen = 128;
1da177e4 1200
83a09abd
MKL
1201 if (!rt6_is_gw_or_nonexthop(ort)) {
1202 if (ort->rt6i_dst.plen != 128 &&
1203 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1204 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1205#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1206 if (rt->rt6i_src.plen && saddr) {
1207 rt->rt6i_src.addr = *saddr;
1208 rt->rt6i_src.plen = 128;
8b9df265 1209 }
83a09abd 1210#endif
95a9a5ba 1211 }
1da177e4 1212
95a9a5ba
YH
1213 return rt;
1214}
1da177e4 1215
d52d3997
MKL
1216static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1217{
3b6761d1 1218 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1219 struct net_device *dev;
d52d3997
MKL
1220 struct rt6_info *pcpu_rt;
1221
4832c30d
DA
1222 rcu_read_lock();
1223 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1224 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1225 rcu_read_unlock();
d52d3997
MKL
1226 if (!pcpu_rt)
1227 return NULL;
1228 ip6_rt_copy_init(pcpu_rt, rt);
1229 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1230 pcpu_rt->rt6i_flags |= RTF_PCPU;
1231 return pcpu_rt;
1232}
1233
66f5d6ce 1234/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1235static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1236{
a73e4195 1237 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1238
1239 p = this_cpu_ptr(rt->rt6i_pcpu);
1240 pcpu_rt = *p;
1241
d4ead6b3
DA
1242 if (pcpu_rt)
1243 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1244
a73e4195
MKL
1245 return pcpu_rt;
1246}
1247
afb1d4b5
DA
1248static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1249 struct rt6_info *rt)
a73e4195
MKL
1250{
1251 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1252
1253 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1254 if (!pcpu_rt) {
9c7370a1
MKL
1255 dst_hold(&net->ipv6.ip6_null_entry->dst);
1256 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1257 }
1258
a94b9367
WW
1259 dst_hold(&pcpu_rt->dst);
1260 p = this_cpu_ptr(rt->rt6i_pcpu);
1261 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1262 BUG_ON(prev);
a94b9367 1263
d52d3997
MKL
1264 return pcpu_rt;
1265}
1266
35732d01
WW
/* Exception hash table implementation.
 * rt6_exception_lock is the spinlock held while the exception tables
 * are modified.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1270
1271/* Remove rt6_ex from hash table and free the memory
1272 * Caller must hold rt6_exception_lock
1273 */
1274static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1275 struct rt6_exception *rt6_ex)
1276{
b2427e67 1277 struct net *net;
81eb8447 1278
35732d01
WW
1279 if (!bucket || !rt6_ex)
1280 return;
b2427e67
CIK
1281
1282 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1283 rt6_ex->rt6i->rt6i_node = NULL;
1284 hlist_del_rcu(&rt6_ex->hlist);
93531c67 1285 ip6_rt_put(rt6_ex->rt6i);
35732d01
WW
1286 kfree_rcu(rt6_ex, rcu);
1287 WARN_ON_ONCE(!bucket->depth);
1288 bucket->depth--;
81eb8447 1289 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1290}
1291
1292/* Remove oldest rt6_ex in bucket and free the memory
1293 * Caller must hold rt6_exception_lock
1294 */
1295static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1296{
1297 struct rt6_exception *rt6_ex, *oldest = NULL;
1298
1299 if (!bucket)
1300 return;
1301
1302 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1303 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1304 oldest = rt6_ex;
1305 }
1306 rt6_remove_exception(bucket, oldest);
1307}
1308
1309static u32 rt6_exception_hash(const struct in6_addr *dst,
1310 const struct in6_addr *src)
1311{
1312 static u32 seed __read_mostly;
1313 u32 val;
1314
1315 net_get_random_once(&seed, sizeof(seed));
1316 val = jhash(dst, sizeof(*dst), seed);
1317
1318#ifdef CONFIG_IPV6_SUBTREES
1319 if (src)
1320 val = jhash(src, sizeof(*src), val);
1321#endif
1322 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1323}
1324
1325/* Helper function to find the cached rt in the hash table
1326 * and update bucket pointer to point to the bucket for this
1327 * (daddr, saddr) pair
1328 * Caller must hold rt6_exception_lock
1329 */
1330static struct rt6_exception *
1331__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1332 const struct in6_addr *daddr,
1333 const struct in6_addr *saddr)
1334{
1335 struct rt6_exception *rt6_ex;
1336 u32 hval;
1337
1338 if (!(*bucket) || !daddr)
1339 return NULL;
1340
1341 hval = rt6_exception_hash(daddr, saddr);
1342 *bucket += hval;
1343
1344 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1345 struct rt6_info *rt6 = rt6_ex->rt6i;
1346 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1347
1348#ifdef CONFIG_IPV6_SUBTREES
1349 if (matched && saddr)
1350 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1351#endif
1352 if (matched)
1353 return rt6_ex;
1354 }
1355 return NULL;
1356}
1357
1358/* Helper function to find the cached rt in the hash table
1359 * and update bucket pointer to point to the bucket for this
1360 * (daddr, saddr) pair
1361 * Caller must hold rcu_read_lock()
1362 */
1363static struct rt6_exception *
1364__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1365 const struct in6_addr *daddr,
1366 const struct in6_addr *saddr)
1367{
1368 struct rt6_exception *rt6_ex;
1369 u32 hval;
1370
1371 WARN_ON_ONCE(!rcu_read_lock_held());
1372
1373 if (!(*bucket) || !daddr)
1374 return NULL;
1375
1376 hval = rt6_exception_hash(daddr, saddr);
1377 *bucket += hval;
1378
1379 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1380 struct rt6_info *rt6 = rt6_ex->rt6i;
1381 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1382
1383#ifdef CONFIG_IPV6_SUBTREES
1384 if (matched && saddr)
1385 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1386#endif
1387 if (matched)
1388 return rt6_ex;
1389 }
1390 return NULL;
1391}
1392
d4ead6b3
DA
1393static unsigned int fib6_mtu(const struct rt6_info *rt)
1394{
1395 unsigned int mtu;
1396
1397 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1398 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1399
1400 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1401}
1402
35732d01
WW
1403static int rt6_insert_exception(struct rt6_info *nrt,
1404 struct rt6_info *ort)
1405{
5e670d84 1406 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1407 struct rt6_exception_bucket *bucket;
1408 struct in6_addr *src_key = NULL;
1409 struct rt6_exception *rt6_ex;
1410 int err = 0;
1411
35732d01
WW
1412 spin_lock_bh(&rt6_exception_lock);
1413
1414 if (ort->exception_bucket_flushed) {
1415 err = -EINVAL;
1416 goto out;
1417 }
1418
1419 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1420 lockdep_is_held(&rt6_exception_lock));
1421 if (!bucket) {
1422 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1423 GFP_ATOMIC);
1424 if (!bucket) {
1425 err = -ENOMEM;
1426 goto out;
1427 }
1428 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1429 }
1430
1431#ifdef CONFIG_IPV6_SUBTREES
1432 /* rt6i_src.plen != 0 indicates ort is in subtree
1433 * and exception table is indexed by a hash of
1434 * both rt6i_dst and rt6i_src.
1435 * Otherwise, the exception table is indexed by
1436 * a hash of only rt6i_dst.
1437 */
1438 if (ort->rt6i_src.plen)
1439 src_key = &nrt->rt6i_src.addr;
1440#endif
60006a48
WW
1441
1442 /* Update rt6i_prefsrc as it could be changed
1443 * in rt6_remove_prefsrc()
1444 */
1445 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1446 /* rt6_mtu_change() might lower mtu on ort.
1447 * Only insert this exception route if its mtu
1448 * is less than ort's mtu value.
1449 */
d4ead6b3 1450 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1451 err = -EINVAL;
1452 goto out;
1453 }
60006a48 1454
35732d01
WW
1455 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1456 src_key);
1457 if (rt6_ex)
1458 rt6_remove_exception(bucket, rt6_ex);
1459
1460 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1461 if (!rt6_ex) {
1462 err = -ENOMEM;
1463 goto out;
1464 }
1465 rt6_ex->rt6i = nrt;
1466 rt6_ex->stamp = jiffies;
1467 atomic_inc(&nrt->rt6i_ref);
1468 nrt->rt6i_node = ort->rt6i_node;
1469 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1470 bucket->depth++;
81eb8447 1471 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1472
1473 if (bucket->depth > FIB6_MAX_DEPTH)
1474 rt6_exception_remove_oldest(bucket);
1475
1476out:
1477 spin_unlock_bh(&rt6_exception_lock);
1478
1479 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1480 if (!err) {
922c2ac8 1481 spin_lock_bh(&ort->rt6i_table->tb6_lock);
7aef6859 1482 fib6_update_sernum(net, ort);
922c2ac8 1483 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1484 fib6_force_start_gc(net);
1485 }
35732d01
WW
1486
1487 return err;
1488}
1489
1490void rt6_flush_exceptions(struct rt6_info *rt)
1491{
1492 struct rt6_exception_bucket *bucket;
1493 struct rt6_exception *rt6_ex;
1494 struct hlist_node *tmp;
1495 int i;
1496
1497 spin_lock_bh(&rt6_exception_lock);
1498 /* Prevent rt6_insert_exception() to recreate the bucket list */
1499 rt->exception_bucket_flushed = 1;
1500
1501 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1502 lockdep_is_held(&rt6_exception_lock));
1503 if (!bucket)
1504 goto out;
1505
1506 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1507 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1508 rt6_remove_exception(bucket, rt6_ex);
1509 WARN_ON_ONCE(bucket->depth);
1510 bucket++;
1511 }
1512
1513out:
1514 spin_unlock_bh(&rt6_exception_lock);
1515}
1516
1517/* Find cached rt in the hash table inside passed in rt
1518 * Caller has to hold rcu_read_lock()
1519 */
1520static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1521 struct in6_addr *daddr,
1522 struct in6_addr *saddr)
1523{
1524 struct rt6_exception_bucket *bucket;
1525 struct in6_addr *src_key = NULL;
1526 struct rt6_exception *rt6_ex;
1527 struct rt6_info *res = NULL;
1528
1529 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1530
1531#ifdef CONFIG_IPV6_SUBTREES
1532 /* rt6i_src.plen != 0 indicates rt is in subtree
1533 * and exception table is indexed by a hash of
1534 * both rt6i_dst and rt6i_src.
1535 * Otherwise, the exception table is indexed by
1536 * a hash of only rt6i_dst.
1537 */
1538 if (rt->rt6i_src.plen)
1539 src_key = saddr;
1540#endif
1541 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1542
1543 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1544 res = rt6_ex->rt6i;
1545
1546 return res;
1547}
1548
1549/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1550static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1551{
35732d01 1552 struct rt6_exception_bucket *bucket;
3a2232e9 1553 struct rt6_info *from = rt->from;
35732d01
WW
1554 struct in6_addr *src_key = NULL;
1555 struct rt6_exception *rt6_ex;
1556 int err;
1557
1558 if (!from ||
442d713b 1559 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1560 return -EINVAL;
1561
1562 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1563 return -ENOENT;
1564
1565 spin_lock_bh(&rt6_exception_lock);
1566 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1567 lockdep_is_held(&rt6_exception_lock));
1568#ifdef CONFIG_IPV6_SUBTREES
1569 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1570 * and exception table is indexed by a hash of
1571 * both rt6i_dst and rt6i_src.
1572 * Otherwise, the exception table is indexed by
1573 * a hash of only rt6i_dst.
1574 */
1575 if (from->rt6i_src.plen)
1576 src_key = &rt->rt6i_src.addr;
1577#endif
1578 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1579 &rt->rt6i_dst.addr,
1580 src_key);
1581 if (rt6_ex) {
1582 rt6_remove_exception(bucket, rt6_ex);
1583 err = 0;
1584 } else {
1585 err = -ENOENT;
1586 }
1587
1588 spin_unlock_bh(&rt6_exception_lock);
1589 return err;
1590}
1591
1592/* Find rt6_ex which contains the passed in rt cache and
1593 * refresh its stamp
1594 */
1595static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1596{
35732d01 1597 struct rt6_exception_bucket *bucket;
3a2232e9 1598 struct rt6_info *from = rt->from;
35732d01
WW
1599 struct in6_addr *src_key = NULL;
1600 struct rt6_exception *rt6_ex;
1601
1602 if (!from ||
442d713b 1603 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1604 return;
1605
1606 rcu_read_lock();
1607 bucket = rcu_dereference(from->rt6i_exception_bucket);
1608
1609#ifdef CONFIG_IPV6_SUBTREES
1610 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1611 * and exception table is indexed by a hash of
1612 * both rt6i_dst and rt6i_src.
1613 * Otherwise, the exception table is indexed by
1614 * a hash of only rt6i_dst.
1615 */
1616 if (from->rt6i_src.plen)
1617 src_key = &rt->rt6i_src.addr;
1618#endif
1619 rt6_ex = __rt6_find_exception_rcu(&bucket,
1620 &rt->rt6i_dst.addr,
1621 src_key);
1622 if (rt6_ex)
1623 rt6_ex->stamp = jiffies;
1624
1625 rcu_read_unlock();
1626}
1627
60006a48
WW
1628static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1629{
1630 struct rt6_exception_bucket *bucket;
1631 struct rt6_exception *rt6_ex;
1632 int i;
1633
1634 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1635 lockdep_is_held(&rt6_exception_lock));
1636
1637 if (bucket) {
1638 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1639 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1640 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1641 }
1642 bucket++;
1643 }
1644 }
1645}
1646
e9fa1495
SB
1647static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1648 struct rt6_info *rt, int mtu)
1649{
1650 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1651 * lowest MTU in the path: always allow updating the route PMTU to
1652 * reflect PMTU decreases.
1653 *
1654 * If the new MTU is higher, and the route PMTU is equal to the local
1655 * MTU, this means the old MTU is the lowest in the path, so allow
1656 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1657 * handle this.
1658 */
1659
1660 if (dst_mtu(&rt->dst) >= mtu)
1661 return true;
1662
1663 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1664 return true;
1665
1666 return false;
1667}
1668
1669static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1670 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1671{
1672 struct rt6_exception_bucket *bucket;
1673 struct rt6_exception *rt6_ex;
1674 int i;
1675
1676 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1677 lockdep_is_held(&rt6_exception_lock));
1678
e9fa1495
SB
1679 if (!bucket)
1680 return;
1681
1682 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1683 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1684 struct rt6_info *entry = rt6_ex->rt6i;
1685
1686 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1687 * route), the metrics of its rt->from have already
e9fa1495
SB
1688 * been updated.
1689 */
d4ead6b3 1690 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1691 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1692 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1693 }
e9fa1495 1694 bucket++;
f5bbe7ee
WW
1695 }
1696}
1697
b16cb459
WW
1698#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1699
1700static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1701 struct in6_addr *gateway)
1702{
1703 struct rt6_exception_bucket *bucket;
1704 struct rt6_exception *rt6_ex;
1705 struct hlist_node *tmp;
1706 int i;
1707
1708 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1709 return;
1710
1711 spin_lock_bh(&rt6_exception_lock);
1712 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1713 lockdep_is_held(&rt6_exception_lock));
1714
1715 if (bucket) {
1716 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1717 hlist_for_each_entry_safe(rt6_ex, tmp,
1718 &bucket->chain, hlist) {
1719 struct rt6_info *entry = rt6_ex->rt6i;
1720
1721 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1722 RTF_CACHE_GATEWAY &&
1723 ipv6_addr_equal(gateway,
1724 &entry->rt6i_gateway)) {
1725 rt6_remove_exception(bucket, rt6_ex);
1726 }
1727 }
1728 bucket++;
1729 }
1730 }
1731
1732 spin_unlock_bh(&rt6_exception_lock);
1733}
1734
c757faa8
WW
1735static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1736 struct rt6_exception *rt6_ex,
1737 struct fib6_gc_args *gc_args,
1738 unsigned long now)
1739{
1740 struct rt6_info *rt = rt6_ex->rt6i;
1741
1859bac0
PA
1742 /* we are pruning and obsoleting aged-out and non gateway exceptions
1743 * even if others have still references to them, so that on next
1744 * dst_check() such references can be dropped.
1745 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1746 * expired, independently from their aging, as per RFC 8201 section 4
1747 */
31afeb42
WW
1748 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1749 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1750 RT6_TRACE("aging clone %p\n", rt);
1751 rt6_remove_exception(bucket, rt6_ex);
1752 return;
1753 }
1754 } else if (time_after(jiffies, rt->dst.expires)) {
1755 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1756 rt6_remove_exception(bucket, rt6_ex);
1757 return;
31afeb42
WW
1758 }
1759
1760 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1761 struct neighbour *neigh;
1762 __u8 neigh_flags = 0;
1763
1bfa26ff
ED
1764 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1765 if (neigh)
c757faa8 1766 neigh_flags = neigh->flags;
1bfa26ff 1767
c757faa8
WW
1768 if (!(neigh_flags & NTF_ROUTER)) {
1769 RT6_TRACE("purging route %p via non-router but gateway\n",
1770 rt);
1771 rt6_remove_exception(bucket, rt6_ex);
1772 return;
1773 }
1774 }
31afeb42 1775
c757faa8
WW
1776 gc_args->more++;
1777}
1778
1779void rt6_age_exceptions(struct rt6_info *rt,
1780 struct fib6_gc_args *gc_args,
1781 unsigned long now)
1782{
1783 struct rt6_exception_bucket *bucket;
1784 struct rt6_exception *rt6_ex;
1785 struct hlist_node *tmp;
1786 int i;
1787
1788 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1789 return;
1790
1bfa26ff
ED
1791 rcu_read_lock_bh();
1792 spin_lock(&rt6_exception_lock);
c757faa8
WW
1793 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1794 lockdep_is_held(&rt6_exception_lock));
1795
1796 if (bucket) {
1797 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1798 hlist_for_each_entry_safe(rt6_ex, tmp,
1799 &bucket->chain, hlist) {
1800 rt6_age_examine_exception(bucket, rt6_ex,
1801 gc_args, now);
1802 }
1803 bucket++;
1804 }
1805 }
1bfa26ff
ED
1806 spin_unlock(&rt6_exception_lock);
1807 rcu_read_unlock_bh();
c757faa8
WW
1808}
1809
9ff74384 1810struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1811 int oif, struct flowi6 *fl6,
1812 const struct sk_buff *skb, int flags)
1da177e4 1813{
367efcb9 1814 struct fib6_node *fn, *saved_fn;
23fb93a4
DA
1815 struct rt6_info *f6i;
1816 struct rt6_info *rt;
c71099ac 1817 int strict = 0;
1da177e4 1818
77d16f45 1819 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1820 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1821 if (net->ipv6.devconf_all->forwarding == 0)
1822 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1823
66f5d6ce 1824 rcu_read_lock();
1da177e4 1825
4c9483b2 1826 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1827 saved_fn = fn;
1da177e4 1828
ca254490
DA
1829 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1830 oif = 0;
1831
a3c00e46 1832redo_rt6_select:
23fb93a4
DA
1833 f6i = rt6_select(net, fn, oif, strict);
1834 if (f6i->rt6i_nsiblings)
1835 f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1836 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1837 fn = fib6_backtrack(fn, &fl6->saddr);
1838 if (fn)
1839 goto redo_rt6_select;
367efcb9
MKL
1840 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1841 /* also consider unreachable route */
1842 strict &= ~RT6_LOOKUP_F_REACHABLE;
1843 fn = saved_fn;
1844 goto redo_rt6_select;
367efcb9 1845 }
a3c00e46
MKL
1846 }
1847
23fb93a4 1848 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1849 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1850 rcu_read_unlock();
d3843fe5 1851 dst_hold(&rt->dst);
b65f164d 1852 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5 1853 return rt;
23fb93a4
DA
1854 }
1855
1856 /*Search through exception table */
1857 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1858 if (rt) {
d4ead6b3 1859 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1860 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1861
66f5d6ce 1862 rcu_read_unlock();
b65f164d 1863 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1864 return rt;
3da59bd9 1865 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
23fb93a4 1866 !(f6i->rt6i_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1867 /* Create a RTF_CACHE clone which will not be
1868 * owned by the fib6 tree. It is for the special case where
1869 * the daddr in the skb during the neighbor look-up is different
1870 * from the fl6->daddr used to look-up route here.
1871 */
1872
1873 struct rt6_info *uncached_rt;
1874
93531c67 1875 fib6_info_hold(f6i);
66f5d6ce 1876 rcu_read_unlock();
d52d3997 1877
23fb93a4 1878 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
93531c67 1879 fib6_info_release(f6i);
c71099ac 1880
1cfb71ee
WW
1881 if (uncached_rt) {
1882 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1883 * No need for another dst_hold()
1884 */
8d0b94af 1885 rt6_uncached_list_add(uncached_rt);
81eb8447 1886 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1887 } else {
3da59bd9 1888 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1889 dst_hold(&uncached_rt->dst);
1890 }
b811580d 1891
b65f164d 1892 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1893 return uncached_rt;
3da59bd9 1894
d52d3997
MKL
1895 } else {
1896 /* Get a percpu copy */
1897
1898 struct rt6_info *pcpu_rt;
1899
951f788a 1900 local_bh_disable();
23fb93a4 1901 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1902
93531c67
DA
1903 if (!pcpu_rt)
1904 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1905
951f788a
ED
1906 local_bh_enable();
1907 rcu_read_unlock();
b65f164d 1908 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1909 return pcpu_rt;
1910 }
1da177e4 1911}
9ff74384 1912EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1913
b75cc8f9
DA
1914static struct rt6_info *ip6_pol_route_input(struct net *net,
1915 struct fib6_table *table,
1916 struct flowi6 *fl6,
1917 const struct sk_buff *skb,
1918 int flags)
4acad72d 1919{
b75cc8f9 1920 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1921}
1922
d409b847
MB
1923struct dst_entry *ip6_route_input_lookup(struct net *net,
1924 struct net_device *dev,
b75cc8f9
DA
1925 struct flowi6 *fl6,
1926 const struct sk_buff *skb,
1927 int flags)
72331bc0
SL
1928{
1929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1930 flags |= RT6_LOOKUP_F_IFACE;
1931
b75cc8f9 1932 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1933}
d409b847 1934EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1935
23aebdac 1936static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1937 struct flow_keys *keys,
1938 struct flow_keys *flkeys)
23aebdac
JS
1939{
1940 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1941 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1942 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1943 const struct ipv6hdr *inner_iph;
1944 const struct icmp6hdr *icmph;
1945 struct ipv6hdr _inner_iph;
1946
1947 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1948 goto out;
1949
1950 icmph = icmp6_hdr(skb);
1951 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1952 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1953 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1954 icmph->icmp6_type != ICMPV6_PARAMPROB)
1955 goto out;
1956
1957 inner_iph = skb_header_pointer(skb,
1958 skb_transport_offset(skb) + sizeof(*icmph),
1959 sizeof(_inner_iph), &_inner_iph);
1960 if (!inner_iph)
1961 goto out;
1962
1963 key_iph = inner_iph;
5e5d6fed 1964 _flkeys = NULL;
23aebdac 1965out:
5e5d6fed
RP
1966 if (_flkeys) {
1967 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1968 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1969 keys->tags.flow_label = _flkeys->tags.flow_label;
1970 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1971 } else {
1972 keys->addrs.v6addrs.src = key_iph->saddr;
1973 keys->addrs.v6addrs.dst = key_iph->daddr;
1974 keys->tags.flow_label = ip6_flowinfo(key_iph);
1975 keys->basic.ip_proto = key_iph->nexthdr;
1976 }
23aebdac
JS
1977}
1978
1979/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1980u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1981 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1982{
1983 struct flow_keys hash_keys;
9a2a537a 1984 u32 mhash;
23aebdac 1985
bbfa047a 1986 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1987 case 0:
1988 memset(&hash_keys, 0, sizeof(hash_keys));
1989 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1990 if (skb) {
1991 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1992 } else {
1993 hash_keys.addrs.v6addrs.src = fl6->saddr;
1994 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1995 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1996 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1997 }
1998 break;
1999 case 1:
2000 if (skb) {
2001 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2002 struct flow_keys keys;
2003
2004 /* short-circuit if we already have L4 hash present */
2005 if (skb->l4_hash)
2006 return skb_get_hash_raw(skb) >> 1;
2007
2008 memset(&hash_keys, 0, sizeof(hash_keys));
2009
2010 if (!flkeys) {
2011 skb_flow_dissect_flow_keys(skb, &keys, flag);
2012 flkeys = &keys;
2013 }
2014 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2015 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2016 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2017 hash_keys.ports.src = flkeys->ports.src;
2018 hash_keys.ports.dst = flkeys->ports.dst;
2019 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2020 } else {
2021 memset(&hash_keys, 0, sizeof(hash_keys));
2022 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2023 hash_keys.addrs.v6addrs.src = fl6->saddr;
2024 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2025 hash_keys.ports.src = fl6->fl6_sport;
2026 hash_keys.ports.dst = fl6->fl6_dport;
2027 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2028 }
2029 break;
23aebdac 2030 }
9a2a537a 2031 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2032
9a2a537a 2033 return mhash >> 1;
23aebdac
JS
2034}
2035
c71099ac
TG
2036void ip6_route_input(struct sk_buff *skb)
2037{
b71d1d42 2038 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2039 struct net *net = dev_net(skb->dev);
adaa70bb 2040 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2041 struct ip_tunnel_info *tun_info;
4c9483b2 2042 struct flowi6 fl6 = {
e0d56fdd 2043 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2044 .daddr = iph->daddr,
2045 .saddr = iph->saddr,
6502ca52 2046 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2047 .flowi6_mark = skb->mark,
2048 .flowi6_proto = iph->nexthdr,
c71099ac 2049 };
5e5d6fed 2050 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2051
904af04d 2052 tun_info = skb_tunnel_info(skb);
46fa062a 2053 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2054 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2055
2056 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2057 flkeys = &_flkeys;
2058
23aebdac 2059 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2060 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2061 skb_dst_drop(skb);
b75cc8f9
DA
2062 skb_dst_set(skb,
2063 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2064}
2065
b75cc8f9
DA
2066static struct rt6_info *ip6_pol_route_output(struct net *net,
2067 struct fib6_table *table,
2068 struct flowi6 *fl6,
2069 const struct sk_buff *skb,
2070 int flags)
1da177e4 2071{
b75cc8f9 2072 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2073}
2074
6f21c96a
PA
2075struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2076 struct flowi6 *fl6, int flags)
c71099ac 2077{
d46a9d67 2078 bool any_src;
c71099ac 2079
4c1feac5
DA
2080 if (rt6_need_strict(&fl6->daddr)) {
2081 struct dst_entry *dst;
2082
2083 dst = l3mdev_link_scope_lookup(net, fl6);
2084 if (dst)
2085 return dst;
2086 }
ca254490 2087
1fb9489b 2088 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2089
d46a9d67 2090 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2091 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2092 (fl6->flowi6_oif && any_src))
77d16f45 2093 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2094
d46a9d67 2095 if (!any_src)
adaa70bb 2096 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2097 else if (sk)
2098 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2099
b75cc8f9 2100 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2101}
6f21c96a 2102EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2103
2774c131 2104struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2105{
5c1e6aa3 2106 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2107 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2108 struct dst_entry *new = NULL;
2109
1dbe3252 2110 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2111 DST_OBSOLETE_DEAD, 0);
14e50e57 2112 if (rt) {
0a1f5962 2113 rt6_info_init(rt);
81eb8447 2114 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2115
0a1f5962 2116 new = &rt->dst;
14e50e57 2117 new->__use = 1;
352e512c 2118 new->input = dst_discard;
ede2059d 2119 new->output = dst_discard_out;
14e50e57 2120
0a1f5962 2121 dst_copy_metrics(new, &ort->dst);
14e50e57 2122
1dbe3252 2123 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2124 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2125 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2126 rt->rt6i_metric = 0;
2127
2128 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2129#ifdef CONFIG_IPV6_SUBTREES
2130 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2131#endif
14e50e57
DM
2132 }
2133
69ead7af
DM
2134 dst_release(dst_orig);
2135 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2136}
14e50e57 2137
1da177e4
LT
2138/*
2139 * Destination cache support functions
2140 */
2141
93531c67
DA
2142static bool fib6_check(struct rt6_info *f6i, u32 cookie)
2143{
2144 u32 rt_cookie = 0;
2145
2146 if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
2147 rt_cookie != cookie)
2148 return false;
2149
2150 if (fib6_check_expired(f6i))
2151 return false;
2152
2153 return true;
2154}
2155
3da59bd9
MKL
2156static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2157{
36143645 2158 u32 rt_cookie = 0;
c5cff856 2159
93531c67
DA
2160 if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
2161 rt_cookie != cookie)
3da59bd9
MKL
2162 return NULL;
2163
2164 if (rt6_check_expired(rt))
2165 return NULL;
2166
2167 return &rt->dst;
2168}
2169
2170static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2171{
5973fb1e
MKL
2172 if (!__rt6_check_expired(rt) &&
2173 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
93531c67 2174 fib6_check(rt->from, cookie))
3da59bd9
MKL
2175 return &rt->dst;
2176 else
2177 return NULL;
2178}
2179
1da177e4
LT
2180static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2181{
2182 struct rt6_info *rt;
2183
2184 rt = (struct rt6_info *) dst;
2185
6f3118b5
ND
2186 /* All IPV6 dsts are created with ->obsolete set to the value
2187 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2188 * into this function always.
2189 */
e3bc10bd 2190
02bcf4e0 2191 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2192 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2193 return rt6_dst_from_check(rt, cookie);
2194 else
2195 return rt6_check(rt, cookie);
1da177e4
LT
2196}
2197
2198static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2199{
2200 struct rt6_info *rt = (struct rt6_info *) dst;
2201
2202 if (rt) {
54c1a859
YH
2203 if (rt->rt6i_flags & RTF_CACHE) {
2204 if (rt6_check_expired(rt)) {
93531c67 2205 rt6_remove_exception_rt(rt);
54c1a859
YH
2206 dst = NULL;
2207 }
2208 } else {
1da177e4 2209 dst_release(dst);
54c1a859
YH
2210 dst = NULL;
2211 }
1da177e4 2212 }
54c1a859 2213 return dst;
1da177e4
LT
2214}
2215
2216static void ip6_link_failure(struct sk_buff *skb)
2217{
2218 struct rt6_info *rt;
2219
3ffe533c 2220 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2221
adf30907 2222 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2223 if (rt) {
1eb4f758 2224 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2225 if (dst_hold_safe(&rt->dst))
93531c67
DA
2226 rt6_remove_exception_rt(rt);
2227 } else if (rt->from) {
c5cff856
WW
2228 struct fib6_node *fn;
2229
2230 rcu_read_lock();
93531c67 2231 fn = rcu_dereference(rt->from->rt6i_node);
c5cff856
WW
2232 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2233 fn->fn_sernum = -1;
2234 rcu_read_unlock();
1eb4f758 2235 }
1da177e4
LT
2236 }
2237}
2238
45e4fd26
MKL
2239static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2240{
2241 struct net *net = dev_net(rt->dst.dev);
2242
d4ead6b3 2243 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2244 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2245 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2246}
2247
0d3f6d29
MKL
2248static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2249{
2250 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2251 (rt->rt6i_flags & RTF_PCPU ||
2252 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2253}
2254
45e4fd26
MKL
2255static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2256 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2257{
0dec879f 2258 const struct in6_addr *daddr, *saddr;
67ba4152 2259 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2260
45e4fd26
MKL
2261 if (rt6->rt6i_flags & RTF_LOCAL)
2262 return;
81aded24 2263
19bda36c
XL
2264 if (dst_metric_locked(dst, RTAX_MTU))
2265 return;
2266
0dec879f
JA
2267 if (iph) {
2268 daddr = &iph->daddr;
2269 saddr = &iph->saddr;
2270 } else if (sk) {
2271 daddr = &sk->sk_v6_daddr;
2272 saddr = &inet6_sk(sk)->saddr;
2273 } else {
2274 daddr = NULL;
2275 saddr = NULL;
2276 }
2277 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2278 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2279 if (mtu >= dst_mtu(dst))
2280 return;
9d289715 2281
0d3f6d29 2282 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2283 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2284 /* update rt6_ex->stamp for cache */
2285 if (rt6->rt6i_flags & RTF_CACHE)
2286 rt6_update_exception_stamp_rt(rt6);
0dec879f 2287 } else if (daddr) {
45e4fd26
MKL
2288 struct rt6_info *nrt6;
2289
d4ead6b3 2290 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
45e4fd26
MKL
2291 if (nrt6) {
2292 rt6_do_update_pmtu(nrt6, mtu);
d4ead6b3 2293 if (rt6_insert_exception(nrt6, rt6->from))
2b760fcf 2294 dst_release_immediate(&nrt6->dst);
45e4fd26 2295 }
1da177e4
LT
2296 }
2297}
2298
45e4fd26
MKL
2299static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2300 struct sk_buff *skb, u32 mtu)
2301{
2302 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2303}
2304
42ae66c8 2305void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2306 int oif, u32 mark, kuid_t uid)
81aded24
DM
2307{
2308 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2309 struct dst_entry *dst;
2310 struct flowi6 fl6;
2311
2312 memset(&fl6, 0, sizeof(fl6));
2313 fl6.flowi6_oif = oif;
1b3c61dc 2314 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2315 fl6.daddr = iph->daddr;
2316 fl6.saddr = iph->saddr;
6502ca52 2317 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2318 fl6.flowi6_uid = uid;
81aded24
DM
2319
2320 dst = ip6_route_output(net, NULL, &fl6);
2321 if (!dst->error)
45e4fd26 2322 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2323 dst_release(dst);
2324}
2325EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2326
2327void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2328{
33c162a9
MKL
2329 struct dst_entry *dst;
2330
81aded24 2331 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2332 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2333
2334 dst = __sk_dst_get(sk);
2335 if (!dst || !dst->obsolete ||
2336 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2337 return;
2338
2339 bh_lock_sock(sk);
2340 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2341 ip6_datagram_dst_update(sk, false);
2342 bh_unlock_sock(sk);
81aded24
DM
2343}
2344EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2345
7d6850f7
AK
2346void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2347 const struct flowi6 *fl6)
2348{
2349#ifdef CONFIG_IPV6_SUBTREES
2350 struct ipv6_pinfo *np = inet6_sk(sk);
2351#endif
2352
2353 ip6_dst_store(sk, dst,
2354 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2355 &sk->sk_v6_daddr : NULL,
2356#ifdef CONFIG_IPV6_SUBTREES
2357 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2358 &np->saddr :
2359#endif
2360 NULL);
2361}
2362
b55b76b2
DJ
2363/* Handle redirects */
2364struct ip6rd_flowi {
2365 struct flowi6 fl6;
2366 struct in6_addr gateway;
2367};
2368
2369static struct rt6_info *__ip6_route_redirect(struct net *net,
2370 struct fib6_table *table,
2371 struct flowi6 *fl6,
b75cc8f9 2372 const struct sk_buff *skb,
b55b76b2
DJ
2373 int flags)
2374{
2375 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4
DA
2376 struct rt6_info *ret = NULL, *rt_cache;
2377 struct rt6_info *rt;
b55b76b2
DJ
2378 struct fib6_node *fn;
2379
2380 /* Get the "current" route for this destination and
67c408cf 2381 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2382 *
2383 * RFC 4861 specifies that redirects should only be
2384 * accepted if they come from the nexthop to the target.
2385 * Due to the way the routes are chosen, this notion
2386 * is a bit fuzzy and one might need to check all possible
2387 * routes.
2388 */
2389
66f5d6ce 2390 rcu_read_lock();
b55b76b2
DJ
2391 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2392restart:
66f5d6ce 2393 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2394 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2395 continue;
14895687 2396 if (fib6_check_expired(rt))
b55b76b2 2397 continue;
6edb3c96 2398 if (rt->rt6i_flags & RTF_REJECT)
b55b76b2
DJ
2399 break;
2400 if (!(rt->rt6i_flags & RTF_GATEWAY))
2401 continue;
5e670d84 2402 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2403 continue;
2b760fcf
WW
2404 /* rt_cache's gateway might be different from its 'parent'
2405 * in the case of an ip redirect.
2406 * So we keep searching in the exception table if the gateway
2407 * is different.
2408 */
5e670d84 2409 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2410 rt_cache = rt6_find_cached_rt(rt,
2411 &fl6->daddr,
2412 &fl6->saddr);
2413 if (rt_cache &&
2414 ipv6_addr_equal(&rdfl->gateway,
2415 &rt_cache->rt6i_gateway)) {
23fb93a4 2416 ret = rt_cache;
2b760fcf
WW
2417 break;
2418 }
b55b76b2 2419 continue;
2b760fcf 2420 }
b55b76b2
DJ
2421 break;
2422 }
2423
2424 if (!rt)
421842ed 2425 rt = net->ipv6.fib6_null_entry;
6edb3c96 2426 else if (rt->rt6i_flags & RTF_REJECT) {
23fb93a4 2427 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2428 goto out;
2429 }
2430
421842ed 2431 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2432 fn = fib6_backtrack(fn, &fl6->saddr);
2433 if (fn)
2434 goto restart;
b55b76b2 2435 }
a3c00e46 2436
b0a1ba59 2437out:
23fb93a4
DA
2438 if (ret)
2439 dst_hold(&ret->dst);
2440 else
2441 ret = ip6_create_rt_rcu(rt);
b55b76b2 2442
66f5d6ce 2443 rcu_read_unlock();
b55b76b2 2444
23fb93a4
DA
2445 trace_fib6_table_lookup(net, ret, table, fl6);
2446 return ret;
b55b76b2
DJ
2447};
2448
2449static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2450 const struct flowi6 *fl6,
2451 const struct sk_buff *skb,
2452 const struct in6_addr *gateway)
b55b76b2
DJ
2453{
2454 int flags = RT6_LOOKUP_F_HAS_SADDR;
2455 struct ip6rd_flowi rdfl;
2456
2457 rdfl.fl6 = *fl6;
2458 rdfl.gateway = *gateway;
2459
b75cc8f9 2460 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2461 flags, __ip6_route_redirect);
2462}
2463
e2d118a1
LC
2464void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2465 kuid_t uid)
3a5ad2ee
DM
2466{
2467 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2468 struct dst_entry *dst;
2469 struct flowi6 fl6;
2470
2471 memset(&fl6, 0, sizeof(fl6));
e374c618 2472 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2473 fl6.flowi6_oif = oif;
2474 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2475 fl6.daddr = iph->daddr;
2476 fl6.saddr = iph->saddr;
6502ca52 2477 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2478 fl6.flowi6_uid = uid;
3a5ad2ee 2479
b75cc8f9 2480 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2481 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2482 dst_release(dst);
2483}
2484EXPORT_SYMBOL_GPL(ip6_redirect);
2485
c92a59ec
DJ
2486void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2487 u32 mark)
2488{
2489 const struct ipv6hdr *iph = ipv6_hdr(skb);
2490 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2491 struct dst_entry *dst;
2492 struct flowi6 fl6;
2493
2494 memset(&fl6, 0, sizeof(fl6));
e374c618 2495 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2496 fl6.flowi6_oif = oif;
2497 fl6.flowi6_mark = mark;
c92a59ec
DJ
2498 fl6.daddr = msg->dest;
2499 fl6.saddr = iph->daddr;
e2d118a1 2500 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2501
b75cc8f9 2502 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2503 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2504 dst_release(dst);
2505}
2506
3a5ad2ee
DM
2507void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2508{
e2d118a1
LC
2509 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2510 sk->sk_uid);
3a5ad2ee
DM
2511}
2512EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2513
0dbaee3b 2514static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2515{
0dbaee3b
DM
2516 struct net_device *dev = dst->dev;
2517 unsigned int mtu = dst_mtu(dst);
2518 struct net *net = dev_net(dev);
2519
1da177e4
LT
2520 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2521
5578689a
DL
2522 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2523 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2524
2525 /*
1ab1457c
YH
2526 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2527 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2528 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2529 * rely only on pmtu discovery"
2530 */
2531 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2532 mtu = IPV6_MAXPLEN;
2533 return mtu;
2534}
2535
ebb762f2 2536static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2537{
d33e4553 2538 struct inet6_dev *idev;
d4ead6b3 2539 unsigned int mtu;
4b32b5ad
MKL
2540
2541 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2542 if (mtu)
30f78d8e 2543 goto out;
618f9bc7
SK
2544
2545 mtu = IPV6_MIN_MTU;
d33e4553
DM
2546
2547 rcu_read_lock();
2548 idev = __in6_dev_get(dst->dev);
2549 if (idev)
2550 mtu = idev->cnf.mtu6;
2551 rcu_read_unlock();
2552
30f78d8e 2553out:
14972cbd
RP
2554 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2555
2556 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2557}
2558
3b00944c 2559struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2560 struct flowi6 *fl6)
1da177e4 2561{
87a11578 2562 struct dst_entry *dst;
1da177e4
LT
2563 struct rt6_info *rt;
2564 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2565 struct net *net = dev_net(dev);
1da177e4 2566
38308473 2567 if (unlikely(!idev))
122bdf67 2568 return ERR_PTR(-ENODEV);
1da177e4 2569
ad706862 2570 rt = ip6_dst_alloc(net, dev, 0);
38308473 2571 if (unlikely(!rt)) {
1da177e4 2572 in6_dev_put(idev);
87a11578 2573 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2574 goto out;
2575 }
2576
8e2ec639 2577 rt->dst.flags |= DST_HOST;
588753f1 2578 rt->dst.input = ip6_input;
8e2ec639 2579 rt->dst.output = ip6_output;
550bab42 2580 rt->rt6i_gateway = fl6->daddr;
87a11578 2581 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2582 rt->rt6i_dst.plen = 128;
2583 rt->rt6i_idev = idev;
14edd87d 2584 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2585
4c981e28 2586 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2587 * do proper release of the net_device
2588 */
2589 rt6_uncached_list_add(rt);
81eb8447 2590 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2591
87a11578
DM
2592 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2593
1da177e4 2594out:
87a11578 2595 return dst;
1da177e4
LT
2596}
2597
569d3645 2598static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2599{
86393e52 2600 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2601 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2602 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2603 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2604 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2605 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2606 int entries;
7019b78e 2607
fc66f95c 2608 entries = dst_entries_get_fast(ops);
49a18d86 2609 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2610 entries <= rt_max_size)
1da177e4
LT
2611 goto out;
2612
6891a346 2613 net->ipv6.ip6_rt_gc_expire++;
14956643 2614 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2615 entries = dst_entries_get_slow(ops);
2616 if (entries < ops->gc_thresh)
7019b78e 2617 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2618out:
7019b78e 2619 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2620 return entries > rt_max_size;
1da177e4
LT
2621}
2622
d4ead6b3
DA
2623static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2624 struct fib6_config *cfg)
e715b6d3 2625{
d4ead6b3 2626 int err = 0;
e715b6d3 2627
d4ead6b3
DA
2628 if (cfg->fc_mx) {
2629 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2630 GFP_KERNEL);
2631 if (unlikely(!rt->fib6_metrics))
2632 return -ENOMEM;
ea697639 2633
d4ead6b3 2634 refcount_set(&rt->fib6_metrics->refcnt, 1);
e715b6d3 2635
d4ead6b3
DA
2636 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2637 rt->fib6_metrics->metrics);
c3a8d947 2638 }
e715b6d3 2639
d4ead6b3 2640 return err;
e715b6d3 2641}
1da177e4 2642
8c14586f
DA
2643static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2644 struct fib6_config *cfg,
f4797b33
DA
2645 const struct in6_addr *gw_addr,
2646 u32 tbid, int flags)
8c14586f
DA
2647{
2648 struct flowi6 fl6 = {
2649 .flowi6_oif = cfg->fc_ifindex,
2650 .daddr = *gw_addr,
2651 .saddr = cfg->fc_prefsrc,
2652 };
2653 struct fib6_table *table;
2654 struct rt6_info *rt;
8c14586f 2655
f4797b33 2656 table = fib6_get_table(net, tbid);
8c14586f
DA
2657 if (!table)
2658 return NULL;
2659
2660 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2661 flags |= RT6_LOOKUP_F_HAS_SADDR;
2662
f4797b33 2663 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2664 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2665
2666 /* if table lookup failed, fall back to full lookup */
2667 if (rt == net->ipv6.ip6_null_entry) {
2668 ip6_rt_put(rt);
2669 rt = NULL;
2670 }
2671
2672 return rt;
2673}
2674
fc1e64e1
DA
2675static int ip6_route_check_nh_onlink(struct net *net,
2676 struct fib6_config *cfg,
9fbb704c 2677 const struct net_device *dev,
fc1e64e1
DA
2678 struct netlink_ext_ack *extack)
2679{
44750f84 2680 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2681 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2682 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2683 struct rt6_info *grt;
2684 int err;
2685
2686 err = 0;
2687 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2688 if (grt) {
58e354c0
DA
2689 if (!grt->dst.error &&
2690 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2691 NL_SET_ERR_MSG(extack,
2692 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2693 err = -EINVAL;
2694 }
2695
2696 ip6_rt_put(grt);
2697 }
2698
2699 return err;
2700}
2701
1edce99f
DA
2702static int ip6_route_check_nh(struct net *net,
2703 struct fib6_config *cfg,
2704 struct net_device **_dev,
2705 struct inet6_dev **idev)
2706{
2707 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2708 struct net_device *dev = _dev ? *_dev : NULL;
2709 struct rt6_info *grt = NULL;
2710 int err = -EHOSTUNREACH;
2711
2712 if (cfg->fc_table) {
f4797b33
DA
2713 int flags = RT6_LOOKUP_F_IFACE;
2714
2715 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2716 cfg->fc_table, flags);
1edce99f
DA
2717 if (grt) {
2718 if (grt->rt6i_flags & RTF_GATEWAY ||
2719 (dev && dev != grt->dst.dev)) {
2720 ip6_rt_put(grt);
2721 grt = NULL;
2722 }
2723 }
2724 }
2725
2726 if (!grt)
b75cc8f9 2727 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2728
2729 if (!grt)
2730 goto out;
2731
2732 if (dev) {
2733 if (dev != grt->dst.dev) {
2734 ip6_rt_put(grt);
2735 goto out;
2736 }
2737 } else {
2738 *_dev = dev = grt->dst.dev;
2739 *idev = grt->rt6i_idev;
2740 dev_hold(dev);
2741 in6_dev_hold(grt->rt6i_idev);
2742 }
2743
2744 if (!(grt->rt6i_flags & RTF_GATEWAY))
2745 err = 0;
2746
2747 ip6_rt_put(grt);
2748
2749out:
2750 return err;
2751}
2752
9fbb704c
DA
2753static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2754 struct net_device **_dev, struct inet6_dev **idev,
2755 struct netlink_ext_ack *extack)
2756{
2757 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2758 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2759 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2760 const struct net_device *dev = *_dev;
232378e8 2761 bool need_addr_check = !dev;
9fbb704c
DA
2762 int err = -EINVAL;
2763
2764 /* if gw_addr is local we will fail to detect this in case
2765 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2766 * will return already-added prefix route via interface that
2767 * prefix route was assigned to, which might be non-loopback.
2768 */
232378e8
DA
2769 if (dev &&
2770 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2771 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2772 goto out;
2773 }
2774
2775 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2776 /* IPv6 strictly inhibits using not link-local
2777 * addresses as nexthop address.
2778 * Otherwise, router will not able to send redirects.
2779 * It is very good, but in some (rare!) circumstances
2780 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2781 * some exceptions. --ANK
2782 * We allow IPv4-mapped nexthops to support RFC4798-type
2783 * addressing
2784 */
2785 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2786 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2787 goto out;
2788 }
2789
2790 if (cfg->fc_flags & RTNH_F_ONLINK)
2791 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2792 else
2793 err = ip6_route_check_nh(net, cfg, _dev, idev);
2794
2795 if (err)
2796 goto out;
2797 }
2798
2799 /* reload in case device was changed */
2800 dev = *_dev;
2801
2802 err = -EINVAL;
2803 if (!dev) {
2804 NL_SET_ERR_MSG(extack, "Egress device not specified");
2805 goto out;
2806 } else if (dev->flags & IFF_LOOPBACK) {
2807 NL_SET_ERR_MSG(extack,
2808 "Egress device can not be loopback device for this route");
2809 goto out;
2810 }
232378e8
DA
2811
2812 /* if we did not check gw_addr above, do so now that the
2813 * egress device has been resolved.
2814 */
2815 if (need_addr_check &&
2816 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2817 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2818 goto out;
2819 }
2820
9fbb704c
DA
2821 err = 0;
2822out:
2823 return err;
2824}
2825
333c4301 2826static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2827 gfp_t gfp_flags,
333c4301 2828 struct netlink_ext_ack *extack)
1da177e4 2829{
5578689a 2830 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2831 struct rt6_info *rt = NULL;
2832 struct net_device *dev = NULL;
2833 struct inet6_dev *idev = NULL;
c71099ac 2834 struct fib6_table *table;
1da177e4 2835 int addr_type;
8c5b83f0 2836 int err = -EINVAL;
1da177e4 2837
557c44be 2838 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2839 if (cfg->fc_flags & RTF_PCPU) {
2840 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2841 goto out;
d5d531cb 2842 }
557c44be 2843
2ea2352e
WW
2844 /* RTF_CACHE is an internal flag; can not be set by userspace */
2845 if (cfg->fc_flags & RTF_CACHE) {
2846 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2847 goto out;
2848 }
2849
e8478e80
DA
2850 if (cfg->fc_type > RTN_MAX) {
2851 NL_SET_ERR_MSG(extack, "Invalid route type");
2852 goto out;
2853 }
2854
d5d531cb
DA
2855 if (cfg->fc_dst_len > 128) {
2856 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2857 goto out;
2858 }
2859 if (cfg->fc_src_len > 128) {
2860 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2861 goto out;
d5d531cb 2862 }
1da177e4 2863#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2864 if (cfg->fc_src_len) {
2865 NL_SET_ERR_MSG(extack,
2866 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2867 goto out;
d5d531cb 2868 }
1da177e4 2869#endif
86872cb5 2870 if (cfg->fc_ifindex) {
1da177e4 2871 err = -ENODEV;
5578689a 2872 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2873 if (!dev)
2874 goto out;
2875 idev = in6_dev_get(dev);
2876 if (!idev)
2877 goto out;
2878 }
2879
86872cb5
TG
2880 if (cfg->fc_metric == 0)
2881 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2882
fc1e64e1
DA
2883 if (cfg->fc_flags & RTNH_F_ONLINK) {
2884 if (!dev) {
2885 NL_SET_ERR_MSG(extack,
2886 "Nexthop device required for onlink");
2887 err = -ENODEV;
2888 goto out;
2889 }
2890
2891 if (!(dev->flags & IFF_UP)) {
2892 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2893 err = -ENETDOWN;
2894 goto out;
2895 }
2896 }
2897
d71314b4 2898 err = -ENOBUFS;
38308473
DM
2899 if (cfg->fc_nlinfo.nlh &&
2900 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2901 table = fib6_get_table(net, cfg->fc_table);
38308473 2902 if (!table) {
f3213831 2903 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2904 table = fib6_new_table(net, cfg->fc_table);
2905 }
2906 } else {
2907 table = fib6_new_table(net, cfg->fc_table);
2908 }
38308473
DM
2909
2910 if (!table)
c71099ac 2911 goto out;
c71099ac 2912
93531c67
DA
2913 err = -ENOMEM;
2914 rt = fib6_info_alloc(gfp_flags);
2915 if (!rt)
1da177e4 2916 goto out;
93531c67
DA
2917
2918 if (cfg->fc_flags & RTF_ADDRCONF)
2919 rt->dst_nocount = true;
1da177e4 2920
d4ead6b3
DA
2921 err = ip6_convert_metrics(net, rt, cfg);
2922 if (err < 0)
2923 goto out;
2924
1716a961 2925 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2926 fib6_set_expires(rt, jiffies +
1716a961
G
2927 clock_t_to_jiffies(cfg->fc_expires));
2928 else
14895687 2929 fib6_clean_expires(rt);
1da177e4 2930
86872cb5
TG
2931 if (cfg->fc_protocol == RTPROT_UNSPEC)
2932 cfg->fc_protocol = RTPROT_BOOT;
2933 rt->rt6i_protocol = cfg->fc_protocol;
2934
2935 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2936
19e42e45
RP
2937 if (cfg->fc_encap) {
2938 struct lwtunnel_state *lwtstate;
2939
30357d7d 2940 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2941 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2942 &lwtstate, extack);
19e42e45
RP
2943 if (err)
2944 goto out;
5e670d84 2945 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2946 }
2947
86872cb5
TG
2948 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2949 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2950 if (rt->rt6i_dst.plen == 128)
3b6761d1 2951 rt->dst_host = true;
e5fd387a 2952
1da177e4 2953#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2954 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2955 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2956#endif
2957
86872cb5 2958 rt->rt6i_metric = cfg->fc_metric;
5e670d84 2959 rt->fib6_nh.nh_weight = 1;
1da177e4 2960
e8478e80
DA
2961 rt->fib6_type = cfg->fc_type;
2962
1da177e4
LT
2963 /* We cannot add true routes via loopback here,
2964 they would result in kernel looping; promote them to reject routes
2965 */
86872cb5 2966 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2967 (dev && (dev->flags & IFF_LOOPBACK) &&
2968 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2969 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2970 /* hold loopback dev/idev if we haven't done so. */
5578689a 2971 if (dev != net->loopback_dev) {
1da177e4
LT
2972 if (dev) {
2973 dev_put(dev);
2974 in6_dev_put(idev);
2975 }
5578689a 2976 dev = net->loopback_dev;
1da177e4
LT
2977 dev_hold(dev);
2978 idev = in6_dev_get(dev);
2979 if (!idev) {
2980 err = -ENODEV;
2981 goto out;
2982 }
2983 }
1da177e4
LT
2984 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
2985 goto install_route;
2986 }
2987
86872cb5 2988 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
2989 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2990 if (err)
48ed7b26 2991 goto out;
1da177e4 2992
93531c67 2993 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
2994 }
2995
2996 err = -ENODEV;
38308473 2997 if (!dev)
1da177e4
LT
2998 goto out;
2999
428604fb
LB
3000 if (idev->cnf.disable_ipv6) {
3001 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3002 err = -EACCES;
3003 goto out;
3004 }
3005
955ec4cb
DA
3006 if (!(dev->flags & IFF_UP)) {
3007 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3008 err = -ENETDOWN;
3009 goto out;
3010 }
3011
c3968a85
DW
3012 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3013 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3014 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3015 err = -EINVAL;
3016 goto out;
3017 }
4e3fd7a0 3018 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
3019 rt->rt6i_prefsrc.plen = 128;
3020 } else
3021 rt->rt6i_prefsrc.plen = 0;
3022
86872cb5 3023 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
3024
3025install_route:
5609b80a
IS
3026 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3027 !netif_carrier_ok(dev))
5e670d84
DA
3028 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3029 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3030 rt->fib6_nh.nh_dev = dev;
1da177e4 3031 rt->rt6i_idev = idev;
c71099ac 3032 rt->rt6i_table = table;
63152fc0 3033
c346dca1 3034 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3035
8c5b83f0 3036 return rt;
6b9ea5a6
RP
3037out:
3038 if (dev)
3039 dev_put(dev);
3040 if (idev)
3041 in6_dev_put(idev);
6b9ea5a6 3042
93531c67 3043 fib6_info_release(rt);
8c5b83f0 3044 return ERR_PTR(err);
6b9ea5a6
RP
3045}
3046
acb54e3c
DA
3047int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3048 struct netlink_ext_ack *extack)
6b9ea5a6 3049{
8c5b83f0 3050 struct rt6_info *rt;
6b9ea5a6
RP
3051 int err;
3052
acb54e3c 3053 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3054 if (IS_ERR(rt))
3055 return PTR_ERR(rt);
6b9ea5a6 3056
d4ead6b3 3057 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3058 fib6_info_release(rt);
6b9ea5a6 3059
1da177e4
LT
3060 return err;
3061}
3062
86872cb5 3063static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4 3064{
afb1d4b5 3065 struct net *net = info->nl_net;
c71099ac 3066 struct fib6_table *table;
afb1d4b5 3067 int err;
1da177e4 3068
421842ed 3069 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3070 err = -ENOENT;
3071 goto out;
3072 }
6c813a72 3073
c71099ac 3074 table = rt->rt6i_table;
66f5d6ce 3075 spin_lock_bh(&table->tb6_lock);
86872cb5 3076 err = fib6_del(rt, info);
66f5d6ce 3077 spin_unlock_bh(&table->tb6_lock);
1da177e4 3078
6825a26c 3079out:
93531c67 3080 fib6_info_release(rt);
1da177e4
LT
3081 return err;
3082}
3083
afb1d4b5 3084int ip6_del_rt(struct net *net, struct rt6_info *rt)
e0a1ad73 3085{
afb1d4b5
DA
3086 struct nl_info info = { .nl_net = net };
3087
528c4ceb 3088 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3089}
3090
0ae81335
DA
3091static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3092{
3093 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3094 struct net *net = info->nl_net;
16a16cd3 3095 struct sk_buff *skb = NULL;
0ae81335 3096 struct fib6_table *table;
e3330039 3097 int err = -ENOENT;
0ae81335 3098
421842ed 3099 if (rt == net->ipv6.fib6_null_entry)
e3330039 3100 goto out_put;
0ae81335 3101 table = rt->rt6i_table;
66f5d6ce 3102 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3103
3104 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3105 struct rt6_info *sibling, *next_sibling;
3106
16a16cd3
DA
3107 /* prefer to send a single notification with all hops */
3108 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3109 if (skb) {
3110 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3111
d4ead6b3 3112 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3113 NULL, NULL, 0, RTM_DELROUTE,
3114 info->portid, seq, 0) < 0) {
3115 kfree_skb(skb);
3116 skb = NULL;
3117 } else
3118 info->skip_notify = 1;
3119 }
3120
0ae81335
DA
3121 list_for_each_entry_safe(sibling, next_sibling,
3122 &rt->rt6i_siblings,
3123 rt6i_siblings) {
3124 err = fib6_del(sibling, info);
3125 if (err)
e3330039 3126 goto out_unlock;
0ae81335
DA
3127 }
3128 }
3129
3130 err = fib6_del(rt, info);
e3330039 3131out_unlock:
66f5d6ce 3132 spin_unlock_bh(&table->tb6_lock);
e3330039 3133out_put:
93531c67 3134 fib6_info_release(rt);
16a16cd3
DA
3135
3136 if (skb) {
e3330039 3137 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3138 info->nlh, gfp_any());
3139 }
0ae81335
DA
3140 return err;
3141}
3142
23fb93a4
DA
3143static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3144{
3145 int rc = -ESRCH;
3146
3147 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3148 goto out;
3149
3150 if (cfg->fc_flags & RTF_GATEWAY &&
3151 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3152 goto out;
3153 if (dst_hold_safe(&rt->dst))
3154 rc = rt6_remove_exception_rt(rt);
3155out:
3156 return rc;
3157}
3158
333c4301
DA
3159static int ip6_route_del(struct fib6_config *cfg,
3160 struct netlink_ext_ack *extack)
1da177e4 3161{
2b760fcf 3162 struct rt6_info *rt, *rt_cache;
c71099ac 3163 struct fib6_table *table;
1da177e4 3164 struct fib6_node *fn;
1da177e4
LT
3165 int err = -ESRCH;
3166
5578689a 3167 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3168 if (!table) {
3169 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3170 return err;
d5d531cb 3171 }
c71099ac 3172
66f5d6ce 3173 rcu_read_lock();
1da177e4 3174
c71099ac 3175 fn = fib6_locate(&table->tb6_root,
86872cb5 3176 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3177 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3178 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3179
1da177e4 3180 if (fn) {
66f5d6ce 3181 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3182 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3183 int rc;
3184
2b760fcf
WW
3185 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3186 &cfg->fc_src);
23fb93a4
DA
3187 if (rt_cache) {
3188 rc = ip6_del_cached_rt(rt_cache, cfg);
3189 if (rc != -ESRCH)
3190 return rc;
3191 }
3192 continue;
2b760fcf 3193 }
86872cb5 3194 if (cfg->fc_ifindex &&
5e670d84
DA
3195 (!rt->fib6_nh.nh_dev ||
3196 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3197 continue;
86872cb5 3198 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3199 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3200 continue;
86872cb5 3201 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3202 continue;
c2ed1880
M
3203 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3204 continue;
93531c67 3205 fib6_info_hold(rt);
66f5d6ce 3206 rcu_read_unlock();
1da177e4 3207
0ae81335
DA
3208 /* if gateway was specified only delete the one hop */
3209 if (cfg->fc_flags & RTF_GATEWAY)
3210 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3211
3212 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3213 }
3214 }
66f5d6ce 3215 rcu_read_unlock();
1da177e4
LT
3216
3217 return err;
3218}
3219
6700c270 3220static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3221{
a6279458 3222 struct netevent_redirect netevent;
e8599ff4 3223 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3224 struct ndisc_options ndopts;
3225 struct inet6_dev *in6_dev;
3226 struct neighbour *neigh;
71bcdba0 3227 struct rd_msg *msg;
6e157b6a
DM
3228 int optlen, on_link;
3229 u8 *lladdr;
e8599ff4 3230
29a3cad5 3231 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3232 optlen -= sizeof(*msg);
e8599ff4
DM
3233
3234 if (optlen < 0) {
6e157b6a 3235 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3236 return;
3237 }
3238
71bcdba0 3239 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3240
71bcdba0 3241 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3242 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3243 return;
3244 }
3245
6e157b6a 3246 on_link = 0;
71bcdba0 3247 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3248 on_link = 1;
71bcdba0 3249 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3250 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3251 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3252 return;
3253 }
3254
3255 in6_dev = __in6_dev_get(skb->dev);
3256 if (!in6_dev)
3257 return;
3258 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3259 return;
3260
3261 /* RFC2461 8.1:
3262 * The IP source address of the Redirect MUST be the same as the current
3263 * first-hop router for the specified ICMP Destination Address.
3264 */
3265
f997c55c 3266 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3267 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3268 return;
3269 }
6e157b6a
DM
3270
3271 lladdr = NULL;
e8599ff4
DM
3272 if (ndopts.nd_opts_tgt_lladdr) {
3273 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3274 skb->dev);
3275 if (!lladdr) {
3276 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3277 return;
3278 }
3279 }
3280
6e157b6a 3281 rt = (struct rt6_info *) dst;
ec13ad1d 3282 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3283 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3284 return;
6e157b6a 3285 }
e8599ff4 3286
6e157b6a
DM
3287 /* Redirect received -> path was valid.
3288 * Look, redirects are sent only in response to data packets,
3289 * so that this nexthop apparently is reachable. --ANK
3290 */
0dec879f 3291 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3292
71bcdba0 3293 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3294 if (!neigh)
3295 return;
a6279458 3296
1da177e4
LT
3297 /*
3298 * We have finally decided to accept it.
3299 */
3300
f997c55c 3301 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3302 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3303 NEIGH_UPDATE_F_OVERRIDE|
3304 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3305 NEIGH_UPDATE_F_ISROUTER)),
3306 NDISC_REDIRECT, &ndopts);
1da177e4 3307
23fb93a4 3308 nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
38308473 3309 if (!nrt)
1da177e4
LT
3310 goto out;
3311
3312 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3313 if (on_link)
3314 nrt->rt6i_flags &= ~RTF_GATEWAY;
3315
b91d5329 3316 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3317 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3318
2b760fcf
WW
3319 /* No need to remove rt from the exception table if rt is
3320 * a cached route because rt6_insert_exception() will
3321 * takes care of it
3322 */
d4ead6b3 3323 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3324 dst_release_immediate(&nrt->dst);
3325 goto out;
3326 }
1da177e4 3327
d8d1f30b
CG
3328 netevent.old = &rt->dst;
3329 netevent.new = &nrt->dst;
71bcdba0 3330 netevent.daddr = &msg->dest;
60592833 3331 netevent.neigh = neigh;
8d71740c
TT
3332 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3333
1da177e4 3334out:
e8599ff4 3335 neigh_release(neigh);
6e157b6a
DM
3336}
3337
70ceb4f5 3338#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3339static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3340 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3341 const struct in6_addr *gwaddr,
3342 struct net_device *dev)
70ceb4f5 3343{
830218c1
DA
3344 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3345 int ifindex = dev->ifindex;
70ceb4f5
YH
3346 struct fib6_node *fn;
3347 struct rt6_info *rt = NULL;
c71099ac
TG
3348 struct fib6_table *table;
3349
830218c1 3350 table = fib6_get_table(net, tb_id);
38308473 3351 if (!table)
c71099ac 3352 return NULL;
70ceb4f5 3353
66f5d6ce 3354 rcu_read_lock();
38fbeeee 3355 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3356 if (!fn)
3357 goto out;
3358
66f5d6ce 3359 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3360 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5
YH
3361 continue;
3362 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3363 continue;
5e670d84 3364 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3365 continue;
d3843fe5 3366 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3367 break;
3368 }
3369out:
66f5d6ce 3370 rcu_read_unlock();
70ceb4f5
YH
3371 return rt;
3372}
3373
efa2cea0 3374static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3375 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3376 const struct in6_addr *gwaddr,
3377 struct net_device *dev,
95c96174 3378 unsigned int pref)
70ceb4f5 3379{
86872cb5 3380 struct fib6_config cfg = {
238fc7ea 3381 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3382 .fc_ifindex = dev->ifindex,
86872cb5
TG
3383 .fc_dst_len = prefixlen,
3384 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3385 RTF_UP | RTF_PREF(pref),
b91d5329 3386 .fc_protocol = RTPROT_RA,
e8478e80 3387 .fc_type = RTN_UNICAST,
15e47304 3388 .fc_nlinfo.portid = 0,
efa2cea0
DL
3389 .fc_nlinfo.nlh = NULL,
3390 .fc_nlinfo.nl_net = net,
86872cb5
TG
3391 };
3392
830218c1 3393 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3394 cfg.fc_dst = *prefix;
3395 cfg.fc_gateway = *gwaddr;
70ceb4f5 3396
e317da96
YH
3397 /* We should treat it as a default route if prefix length is 0. */
3398 if (!prefixlen)
86872cb5 3399 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3400
acb54e3c 3401 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3402
830218c1 3403 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3404}
3405#endif
3406
afb1d4b5
DA
3407struct rt6_info *rt6_get_dflt_router(struct net *net,
3408 const struct in6_addr *addr,
3409 struct net_device *dev)
1ab1457c 3410{
830218c1 3411 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3412 struct rt6_info *rt;
c71099ac 3413 struct fib6_table *table;
1da177e4 3414
afb1d4b5 3415 table = fib6_get_table(net, tb_id);
38308473 3416 if (!table)
c71099ac 3417 return NULL;
1da177e4 3418
66f5d6ce
WW
3419 rcu_read_lock();
3420 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3421 if (dev == rt->fib6_nh.nh_dev &&
045927ff 3422 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3423 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3424 break;
3425 }
3426 if (rt)
d3843fe5 3427 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3428 rcu_read_unlock();
1da177e4
LT
3429 return rt;
3430}
3431
afb1d4b5
DA
3432struct rt6_info *rt6_add_dflt_router(struct net *net,
3433 const struct in6_addr *gwaddr,
ebacaaa0
YH
3434 struct net_device *dev,
3435 unsigned int pref)
1da177e4 3436{
86872cb5 3437 struct fib6_config cfg = {
ca254490 3438 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3439 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3440 .fc_ifindex = dev->ifindex,
3441 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3442 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3443 .fc_protocol = RTPROT_RA,
e8478e80 3444 .fc_type = RTN_UNICAST,
15e47304 3445 .fc_nlinfo.portid = 0,
5578689a 3446 .fc_nlinfo.nlh = NULL,
afb1d4b5 3447 .fc_nlinfo.nl_net = net,
86872cb5 3448 };
1da177e4 3449
4e3fd7a0 3450 cfg.fc_gateway = *gwaddr;
1da177e4 3451
acb54e3c 3452 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3453 struct fib6_table *table;
3454
3455 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3456 if (table)
3457 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3458 }
1da177e4 3459
afb1d4b5 3460 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3461}
3462
afb1d4b5
DA
3463static void __rt6_purge_dflt_routers(struct net *net,
3464 struct fib6_table *table)
1da177e4
LT
3465{
3466 struct rt6_info *rt;
3467
3468restart:
66f5d6ce
WW
3469 rcu_read_lock();
3470 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3471 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3472 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
93531c67
DA
3473 fib6_info_hold(rt);
3474 rcu_read_unlock();
3475 ip6_del_rt(net, rt);
1da177e4
LT
3476 goto restart;
3477 }
3478 }
66f5d6ce 3479 rcu_read_unlock();
830218c1
DA
3480
3481 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3482}
3483
3484void rt6_purge_dflt_routers(struct net *net)
3485{
3486 struct fib6_table *table;
3487 struct hlist_head *head;
3488 unsigned int h;
3489
3490 rcu_read_lock();
3491
3492 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3493 head = &net->ipv6.fib_table_hash[h];
3494 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3495 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3496 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3497 }
3498 }
3499
3500 rcu_read_unlock();
1da177e4
LT
3501}
3502
5578689a
DL
3503static void rtmsg_to_fib6_config(struct net *net,
3504 struct in6_rtmsg *rtmsg,
86872cb5
TG
3505 struct fib6_config *cfg)
3506{
3507 memset(cfg, 0, sizeof(*cfg));
3508
ca254490
DA
3509 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3510 : RT6_TABLE_MAIN;
86872cb5
TG
3511 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3512 cfg->fc_metric = rtmsg->rtmsg_metric;
3513 cfg->fc_expires = rtmsg->rtmsg_info;
3514 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3515 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3516 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3517 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3518
5578689a 3519 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3520
4e3fd7a0
AD
3521 cfg->fc_dst = rtmsg->rtmsg_dst;
3522 cfg->fc_src = rtmsg->rtmsg_src;
3523 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3524}
3525
5578689a 3526int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3527{
86872cb5 3528 struct fib6_config cfg;
1da177e4
LT
3529 struct in6_rtmsg rtmsg;
3530 int err;
3531
67ba4152 3532 switch (cmd) {
1da177e4
LT
3533 case SIOCADDRT: /* Add a route */
3534 case SIOCDELRT: /* Delete a route */
af31f412 3535 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3536 return -EPERM;
3537 err = copy_from_user(&rtmsg, arg,
3538 sizeof(struct in6_rtmsg));
3539 if (err)
3540 return -EFAULT;
86872cb5 3541
5578689a 3542 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3543
1da177e4
LT
3544 rtnl_lock();
3545 switch (cmd) {
3546 case SIOCADDRT:
acb54e3c 3547 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3548 break;
3549 case SIOCDELRT:
333c4301 3550 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3551 break;
3552 default:
3553 err = -EINVAL;
3554 }
3555 rtnl_unlock();
3556
3557 return err;
3ff50b79 3558 }
1da177e4
LT
3559
3560 return -EINVAL;
3561}
3562
3563/*
3564 * Drop the packet on the floor
3565 */
3566
d5fdd6ba 3567static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3568{
612f09e8 3569 int type;
adf30907 3570 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3571 switch (ipstats_mib_noroutes) {
3572 case IPSTATS_MIB_INNOROUTES:
0660e03f 3573 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3574 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3575 IP6_INC_STATS(dev_net(dst->dev),
3576 __in6_dev_get_safely(skb->dev),
3bd653c8 3577 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3578 break;
3579 }
3580 /* FALLTHROUGH */
3581 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3582 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3583 ipstats_mib_noroutes);
612f09e8
YH
3584 break;
3585 }
3ffe533c 3586 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3587 kfree_skb(skb);
3588 return 0;
3589}
3590
9ce8ade0
TG
3591static int ip6_pkt_discard(struct sk_buff *skb)
3592{
612f09e8 3593 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3594}
3595
ede2059d 3596static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3597{
adf30907 3598 skb->dev = skb_dst(skb)->dev;
612f09e8 3599 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3600}
3601
9ce8ade0
TG
3602static int ip6_pkt_prohibit(struct sk_buff *skb)
3603{
612f09e8 3604 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3605}
3606
ede2059d 3607static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3608{
adf30907 3609 skb->dev = skb_dst(skb)->dev;
612f09e8 3610 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3611}
3612
1da177e4
LT
3613/*
3614 * Allocate a dst for local (unicast / anycast) address.
3615 */
3616
afb1d4b5
DA
3617struct rt6_info *addrconf_dst_alloc(struct net *net,
3618 struct inet6_dev *idev,
1da177e4 3619 const struct in6_addr *addr,
acb54e3c 3620 bool anycast, gfp_t gfp_flags)
1da177e4 3621{
ca254490 3622 u32 tb_id;
4832c30d 3623 struct net_device *dev = idev->dev;
5f02ce24
DA
3624 struct rt6_info *rt;
3625
93531c67 3626 rt = fib6_info_alloc(gfp_flags);
a3300ef4 3627 if (!rt)
1da177e4
LT
3628 return ERR_PTR(-ENOMEM);
3629
3b6761d1
DA
3630 rt->dst_nocount = true;
3631
1da177e4 3632 in6_dev_hold(idev);
1da177e4 3633 rt->rt6i_idev = idev;
1da177e4 3634
3b6761d1 3635 rt->dst_host = true;
94b5e0f9 3636 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3637 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80
DA
3638 if (anycast) {
3639 rt->fib6_type = RTN_ANYCAST;
58c4fb86 3640 rt->rt6i_flags |= RTF_ANYCAST;
e8478e80
DA
3641 } else {
3642 rt->fib6_type = RTN_LOCAL;
1da177e4 3643 rt->rt6i_flags |= RTF_LOCAL;
e8478e80 3644 }
1da177e4 3645
5e670d84 3646 rt->fib6_nh.nh_gw = *addr;
93531c67 3647 dev_hold(dev);
5e670d84 3648 rt->fib6_nh.nh_dev = dev;
4e3fd7a0 3649 rt->rt6i_dst.addr = *addr;
1da177e4 3650 rt->rt6i_dst.plen = 128;
ca254490
DA
3651 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3652 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3653
1da177e4
LT
3654 return rt;
3655}
3656
c3968a85
DW
/* remove deleted ip from prefsrc entries: walker argument */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning fib6_null_entry */
	struct in6_addr *addr;		/* address being removed */
};
3663
3664static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3665{
3666 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3667 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3668 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3669
5e670d84 3670 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3671 rt != net->ipv6.fib6_null_entry &&
c3968a85 3672 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3673 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3674 /* remove prefsrc entry */
3675 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3676 /* need to update cache as well */
3677 rt6_exceptions_remove_prefsrc(rt);
3678 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3679 }
3680 return 0;
3681}
3682
3683void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3684{
3685 struct net *net = dev_net(ifp->idev->dev);
3686 struct arg_dev_net_ip adni = {
3687 .dev = ifp->idev->dev,
3688 .net = net,
3689 .addr = &ifp->addr,
3690 };
0c3584d5 3691 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3692}
3693
be7a010d 3694#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3695
3696/* Remove routers and update dst entries when gateway turn into host. */
3697static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3698{
3699 struct in6_addr *gateway = (struct in6_addr *)arg;
3700
2b760fcf 3701 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3702 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3703 return -1;
3704 }
b16cb459
WW
3705
3706 /* Further clean up cached routes in exception table.
3707 * This is needed because cached route may have a different
3708 * gateway than its 'parent' in the case of an ip redirect.
3709 */
3710 rt6_exceptions_clean_tohost(rt, gateway);
3711
be7a010d
DJ
3712 return 0;
3713}
3714
3715void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3716{
3717 fib6_clean_all(net, fib6_clean_tohost, gateway);
3718}
3719
2127d95a
IS
3720struct arg_netdev_event {
3721 const struct net_device *dev;
4c981e28
IS
3722 union {
3723 unsigned int nh_flags;
3724 unsigned long event;
3725 };
2127d95a
IS
3726};
3727
d7dedee1
IS
3728static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3729{
3730 struct rt6_info *iter;
3731 struct fib6_node *fn;
3732
3733 fn = rcu_dereference_protected(rt->rt6i_node,
3734 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3735 iter = rcu_dereference_protected(fn->leaf,
3736 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3737 while (iter) {
3738 if (iter->rt6i_metric == rt->rt6i_metric &&
3739 rt6_qualify_for_ecmp(iter))
3740 return iter;
3741 iter = rcu_dereference_protected(iter->rt6_next,
3742 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3743 }
3744
3745 return NULL;
3746}
3747
3748static bool rt6_is_dead(const struct rt6_info *rt)
3749{
5e670d84
DA
3750 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3751 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d7dedee1
IS
3752 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3753 return true;
3754
3755 return false;
3756}
3757
3758static int rt6_multipath_total_weight(const struct rt6_info *rt)
3759{
3760 struct rt6_info *iter;
3761 int total = 0;
3762
3763 if (!rt6_is_dead(rt))
5e670d84 3764 total += rt->fib6_nh.nh_weight;
d7dedee1
IS
3765
3766 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3767 if (!rt6_is_dead(iter))
5e670d84 3768 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3769 }
3770
3771 return total;
3772}
3773
3774static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3775{
3776 int upper_bound = -1;
3777
3778 if (!rt6_is_dead(rt)) {
5e670d84 3779 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3780 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3781 total) - 1;
3782 }
5e670d84 3783 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3784}
3785
3786static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3787{
3788 struct rt6_info *iter;
3789 int weight = 0;
3790
3791 rt6_upper_bound_set(rt, &weight, total);
3792
3793 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3794 rt6_upper_bound_set(iter, &weight, total);
3795}
3796
3797void rt6_multipath_rebalance(struct rt6_info *rt)
3798{
3799 struct rt6_info *first;
3800 int total;
3801
3802 /* In case the entire multipath route was marked for flushing,
3803 * then there is no need to rebalance upon the removal of every
3804 * sibling route.
3805 */
3806 if (!rt->rt6i_nsiblings || rt->should_flush)
3807 return;
3808
3809 /* During lookup routes are evaluated in order, so we need to
3810 * make sure upper bounds are assigned from the first sibling
3811 * onwards.
3812 */
3813 first = rt6_multipath_first_sibling(rt);
3814 if (WARN_ON_ONCE(!first))
3815 return;
3816
3817 total = rt6_multipath_total_weight(first);
3818 rt6_multipath_upper_bound_set(first, total);
3819}
3820
2127d95a
IS
3821static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3822{
3823 const struct arg_netdev_event *arg = p_arg;
7aef6859 3824 struct net *net = dev_net(arg->dev);
2127d95a 3825
421842ed 3826 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3827 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3828 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3829 rt6_multipath_rebalance(rt);
1de178ed 3830 }
2127d95a
IS
3831
3832 return 0;
3833}
3834
3835void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3836{
3837 struct arg_netdev_event arg = {
3838 .dev = dev,
6802f3ad
IS
3839 {
3840 .nh_flags = nh_flags,
3841 },
2127d95a
IS
3842 };
3843
3844 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3845 arg.nh_flags |= RTNH_F_LINKDOWN;
3846
3847 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3848}
3849
1de178ed
IS
3850static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3851 const struct net_device *dev)
3852{
3853 struct rt6_info *iter;
3854
5e670d84 3855 if (rt->fib6_nh.nh_dev == dev)
1de178ed
IS
3856 return true;
3857 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84 3858 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3859 return true;
3860
3861 return false;
3862}
3863
3864static void rt6_multipath_flush(struct rt6_info *rt)
3865{
3866 struct rt6_info *iter;
3867
3868 rt->should_flush = 1;
3869 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3870 iter->should_flush = 1;
3871}
3872
3873static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3874 const struct net_device *down_dev)
3875{
3876 struct rt6_info *iter;
3877 unsigned int dead = 0;
3878
5e670d84
DA
3879 if (rt->fib6_nh.nh_dev == down_dev ||
3880 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3881 dead++;
3882 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3883 if (iter->fib6_nh.nh_dev == down_dev ||
3884 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3885 dead++;
3886
3887 return dead;
3888}
3889
3890static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3891 const struct net_device *dev,
3892 unsigned int nh_flags)
3893{
3894 struct rt6_info *iter;
3895
5e670d84
DA
3896 if (rt->fib6_nh.nh_dev == dev)
3897 rt->fib6_nh.nh_flags |= nh_flags;
1de178ed 3898 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3899 if (iter->fib6_nh.nh_dev == dev)
3900 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3901}
3902
a1a22c12 3903/* called with write lock held for table with rt */
4c981e28 3904static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3905{
4c981e28
IS
3906 const struct arg_netdev_event *arg = p_arg;
3907 const struct net_device *dev = arg->dev;
7aef6859 3908 struct net *net = dev_net(dev);
8ed67789 3909
421842ed 3910 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3911 return 0;
3912
3913 switch (arg->event) {
3914 case NETDEV_UNREGISTER:
5e670d84 3915 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3916 case NETDEV_DOWN:
1de178ed 3917 if (rt->should_flush)
27c6fa73 3918 return -1;
1de178ed 3919 if (!rt->rt6i_nsiblings)
5e670d84 3920 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3921 if (rt6_multipath_uses_dev(rt, dev)) {
3922 unsigned int count;
3923
3924 count = rt6_multipath_dead_count(rt, dev);
3925 if (rt->rt6i_nsiblings + 1 == count) {
3926 rt6_multipath_flush(rt);
3927 return -1;
3928 }
3929 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3930 RTNH_F_LINKDOWN);
7aef6859 3931 fib6_update_sernum(net, rt);
d7dedee1 3932 rt6_multipath_rebalance(rt);
1de178ed
IS
3933 }
3934 return -2;
27c6fa73 3935 case NETDEV_CHANGE:
5e670d84 3936 if (rt->fib6_nh.nh_dev != dev ||
1de178ed 3937 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3938 break;
5e670d84 3939 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3940 rt6_multipath_rebalance(rt);
27c6fa73 3941 break;
2b241361 3942 }
c159d30c 3943
1da177e4
LT
3944 return 0;
3945}
3946
27c6fa73 3947void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3948{
4c981e28 3949 struct arg_netdev_event arg = {
8ed67789 3950 .dev = dev,
6802f3ad
IS
3951 {
3952 .event = event,
3953 },
8ed67789
DL
3954 };
3955
4c981e28
IS
3956 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3957}
3958
3959void rt6_disable_ip(struct net_device *dev, unsigned long event)
3960{
3961 rt6_sync_down_dev(dev, event);
3962 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3963 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3964}
3965
/* Argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
3970
3971static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3972{
3973 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3974 struct inet6_dev *idev;
3975
3976 /* In IPv6 pmtu discovery is not optional,
3977 so that RTAX_MTU lock cannot disable it.
3978 We still use this lock to block changes
3979 caused by addrconf/ndisc.
3980 */
3981
3982 idev = __in6_dev_get(arg->dev);
38308473 3983 if (!idev)
1da177e4
LT
3984 return 0;
3985
3986 /* For administrative MTU increase, there is no way to discover
3987 IPv6 PMTU increase, so PMTU increase should be updated here.
3988 Since RFC 1981 doesn't include administrative MTU increase
3989 update PMTU increase is a MUST. (i.e. jumbo frame)
3990 */
5e670d84 3991 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
3992 !fib6_metric_locked(rt, RTAX_MTU)) {
3993 u32 mtu = rt->fib6_pmtu;
3994
3995 if (mtu >= arg->mtu ||
3996 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
3997 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
3998
f5bbe7ee 3999 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4000 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4001 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4002 }
1da177e4
LT
4003 return 0;
4004}
4005
95c96174 4006void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4007{
c71099ac
TG
4008 struct rt6_mtu_change_arg arg = {
4009 .dev = dev,
4010 .mtu = mtu,
4011 };
1da177e4 4012
0c3584d5 4013 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4014}
4015
ef7c79ed 4016static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4017 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4018 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4019 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4020 [RTA_PRIORITY] = { .type = NLA_U32 },
4021 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4022 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4023 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4024 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4025 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4026 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4027 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4028 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4029};
4030
4031static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4032 struct fib6_config *cfg,
4033 struct netlink_ext_ack *extack)
1da177e4 4034{
86872cb5
TG
4035 struct rtmsg *rtm;
4036 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4037 unsigned int pref;
86872cb5 4038 int err;
1da177e4 4039
fceb6435
JB
4040 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4041 NULL);
86872cb5
TG
4042 if (err < 0)
4043 goto errout;
1da177e4 4044
86872cb5
TG
4045 err = -EINVAL;
4046 rtm = nlmsg_data(nlh);
4047 memset(cfg, 0, sizeof(*cfg));
4048
4049 cfg->fc_table = rtm->rtm_table;
4050 cfg->fc_dst_len = rtm->rtm_dst_len;
4051 cfg->fc_src_len = rtm->rtm_src_len;
4052 cfg->fc_flags = RTF_UP;
4053 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4054 cfg->fc_type = rtm->rtm_type;
86872cb5 4055
ef2c7d7b
ND
4056 if (rtm->rtm_type == RTN_UNREACHABLE ||
4057 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4058 rtm->rtm_type == RTN_PROHIBIT ||
4059 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4060 cfg->fc_flags |= RTF_REJECT;
4061
ab79ad14
4062 if (rtm->rtm_type == RTN_LOCAL)
4063 cfg->fc_flags |= RTF_LOCAL;
4064
1f56a01f
MKL
4065 if (rtm->rtm_flags & RTM_F_CLONED)
4066 cfg->fc_flags |= RTF_CACHE;
4067
fc1e64e1
DA
4068 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4069
15e47304 4070 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4071 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4072 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4073
4074 if (tb[RTA_GATEWAY]) {
67b61f6c 4075 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4076 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4077 }
86872cb5
TG
4078
4079 if (tb[RTA_DST]) {
4080 int plen = (rtm->rtm_dst_len + 7) >> 3;
4081
4082 if (nla_len(tb[RTA_DST]) < plen)
4083 goto errout;
4084
4085 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4086 }
86872cb5
TG
4087
4088 if (tb[RTA_SRC]) {
4089 int plen = (rtm->rtm_src_len + 7) >> 3;
4090
4091 if (nla_len(tb[RTA_SRC]) < plen)
4092 goto errout;
4093
4094 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4095 }
86872cb5 4096
c3968a85 4097 if (tb[RTA_PREFSRC])
67b61f6c 4098 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4099
86872cb5
TG
4100 if (tb[RTA_OIF])
4101 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4102
4103 if (tb[RTA_PRIORITY])
4104 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4105
4106 if (tb[RTA_METRICS]) {
4107 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4108 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4109 }
86872cb5
TG
4110
4111 if (tb[RTA_TABLE])
4112 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4113
51ebd318
ND
4114 if (tb[RTA_MULTIPATH]) {
4115 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4116 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4117
4118 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4119 cfg->fc_mp_len, extack);
9ed59592
DA
4120 if (err < 0)
4121 goto errout;
51ebd318
ND
4122 }
4123
c78ba6d6
LR
4124 if (tb[RTA_PREF]) {
4125 pref = nla_get_u8(tb[RTA_PREF]);
4126 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4127 pref != ICMPV6_ROUTER_PREF_HIGH)
4128 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4129 cfg->fc_flags |= RTF_PREF(pref);
4130 }
4131
19e42e45
RP
4132 if (tb[RTA_ENCAP])
4133 cfg->fc_encap = tb[RTA_ENCAP];
4134
9ed59592 4135 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4136 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4137
c255bd68 4138 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4139 if (err < 0)
4140 goto errout;
4141 }
4142
32bc201e
XL
4143 if (tb[RTA_EXPIRES]) {
4144 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4145
4146 if (addrconf_finite_timeout(timeout)) {
4147 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4148 cfg->fc_flags |= RTF_EXPIRES;
4149 }
4150 }
4151
86872cb5
TG
4152 err = 0;
4153errout:
4154 return err;
1da177e4
LT
4155}
4156
6b9ea5a6
RP
4157struct rt6_nh {
4158 struct rt6_info *rt6_info;
4159 struct fib6_config r_cfg;
6b9ea5a6
RP
4160 struct list_head next;
4161};
4162
4163static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4164{
4165 struct rt6_nh *nh;
4166
4167 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4168 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4169 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4170 nh->r_cfg.fc_ifindex);
4171 }
4172}
4173
d4ead6b3
DA
4174static int ip6_route_info_append(struct net *net,
4175 struct list_head *rt6_nh_list,
6b9ea5a6
RP
4176 struct rt6_info *rt, struct fib6_config *r_cfg)
4177{
4178 struct rt6_nh *nh;
6b9ea5a6
RP
4179 int err = -EEXIST;
4180
4181 list_for_each_entry(nh, rt6_nh_list, next) {
4182 /* check if rt6_info already exists */
f06b7549 4183 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4184 return err;
4185 }
4186
4187 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4188 if (!nh)
4189 return -ENOMEM;
4190 nh->rt6_info = rt;
d4ead6b3 4191 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4192 if (err) {
4193 kfree(nh);
4194 return err;
4195 }
4196 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4197 list_add_tail(&nh->next, rt6_nh_list);
4198
4199 return 0;
4200}
4201
3b1137fe
DA
4202static void ip6_route_mpath_notify(struct rt6_info *rt,
4203 struct rt6_info *rt_last,
4204 struct nl_info *info,
4205 __u16 nlflags)
4206{
4207 /* if this is an APPEND route, then rt points to the first route
4208 * inserted and rt_last points to last route inserted. Userspace
4209 * wants a consistent dump of the route which starts at the first
4210 * nexthop. Since sibling routes are always added at the end of
4211 * the list, find the first sibling of the last route appended
4212 */
4213 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4214 rt = list_first_entry(&rt_last->rt6i_siblings,
4215 struct rt6_info,
4216 rt6i_siblings);
4217 }
4218
4219 if (rt)
4220 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4221}
4222
333c4301
DA
4223static int ip6_route_multipath_add(struct fib6_config *cfg,
4224 struct netlink_ext_ack *extack)
51ebd318 4225{
3b1137fe
DA
4226 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4227 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4228 struct fib6_config r_cfg;
4229 struct rtnexthop *rtnh;
6b9ea5a6
RP
4230 struct rt6_info *rt;
4231 struct rt6_nh *err_nh;
4232 struct rt6_nh *nh, *nh_safe;
3b1137fe 4233 __u16 nlflags;
51ebd318
ND
4234 int remaining;
4235 int attrlen;
6b9ea5a6
RP
4236 int err = 1;
4237 int nhn = 0;
4238 int replace = (cfg->fc_nlinfo.nlh &&
4239 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4240 LIST_HEAD(rt6_nh_list);
51ebd318 4241
3b1137fe
DA
4242 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4243 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4244 nlflags |= NLM_F_APPEND;
4245
35f1b4e9 4246 remaining = cfg->fc_mp_len;
51ebd318 4247 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4248
6b9ea5a6
RP
4249 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4250 * rt6_info structs per nexthop
4251 */
51ebd318
ND
4252 while (rtnh_ok(rtnh, remaining)) {
4253 memcpy(&r_cfg, cfg, sizeof(*cfg));
4254 if (rtnh->rtnh_ifindex)
4255 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4256
4257 attrlen = rtnh_attrlen(rtnh);
4258 if (attrlen > 0) {
4259 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4260
4261 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4262 if (nla) {
67b61f6c 4263 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4264 r_cfg.fc_flags |= RTF_GATEWAY;
4265 }
19e42e45
RP
4266 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4267 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4268 if (nla)
4269 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4270 }
6b9ea5a6 4271
68e2ffde 4272 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4273 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4274 if (IS_ERR(rt)) {
4275 err = PTR_ERR(rt);
4276 rt = NULL;
6b9ea5a6 4277 goto cleanup;
8c5b83f0 4278 }
6b9ea5a6 4279
5e670d84 4280 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4281
d4ead6b3
DA
4282 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4283 rt, &r_cfg);
51ebd318 4284 if (err) {
93531c67 4285 fib6_info_release(rt);
6b9ea5a6
RP
4286 goto cleanup;
4287 }
4288
4289 rtnh = rtnh_next(rtnh, &remaining);
4290 }
4291
3b1137fe
DA
4292 /* for add and replace send one notification with all nexthops.
4293 * Skip the notification in fib6_add_rt2node and send one with
4294 * the full route when done
4295 */
4296 info->skip_notify = 1;
4297
6b9ea5a6
RP
4298 err_nh = NULL;
4299 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4300 rt_last = nh->rt6_info;
d4ead6b3 4301 err = __ip6_ins_rt(nh->rt6_info, info, extack);
93531c67
DA
4302 fib6_info_release(nh->rt6_info);
4303
3b1137fe
DA
4304 /* save reference to first route for notification */
4305 if (!rt_notif && !err)
4306 rt_notif = nh->rt6_info;
4307
6b9ea5a6
RP
4308 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4309 nh->rt6_info = NULL;
4310 if (err) {
4311 if (replace && nhn)
4312 ip6_print_replace_route_err(&rt6_nh_list);
4313 err_nh = nh;
4314 goto add_errout;
51ebd318 4315 }
6b9ea5a6 4316
1a72418b 4317 /* Because each route is added like a single route we remove
27596472
MK
4318 * these flags after the first nexthop: if there is a collision,
4319 * we have already failed to add the first nexthop:
4320 * fib6_add_rt2node() has rejected it; when replacing, old
4321 * nexthops have been replaced by first new, the rest should
4322 * be added to it.
1a72418b 4323 */
27596472
MK
4324 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4325 NLM_F_REPLACE);
6b9ea5a6
RP
4326 nhn++;
4327 }
4328
3b1137fe
DA
4329 /* success ... tell user about new route */
4330 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4331 goto cleanup;
4332
4333add_errout:
3b1137fe
DA
4334 /* send notification for routes that were added so that
4335 * the delete notifications sent by ip6_route_del are
4336 * coherent
4337 */
4338 if (rt_notif)
4339 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4340
6b9ea5a6
RP
4341 /* Delete routes that were already added */
4342 list_for_each_entry(nh, &rt6_nh_list, next) {
4343 if (err_nh == nh)
4344 break;
333c4301 4345 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4346 }
4347
4348cleanup:
4349 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74 4350 if (nh->rt6_info)
93531c67 4351 fib6_info_release(nh->rt6_info);
6b9ea5a6
RP
4352 list_del(&nh->next);
4353 kfree(nh);
4354 }
4355
4356 return err;
4357}
4358
333c4301
DA
4359static int ip6_route_multipath_del(struct fib6_config *cfg,
4360 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4361{
4362 struct fib6_config r_cfg;
4363 struct rtnexthop *rtnh;
4364 int remaining;
4365 int attrlen;
4366 int err = 1, last_err = 0;
4367
4368 remaining = cfg->fc_mp_len;
4369 rtnh = (struct rtnexthop *)cfg->fc_mp;
4370
4371 /* Parse a Multipath Entry */
4372 while (rtnh_ok(rtnh, remaining)) {
4373 memcpy(&r_cfg, cfg, sizeof(*cfg));
4374 if (rtnh->rtnh_ifindex)
4375 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4376
4377 attrlen = rtnh_attrlen(rtnh);
4378 if (attrlen > 0) {
4379 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4380
4381 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4382 if (nla) {
4383 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4384 r_cfg.fc_flags |= RTF_GATEWAY;
4385 }
4386 }
333c4301 4387 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4388 if (err)
4389 last_err = err;
4390
51ebd318
ND
4391 rtnh = rtnh_next(rtnh, &remaining);
4392 }
4393
4394 return last_err;
4395}
4396
c21ef3e3
DA
4397static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4398 struct netlink_ext_ack *extack)
1da177e4 4399{
86872cb5
TG
4400 struct fib6_config cfg;
4401 int err;
1da177e4 4402
333c4301 4403 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4404 if (err < 0)
4405 return err;
4406
51ebd318 4407 if (cfg.fc_mp)
333c4301 4408 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4409 else {
4410 cfg.fc_delete_all_nh = 1;
333c4301 4411 return ip6_route_del(&cfg, extack);
0ae81335 4412 }
1da177e4
LT
4413}
4414
c21ef3e3
DA
4415static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4416 struct netlink_ext_ack *extack)
1da177e4 4417{
86872cb5
TG
4418 struct fib6_config cfg;
4419 int err;
1da177e4 4420
333c4301 4421 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4422 if (err < 0)
4423 return err;
4424
51ebd318 4425 if (cfg.fc_mp)
333c4301 4426 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4427 else
acb54e3c 4428 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4429}
4430
beb1afac 4431static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4432{
beb1afac
DA
4433 int nexthop_len = 0;
4434
4435 if (rt->rt6i_nsiblings) {
4436 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4437 + NLA_ALIGN(sizeof(struct rtnexthop))
4438 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4439 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac
DA
4440
4441 nexthop_len *= rt->rt6i_nsiblings;
4442 }
4443
339bf98f
TG
4444 return NLMSG_ALIGN(sizeof(struct rtmsg))
4445 + nla_total_size(16) /* RTA_SRC */
4446 + nla_total_size(16) /* RTA_DST */
4447 + nla_total_size(16) /* RTA_GATEWAY */
4448 + nla_total_size(16) /* RTA_PREFSRC */
4449 + nla_total_size(4) /* RTA_TABLE */
4450 + nla_total_size(4) /* RTA_IIF */
4451 + nla_total_size(4) /* RTA_OIF */
4452 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4453 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4454 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4455 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4456 + nla_total_size(1) /* RTA_PREF */
5e670d84 4457 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4458 + nexthop_len;
4459}
4460
4461static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4462 unsigned int *flags, bool skip_oif)
beb1afac 4463{
5e670d84 4464 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4465 *flags |= RTNH_F_DEAD;
4466
5e670d84 4467 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4468 *flags |= RTNH_F_LINKDOWN;
4469 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4470 *flags |= RTNH_F_DEAD;
4471 }
4472
4473 if (rt->rt6i_flags & RTF_GATEWAY) {
5e670d84 4474 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4475 goto nla_put_failure;
4476 }
4477
5e670d84
DA
4478 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4479 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4480 *flags |= RTNH_F_OFFLOAD;
4481
5be083ce 4482 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4483 if (!skip_oif && rt->fib6_nh.nh_dev &&
4484 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4485 goto nla_put_failure;
4486
5e670d84
DA
4487 if (rt->fib6_nh.nh_lwtstate &&
4488 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4489 goto nla_put_failure;
4490
4491 return 0;
4492
4493nla_put_failure:
4494 return -EMSGSIZE;
4495}
4496
5be083ce 4497/* add multipath next hop */
beb1afac
DA
4498static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4499{
5e670d84 4500 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4501 struct rtnexthop *rtnh;
4502 unsigned int flags = 0;
4503
4504 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4505 if (!rtnh)
4506 goto nla_put_failure;
4507
5e670d84
DA
4508 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4509 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4510
5be083ce 4511 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4512 goto nla_put_failure;
4513
4514 rtnh->rtnh_flags = flags;
4515
4516 /* length of rtnetlink header + attributes */
4517 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4518
4519 return 0;
4520
4521nla_put_failure:
4522 return -EMSGSIZE;
339bf98f
TG
4523}
4524
d4ead6b3
DA
4525static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4526 struct rt6_info *rt, struct dst_entry *dst,
4527 struct in6_addr *dest, struct in6_addr *src,
15e47304 4528 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4529 unsigned int flags)
1da177e4
LT
4530{
4531 struct rtmsg *rtm;
2d7202bf 4532 struct nlmsghdr *nlh;
d4ead6b3
DA
4533 long expires = 0;
4534 u32 *pmetrics;
9e762a4a 4535 u32 table;
1da177e4 4536
15e47304 4537 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4538 if (!nlh)
26932566 4539 return -EMSGSIZE;
2d7202bf
TG
4540
4541 rtm = nlmsg_data(nlh);
1da177e4
LT
4542 rtm->rtm_family = AF_INET6;
4543 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4544 rtm->rtm_src_len = rt->rt6i_src.plen;
4545 rtm->rtm_tos = 0;
c71099ac 4546 if (rt->rt6i_table)
9e762a4a 4547 table = rt->rt6i_table->tb6_id;
c71099ac 4548 else
9e762a4a
PM
4549 table = RT6_TABLE_UNSPEC;
4550 rtm->rtm_table = table;
c78679e8
DM
4551 if (nla_put_u32(skb, RTA_TABLE, table))
4552 goto nla_put_failure;
e8478e80
DA
4553
4554 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4555 rtm->rtm_flags = 0;
4556 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4557 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4558
38308473 4559 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4560 rtm->rtm_flags |= RTM_F_CLONED;
4561
d4ead6b3
DA
4562 if (dest) {
4563 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4564 goto nla_put_failure;
1ab1457c 4565 rtm->rtm_dst_len = 128;
1da177e4 4566 } else if (rtm->rtm_dst_len)
930345ea 4567 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4568 goto nla_put_failure;
1da177e4
LT
4569#ifdef CONFIG_IPV6_SUBTREES
4570 if (src) {
930345ea 4571 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4572 goto nla_put_failure;
1ab1457c 4573 rtm->rtm_src_len = 128;
c78679e8 4574 } else if (rtm->rtm_src_len &&
930345ea 4575 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4576 goto nla_put_failure;
1da177e4 4577#endif
7bc570c8
YH
4578 if (iif) {
4579#ifdef CONFIG_IPV6_MROUTE
4580 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4581 int err = ip6mr_get_route(net, skb, rtm, portid);
4582
4583 if (err == 0)
4584 return 0;
4585 if (err < 0)
4586 goto nla_put_failure;
7bc570c8
YH
4587 } else
4588#endif
c78679e8
DM
4589 if (nla_put_u32(skb, RTA_IIF, iif))
4590 goto nla_put_failure;
d4ead6b3 4591 } else if (dest) {
1da177e4 4592 struct in6_addr saddr_buf;
d4ead6b3 4593 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4594 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4595 goto nla_put_failure;
1da177e4 4596 }
2d7202bf 4597
c3968a85
DW
4598 if (rt->rt6i_prefsrc.plen) {
4599 struct in6_addr saddr_buf;
4e3fd7a0 4600 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4601 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4602 goto nla_put_failure;
c3968a85
DW
4603 }
4604
d4ead6b3
DA
4605 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4606 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4607 goto nla_put_failure;
4608
c78679e8
DM
4609 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4610 goto nla_put_failure;
8253947e 4611
beb1afac
DA
4612 /* For multipath routes, walk the siblings list and add
4613 * each as a nexthop within RTA_MULTIPATH.
4614 */
4615 if (rt->rt6i_nsiblings) {
4616 struct rt6_info *sibling, *next_sibling;
4617 struct nlattr *mp;
4618
4619 mp = nla_nest_start(skb, RTA_MULTIPATH);
4620 if (!mp)
4621 goto nla_put_failure;
4622
4623 if (rt6_add_nexthop(skb, rt) < 0)
4624 goto nla_put_failure;
4625
4626 list_for_each_entry_safe(sibling, next_sibling,
4627 &rt->rt6i_siblings, rt6i_siblings) {
4628 if (rt6_add_nexthop(skb, sibling) < 0)
4629 goto nla_put_failure;
4630 }
4631
4632 nla_nest_end(skb, mp);
4633 } else {
5be083ce 4634 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4635 goto nla_put_failure;
4636 }
4637
14895687
DA
4638 if (rt->rt6i_flags & RTF_EXPIRES) {
4639 expires = dst ? dst->expires : rt->expires;
4640 expires -= jiffies;
4641 }
69cdf8f9 4642
d4ead6b3 4643 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4644 goto nla_put_failure;
2d7202bf 4645
c78ba6d6
LR
4646 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4647 goto nla_put_failure;
4648
19e42e45 4649
053c095a
JB
4650 nlmsg_end(skb, nlh);
4651 return 0;
2d7202bf
TG
4652
4653nla_put_failure:
26932566
PM
4654 nlmsg_cancel(skb, nlh);
4655 return -EMSGSIZE;
1da177e4
LT
4656}
4657
1b43af54 4658int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4659{
4660 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4661 struct net *net = arg->net;
4662
421842ed 4663 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4664 return 0;
1da177e4 4665
2d7202bf
TG
4666 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4667 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4668
4669 /* user wants prefix routes only */
4670 if (rtm->rtm_flags & RTM_F_PREFIX &&
4671 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4672 /* success since this is not a prefix route */
4673 return 1;
4674 }
4675 }
1da177e4 4676
d4ead6b3
DA
4677 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4678 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4679 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4680}
4681
c21ef3e3
DA
4682static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4683 struct netlink_ext_ack *extack)
1da177e4 4684{
3b1e0a65 4685 struct net *net = sock_net(in_skb->sk);
ab364a6f 4686 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4687 int err, iif = 0, oif = 0;
4688 struct dst_entry *dst;
ab364a6f 4689 struct rt6_info *rt;
1da177e4 4690 struct sk_buff *skb;
ab364a6f 4691 struct rtmsg *rtm;
4c9483b2 4692 struct flowi6 fl6;
18c3a61c 4693 bool fibmatch;
1da177e4 4694
fceb6435 4695 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4696 extack);
ab364a6f
TG
4697 if (err < 0)
4698 goto errout;
1da177e4 4699
ab364a6f 4700 err = -EINVAL;
4c9483b2 4701 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4702 rtm = nlmsg_data(nlh);
4703 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4704 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4705
ab364a6f
TG
4706 if (tb[RTA_SRC]) {
4707 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4708 goto errout;
4709
4e3fd7a0 4710 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4711 }
4712
4713 if (tb[RTA_DST]) {
4714 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4715 goto errout;
4716
4e3fd7a0 4717 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4718 }
4719
4720 if (tb[RTA_IIF])
4721 iif = nla_get_u32(tb[RTA_IIF]);
4722
4723 if (tb[RTA_OIF])
72331bc0 4724 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4725
2e47b291
LC
4726 if (tb[RTA_MARK])
4727 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4728
622ec2c9
LC
4729 if (tb[RTA_UID])
4730 fl6.flowi6_uid = make_kuid(current_user_ns(),
4731 nla_get_u32(tb[RTA_UID]));
4732 else
4733 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4734
1da177e4
LT
4735 if (iif) {
4736 struct net_device *dev;
72331bc0
SL
4737 int flags = 0;
4738
121622db
FW
4739 rcu_read_lock();
4740
4741 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4742 if (!dev) {
121622db 4743 rcu_read_unlock();
1da177e4 4744 err = -ENODEV;
ab364a6f 4745 goto errout;
1da177e4 4746 }
72331bc0
SL
4747
4748 fl6.flowi6_iif = iif;
4749
4750 if (!ipv6_addr_any(&fl6.saddr))
4751 flags |= RT6_LOOKUP_F_HAS_SADDR;
4752
b75cc8f9 4753 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4754
4755 rcu_read_unlock();
72331bc0
SL
4756 } else {
4757 fl6.flowi6_oif = oif;
4758
58acfd71 4759 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4760 }
4761
18c3a61c
RP
4762
4763 rt = container_of(dst, struct rt6_info, dst);
4764 if (rt->dst.error) {
4765 err = rt->dst.error;
4766 ip6_rt_put(rt);
4767 goto errout;
1da177e4
LT
4768 }
4769
9d6acb3b
WC
4770 if (rt == net->ipv6.ip6_null_entry) {
4771 err = rt->dst.error;
4772 ip6_rt_put(rt);
4773 goto errout;
4774 }
4775
ab364a6f 4776 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4777 if (!skb) {
94e187c0 4778 ip6_rt_put(rt);
ab364a6f
TG
4779 err = -ENOBUFS;
4780 goto errout;
4781 }
1da177e4 4782
d8d1f30b 4783 skb_dst_set(skb, &rt->dst);
18c3a61c 4784 if (fibmatch)
93531c67 4785 err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
18c3a61c
RP
4786 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4787 nlh->nlmsg_seq, 0);
4788 else
93531c67
DA
4789 err = rt6_fill_node(net, skb, rt->from, dst,
4790 &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4791 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4792 0);
1da177e4 4793 if (err < 0) {
ab364a6f
TG
4794 kfree_skb(skb);
4795 goto errout;
1da177e4
LT
4796 }
4797
15e47304 4798 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4799errout:
1da177e4 4800 return err;
1da177e4
LT
4801}
4802
37a1d361
RP
4803void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4804 unsigned int nlm_flags)
1da177e4
LT
4805{
4806 struct sk_buff *skb;
5578689a 4807 struct net *net = info->nl_net;
528c4ceb
DL
4808 u32 seq;
4809 int err;
4810
4811 err = -ENOBUFS;
38308473 4812 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4813
19e42e45 4814 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4815 if (!skb)
21713ebc
TG
4816 goto errout;
4817
d4ead6b3
DA
4818 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4819 event, info->portid, seq, nlm_flags);
26932566
PM
4820 if (err < 0) {
4821 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4822 WARN_ON(err == -EMSGSIZE);
4823 kfree_skb(skb);
4824 goto errout;
4825 }
15e47304 4826 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4827 info->nlh, gfp_any());
4828 return;
21713ebc
TG
4829errout:
4830 if (err < 0)
5578689a 4831 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4832}
4833
8ed67789 4834static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4835 unsigned long event, void *ptr)
8ed67789 4836{
351638e7 4837 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4838 struct net *net = dev_net(dev);
8ed67789 4839
242d3a49
WC
4840 if (!(dev->flags & IFF_LOOPBACK))
4841 return NOTIFY_OK;
4842
4843 if (event == NETDEV_REGISTER) {
421842ed
DA
4844 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4845 net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4846 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4847 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4848#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4849 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4850 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4851 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4852 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4853#endif
76da0704
WC
4854 } else if (event == NETDEV_UNREGISTER &&
4855 dev->reg_state != NETREG_UNREGISTERED) {
4856 /* NETDEV_UNREGISTER could be fired for multiple times by
4857 * netdev_wait_allrefs(). Make sure we only call this once.
4858 */
421842ed 4859 in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
12d94a80 4860 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4861#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4862 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4863 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4864#endif
4865 }
4866
4867 return NOTIFY_OK;
4868}
4869
1da177e4
LT
4870/*
4871 * /proc
4872 */
4873
4874#ifdef CONFIG_PROC_FS
4875
33120b30 4876static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4877 .open = ipv6_route_open,
4878 .read = seq_read,
4879 .llseek = seq_lseek,
8d2ca1d7 4880 .release = seq_release_net,
33120b30
AD
4881};
4882
1da177e4
LT
4883static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4884{
69ddb805 4885 struct net *net = (struct net *)seq->private;
1da177e4 4886 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4887 net->ipv6.rt6_stats->fib_nodes,
4888 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4889 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4890 net->ipv6.rt6_stats->fib_rt_entries,
4891 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4892 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4893 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4894
4895 return 0;
4896}
4897
4898static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4899{
de05c557 4900 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4901}
4902
9a32144e 4903static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4904 .open = rt6_stats_seq_open,
4905 .read = seq_read,
4906 .llseek = seq_lseek,
b6fcbdb4 4907 .release = single_release_net,
1da177e4
LT
4908};
4909#endif /* CONFIG_PROC_FS */
4910
4911#ifdef CONFIG_SYSCTL
4912
1da177e4 4913static
fe2c6338 4914int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4915 void __user *buffer, size_t *lenp, loff_t *ppos)
4916{
c486da34
LAG
4917 struct net *net;
4918 int delay;
4919 if (!write)
1da177e4 4920 return -EINVAL;
c486da34
LAG
4921
4922 net = (struct net *)ctl->extra1;
4923 delay = net->ipv6.sysctl.flush_delay;
4924 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4925 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4926 return 0;
1da177e4
LT
4927}
4928
fe2c6338 4929struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4930 {
1da177e4 4931 .procname = "flush",
4990509f 4932 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4933 .maxlen = sizeof(int),
89c8b3a1 4934 .mode = 0200,
6d9f239a 4935 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4936 },
4937 {
1da177e4 4938 .procname = "gc_thresh",
9a7ec3a9 4939 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4940 .maxlen = sizeof(int),
4941 .mode = 0644,
6d9f239a 4942 .proc_handler = proc_dointvec,
1da177e4
LT
4943 },
4944 {
1da177e4 4945 .procname = "max_size",
4990509f 4946 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4947 .maxlen = sizeof(int),
4948 .mode = 0644,
6d9f239a 4949 .proc_handler = proc_dointvec,
1da177e4
LT
4950 },
4951 {
1da177e4 4952 .procname = "gc_min_interval",
4990509f 4953 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4954 .maxlen = sizeof(int),
4955 .mode = 0644,
6d9f239a 4956 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4957 },
4958 {
1da177e4 4959 .procname = "gc_timeout",
4990509f 4960 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4961 .maxlen = sizeof(int),
4962 .mode = 0644,
6d9f239a 4963 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4964 },
4965 {
1da177e4 4966 .procname = "gc_interval",
4990509f 4967 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4968 .maxlen = sizeof(int),
4969 .mode = 0644,
6d9f239a 4970 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4971 },
4972 {
1da177e4 4973 .procname = "gc_elasticity",
4990509f 4974 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4975 .maxlen = sizeof(int),
4976 .mode = 0644,
f3d3f616 4977 .proc_handler = proc_dointvec,
1da177e4
LT
4978 },
4979 {
1da177e4 4980 .procname = "mtu_expires",
4990509f 4981 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4982 .maxlen = sizeof(int),
4983 .mode = 0644,
6d9f239a 4984 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4985 },
4986 {
1da177e4 4987 .procname = "min_adv_mss",
4990509f 4988 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4989 .maxlen = sizeof(int),
4990 .mode = 0644,
f3d3f616 4991 .proc_handler = proc_dointvec,
1da177e4
LT
4992 },
4993 {
1da177e4 4994 .procname = "gc_min_interval_ms",
4990509f 4995 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4996 .maxlen = sizeof(int),
4997 .mode = 0644,
6d9f239a 4998 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4999 },
f8572d8f 5000 { }
1da177e4
LT
5001};
5002
2c8c1e72 5003struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5004{
5005 struct ctl_table *table;
5006
5007 table = kmemdup(ipv6_route_table_template,
5008 sizeof(ipv6_route_table_template),
5009 GFP_KERNEL);
5ee09105
YH
5010
5011 if (table) {
5012 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5013 table[0].extra1 = net;
86393e52 5014 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5015 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5016 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5017 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5018 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5019 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5020 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5021 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5022 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5023
5024 /* Don't export sysctls to unprivileged users */
5025 if (net->user_ns != &init_user_ns)
5026 table[0].procname = NULL;
5ee09105
YH
5027 }
5028
760f2d01
DL
5029 return table;
5030}
1da177e4
LT
5031#endif
5032
2c8c1e72 5033static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5034{
633d424b 5035 int ret = -ENOMEM;
8ed67789 5036
86393e52
AD
5037 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5038 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5039
fc66f95c
ED
5040 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5041 goto out_ip6_dst_ops;
5042
421842ed
DA
5043 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5044 sizeof(*net->ipv6.fib6_null_entry),
5045 GFP_KERNEL);
5046 if (!net->ipv6.fib6_null_entry)
5047 goto out_ip6_dst_entries;
5048
8ed67789
DL
5049 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5050 sizeof(*net->ipv6.ip6_null_entry),
5051 GFP_KERNEL);
5052 if (!net->ipv6.ip6_null_entry)
421842ed 5053 goto out_fib6_null_entry;
d8d1f30b 5054 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5055 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5056 ip6_template_metrics, true);
8ed67789
DL
5057
5058#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5059 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5060 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5061 sizeof(*net->ipv6.ip6_prohibit_entry),
5062 GFP_KERNEL);
68fffc67
PZ
5063 if (!net->ipv6.ip6_prohibit_entry)
5064 goto out_ip6_null_entry;
d8d1f30b 5065 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5066 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5067 ip6_template_metrics, true);
8ed67789
DL
5068
5069 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5070 sizeof(*net->ipv6.ip6_blk_hole_entry),
5071 GFP_KERNEL);
68fffc67
PZ
5072 if (!net->ipv6.ip6_blk_hole_entry)
5073 goto out_ip6_prohibit_entry;
d8d1f30b 5074 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5075 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5076 ip6_template_metrics, true);
8ed67789
DL
5077#endif
5078
b339a47c
PZ
5079 net->ipv6.sysctl.flush_delay = 0;
5080 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5081 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5082 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5083 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5084 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5085 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5086 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5087
6891a346
BT
5088 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5089
8ed67789
DL
5090 ret = 0;
5091out:
5092 return ret;
f2fc6a54 5093
68fffc67
PZ
5094#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5095out_ip6_prohibit_entry:
5096 kfree(net->ipv6.ip6_prohibit_entry);
5097out_ip6_null_entry:
5098 kfree(net->ipv6.ip6_null_entry);
5099#endif
421842ed
DA
5100out_fib6_null_entry:
5101 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5102out_ip6_dst_entries:
5103 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5104out_ip6_dst_ops:
f2fc6a54 5105 goto out;
cdb18761
DL
5106}
5107
2c8c1e72 5108static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5109{
421842ed 5110 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5111 kfree(net->ipv6.ip6_null_entry);
5112#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5113 kfree(net->ipv6.ip6_prohibit_entry);
5114 kfree(net->ipv6.ip6_blk_hole_entry);
5115#endif
41bb78b4 5116 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5117}
5118
d189634e
TG
5119static int __net_init ip6_route_net_init_late(struct net *net)
5120{
5121#ifdef CONFIG_PROC_FS
d4beaa66 5122 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5123 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5124#endif
5125 return 0;
5126}
5127
5128static void __net_exit ip6_route_net_exit_late(struct net *net)
5129{
5130#ifdef CONFIG_PROC_FS
ece31ffd
G
5131 remove_proc_entry("ipv6_route", net->proc_net);
5132 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5133#endif
5134}
5135
cdb18761
DL
5136static struct pernet_operations ip6_route_net_ops = {
5137 .init = ip6_route_net_init,
5138 .exit = ip6_route_net_exit,
5139};
5140
c3426b47
DM
5141static int __net_init ipv6_inetpeer_init(struct net *net)
5142{
5143 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5144
5145 if (!bp)
5146 return -ENOMEM;
5147 inet_peer_base_init(bp);
5148 net->ipv6.peers = bp;
5149 return 0;
5150}
5151
5152static void __net_exit ipv6_inetpeer_exit(struct net *net)
5153{
5154 struct inet_peer_base *bp = net->ipv6.peers;
5155
5156 net->ipv6.peers = NULL;
56a6b248 5157 inetpeer_invalidate_tree(bp);
c3426b47
DM
5158 kfree(bp);
5159}
5160
2b823f72 5161static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5162 .init = ipv6_inetpeer_init,
5163 .exit = ipv6_inetpeer_exit,
5164};
5165
d189634e
TG
5166static struct pernet_operations ip6_route_net_late_ops = {
5167 .init = ip6_route_net_init_late,
5168 .exit = ip6_route_net_exit_late,
5169};
5170
8ed67789
DL
5171static struct notifier_block ip6_route_dev_notifier = {
5172 .notifier_call = ip6_route_dev_notify,
242d3a49 5173 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5174};
5175
2f460933
WC
5176void __init ip6_route_init_special_entries(void)
5177{
5178 /* Registering of the loopback is done before this portion of code,
5179 * the loopback reference in rt6_info will not be taken, do it
5180 * manually for init_net */
421842ed
DA
5181 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5182 init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2f460933
WC
5183 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5184 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5185 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5186 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5187 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5188 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5189 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5190 #endif
5191}
5192
433d49c3 5193int __init ip6_route_init(void)
1da177e4 5194{
433d49c3 5195 int ret;
8d0b94af 5196 int cpu;
433d49c3 5197
9a7ec3a9
DL
5198 ret = -ENOMEM;
5199 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5200 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5201 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5202 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5203 goto out;
14e50e57 5204
fc66f95c 5205 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5206 if (ret)
bdb3289f 5207 goto out_kmem_cache;
bdb3289f 5208
c3426b47
DM
5209 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5210 if (ret)
e8803b6c 5211 goto out_dst_entries;
2a0c451a 5212
7e52b33b
DM
5213 ret = register_pernet_subsys(&ip6_route_net_ops);
5214 if (ret)
5215 goto out_register_inetpeer;
c3426b47 5216
5dc121e9
AE
5217 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5218
e8803b6c 5219 ret = fib6_init();
433d49c3 5220 if (ret)
8ed67789 5221 goto out_register_subsys;
433d49c3 5222
433d49c3
DL
5223 ret = xfrm6_init();
5224 if (ret)
e8803b6c 5225 goto out_fib6_init;
c35b7e72 5226
433d49c3
DL
5227 ret = fib6_rules_init();
5228 if (ret)
5229 goto xfrm6_init;
7e5449c2 5230
d189634e
TG
5231 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5232 if (ret)
5233 goto fib6_rules_init;
5234
16feebcf
FW
5235 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5236 inet6_rtm_newroute, NULL, 0);
5237 if (ret < 0)
5238 goto out_register_late_subsys;
5239
5240 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5241 inet6_rtm_delroute, NULL, 0);
5242 if (ret < 0)
5243 goto out_register_late_subsys;
5244
5245 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5246 inet6_rtm_getroute, NULL,
5247 RTNL_FLAG_DOIT_UNLOCKED);
5248 if (ret < 0)
d189634e 5249 goto out_register_late_subsys;
c127ea2c 5250
8ed67789 5251 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5252 if (ret)
d189634e 5253 goto out_register_late_subsys;
8ed67789 5254
8d0b94af
MKL
5255 for_each_possible_cpu(cpu) {
5256 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5257
5258 INIT_LIST_HEAD(&ul->head);
5259 spin_lock_init(&ul->lock);
5260 }
5261
433d49c3
DL
5262out:
5263 return ret;
5264
d189634e 5265out_register_late_subsys:
16feebcf 5266 rtnl_unregister_all(PF_INET6);
d189634e 5267 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5268fib6_rules_init:
433d49c3
DL
5269 fib6_rules_cleanup();
5270xfrm6_init:
433d49c3 5271 xfrm6_fini();
2a0c451a
TG
5272out_fib6_init:
5273 fib6_gc_cleanup();
8ed67789
DL
5274out_register_subsys:
5275 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5276out_register_inetpeer:
5277 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5278out_dst_entries:
5279 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5280out_kmem_cache:
f2fc6a54 5281 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5282 goto out;
1da177e4
LT
5283}
5284
5285void ip6_route_cleanup(void)
5286{
8ed67789 5287 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5288 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5289 fib6_rules_cleanup();
1da177e4 5290 xfrm6_fini();
1da177e4 5291 fib6_gc_cleanup();
c3426b47 5292 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5293 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5294 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5295 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5296}