Linux 6.10-rc3
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * Linux INET6 implementation
4 * FIB front-end.
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
8 */
9
10/* Changes:
11 *
12 * YOSHIFUJI Hideaki @USAGI
13 * reworked default router selection.
14 * - respect outgoing interface
15 * - select from (probably) reachable routers (i.e.
16 * routers in REACHABLE, STALE, DELAY or PROBE states).
17 * - always select the same router if it is (probably)
18 * reachable. otherwise, round-robin the list.
c0bece9f
YH
19 * Ville Nuorvala
20 * Fixed routing subtrees.
1da177e4
LT
21 */
22
f3213831
JP
23#define pr_fmt(fmt) "IPv6: " fmt
24
4fc268d2 25#include <linux/capability.h>
1da177e4 26#include <linux/errno.h>
bc3b2d7f 27#include <linux/export.h>
1da177e4
LT
28#include <linux/types.h>
29#include <linux/times.h>
30#include <linux/socket.h>
31#include <linux/sockios.h>
32#include <linux/net.h>
33#include <linux/route.h>
34#include <linux/netdevice.h>
35#include <linux/in6.h>
7bc570c8 36#include <linux/mroute6.h>
1da177e4 37#include <linux/init.h>
1da177e4 38#include <linux/if_arp.h>
1da177e4
LT
39#include <linux/proc_fs.h>
40#include <linux/seq_file.h>
5b7c931d 41#include <linux/nsproxy.h>
5a0e3ad6 42#include <linux/slab.h>
35732d01 43#include <linux/jhash.h>
4785305c 44#include <linux/siphash.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
904af04d 55#include <net/dst_metadata.h>
1da177e4 56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
3c618c1d 59#include <net/rtnh.h>
19e42e45 60#include <net/lwtunnel.h>
904af04d 61#include <net/ip_tunnels.h>
ca254490 62#include <net/l3mdev.h>
eacb9384 63#include <net/ip.h>
7c0f6ba6 64#include <linux/uaccess.h>
951cf368 65#include <linux/btf_ids.h>
1da177e4
LT
66
67#ifdef CONFIG_SYSCTL
68#include <linux/sysctl.h>
69#endif
70
30d444d3
DA
71static int ip6_rt_type_to_error(u8 fib6_type);
72
73#define CREATE_TRACE_POINTS
74#include <trace/events/fib6.h>
75EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
76#undef CREATE_TRACE_POINTS
77
/* Result of checking a nexthop's neighbour entry during router selection.
 * Every failure code is negative so callers can test "< 0"; the single
 * success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* candidate is rejected outright */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in a failed state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry: ask for round-robin */
	RT6_NUD_SUCCEED		= 1,
};
84
bbd807df
BV
85INDIRECT_CALLABLE_SCOPE
86struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 87static unsigned int ip6_default_advmss(const struct dst_entry *dst);
f67fbeae
BV
88INDIRECT_CALLABLE_SCOPE
89unsigned int ip6_mtu(const struct dst_entry *dst);
92f1655a
ED
90static void ip6_negative_advice(struct sock *sk,
91 struct dst_entry *dst);
1da177e4
LT
92static void ip6_dst_destroy(struct dst_entry *);
93static void ip6_dst_ifdown(struct dst_entry *,
43c28172 94 struct net_device *dev);
af6d1034 95static void ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
96
97static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 98static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 99static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 100static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 101static void ip6_link_failure(struct sk_buff *skb);
6700c270 102static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
bd085ef6
HL
103 struct sk_buff *skb, u32 mtu,
104 bool confirm_neigh);
6700c270
DM
105static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
106 struct sk_buff *skb);
702cea56
DA
107static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
108 int strict);
a1b7a1f0 109static size_t rt6_nlmsg_size(struct fib6_info *f6i);
d4ead6b3 110static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 111 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 112 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
113 int iif, int type, u32 portid, u32 seq,
114 unsigned int flags);
7e4b5128 115static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
116 const struct in6_addr *daddr,
117 const struct in6_addr *saddr);
1da177e4 118
70ceb4f5 119#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 120static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 121 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
122 const struct in6_addr *gwaddr,
123 struct net_device *dev,
95c96174 124 unsigned int pref);
8d1c802b 125static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 126 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
127 const struct in6_addr *gwaddr,
128 struct net_device *dev);
70ceb4f5
YH
129#endif
130
8d0b94af
MKL
131struct uncached_list {
132 spinlock_t lock;
133 struct list_head head;
ba55ef81 134 struct list_head quarantine;
8d0b94af
MKL
135};
136
137static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
138
510c321b 139void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
140{
141 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
142
d288a162 143 rt->dst.rt_uncached_list = ul;
8d0b94af
MKL
144
145 spin_lock_bh(&ul->lock);
d288a162 146 list_add_tail(&rt->dst.rt_uncached, &ul->head);
8d0b94af
MKL
147 spin_unlock_bh(&ul->lock);
148}
149
510c321b 150void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af 151{
d288a162
WG
152 if (!list_empty(&rt->dst.rt_uncached)) {
153 struct uncached_list *ul = rt->dst.rt_uncached_list;
8d0b94af
MKL
154
155 spin_lock_bh(&ul->lock);
d288a162 156 list_del_init(&rt->dst.rt_uncached);
8d0b94af
MKL
157 spin_unlock_bh(&ul->lock);
158 }
159}
160
e5f80fcf 161static void rt6_uncached_list_flush_dev(struct net_device *dev)
8d0b94af 162{
8d0b94af
MKL
163 int cpu;
164
165 for_each_possible_cpu(cpu) {
166 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
ba55ef81
ED
167 struct rt6_info *rt, *safe;
168
169 if (list_empty(&ul->head))
170 continue;
8d0b94af
MKL
171
172 spin_lock_bh(&ul->lock);
d288a162 173 list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
8d0b94af
MKL
174 struct inet6_dev *rt_idev = rt->rt6i_idev;
175 struct net_device *rt_dev = rt->dst.dev;
ba55ef81 176 bool handled = false;
8d0b94af 177
e332bc67 178 if (rt_idev->dev == dev) {
e5f80fcf 179 rt->rt6i_idev = in6_dev_get(blackhole_netdev);
8d0b94af 180 in6_dev_put(rt_idev);
ba55ef81 181 handled = true;
8d0b94af
MKL
182 }
183
e332bc67 184 if (rt_dev == dev) {
8d7017fd 185 rt->dst.dev = blackhole_netdev;
d62607c3
JK
186 netdev_ref_replace(rt_dev, blackhole_netdev,
187 &rt->dst.dev_tracker,
188 GFP_ATOMIC);
ba55ef81 189 handled = true;
8d0b94af 190 }
ba55ef81 191 if (handled)
d288a162 192 list_move(&rt->dst.rt_uncached,
ba55ef81 193 &ul->quarantine);
8d0b94af
MKL
194 }
195 spin_unlock_bh(&ul->lock);
196 }
197}
198
f8a1b43b 199static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
200 struct sk_buff *skb,
201 const void *daddr)
39232973 202{
a7563f34 203 if (!ipv6_addr_any(p))
39232973 204 return (const void *) p;
f894cbf8
DM
205 else if (skb)
206 return &ipv6_hdr(skb)->daddr;
39232973
DM
207 return daddr;
208}
209
f8a1b43b
DA
210struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
211 struct net_device *dev,
212 struct sk_buff *skb,
213 const void *daddr)
d3aaeb38 214{
39232973
DM
215 struct neighbour *n;
216
f8a1b43b
DA
217 daddr = choose_neigh_daddr(gw, skb, daddr);
218 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
219 if (n)
220 return n;
7adf3246
SB
221
222 n = neigh_create(&nd_tbl, daddr, dev);
223 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
224}
225
226static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
227 struct sk_buff *skb,
228 const void *daddr)
229{
e8dfd42c 230 const struct rt6_info *rt = dst_rt6_info(dst);
f8a1b43b 231
2c6b55f4
ND
232 return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
233 dst->dev, skb, daddr);
f83c7790
DM
234}
235
63fca65d
JA
236static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
237{
e8dfd42c 238 const struct rt6_info *rt = dst_rt6_info(dst);
63fca65d 239 struct net_device *dev = dst->dev;
63fca65d 240
cbfd6891 241 daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
63fca65d
JA
242 if (!daddr)
243 return;
244 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
245 return;
246 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
247 return;
248 __ipv6_confirm_neigh(dev, daddr);
249}
250
9a7ec3a9 251static struct dst_ops ip6_dst_ops_template = {
1da177e4 252 .family = AF_INET6,
1da177e4
LT
253 .gc = ip6_dst_gc,
254 .gc_thresh = 1024,
255 .check = ip6_dst_check,
0dbaee3b 256 .default_advmss = ip6_default_advmss,
ebb762f2 257 .mtu = ip6_mtu,
d4ead6b3 258 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
259 .destroy = ip6_dst_destroy,
260 .ifdown = ip6_dst_ifdown,
261 .negative_advice = ip6_negative_advice,
262 .link_failure = ip6_link_failure,
263 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 264 .redirect = rt6_do_redirect,
9f8955cc 265 .local_out = __ip6_local_out,
f8a1b43b 266 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 267 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
268};
269
14e50e57 270static struct dst_ops ip6_dst_blackhole_ops = {
c4c877b2
DB
271 .family = AF_INET6,
272 .default_advmss = ip6_default_advmss,
273 .neigh_lookup = ip6_dst_neigh_lookup,
274 .check = ip6_dst_check,
275 .destroy = ip6_dst_destroy,
276 .cow_metrics = dst_cow_metrics_generic,
277 .update_pmtu = dst_blackhole_update_pmtu,
278 .redirect = dst_blackhole_redirect,
279 .mtu = dst_blackhole_mtu,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
8d1c802b 286static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
287 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .fib6_protocol = RTPROT_KERNEL,
289 .fib6_metric = ~(u32)0,
f05713e0 290 .fib6_ref = REFCOUNT_INIT(1),
421842ed
DA
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b 296 .dst = {
bc9d3a9f 297 .__rcuref = RCUREF_INIT(1),
d8d1f30b 298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
305};
306
101367c2
TG
307#ifdef CONFIG_IPV6_MULTIPLE_TABLES
308
fb0af4c7 309static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b 310 .dst = {
bc9d3a9f 311 .__rcuref = RCUREF_INIT(1),
d8d1f30b 312 .__use = 1,
2c20cbd7 313 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 314 .error = -EACCES,
d8d1f30b
CG
315 .input = ip6_pkt_prohibit,
316 .output = ip6_pkt_prohibit_out,
101367c2
TG
317 },
318 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
319};
320
fb0af4c7 321static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b 322 .dst = {
bc9d3a9f 323 .__rcuref = RCUREF_INIT(1),
d8d1f30b 324 .__use = 1,
2c20cbd7 325 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 326 .error = -EINVAL,
d8d1f30b 327 .input = dst_discard,
ede2059d 328 .output = dst_discard_out,
101367c2
TG
329 },
330 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
331};
332
333#endif
334
ebfa45f0
MKL
335static void rt6_info_init(struct rt6_info *rt)
336{
8f2a83b4 337 memset_after(rt, 0, dst);
ebfa45f0
MKL
338}
339
1da177e4 340/* allocate dst with ip6_dst_ops */
93531c67
DA
341struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
342 int flags)
1da177e4 343{
97bab73f 344 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
762c8dc7 345 DST_OBSOLETE_FORCE_CHK, flags);
cf911662 346
81eb8447 347 if (rt) {
ebfa45f0 348 rt6_info_init(rt);
81eb8447
WW
349 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
350 }
8104891b 351
cf911662 352 return rt;
1da177e4 353}
9ab179d8 354EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 355
1da177e4
LT
356static void ip6_dst_destroy(struct dst_entry *dst)
357{
e8dfd42c 358 struct rt6_info *rt = dst_rt6_info(dst);
a68886a6 359 struct fib6_info *from;
8d0b94af 360 struct inet6_dev *idev;
1da177e4 361
1620a336 362 ip_dst_metrics_put(dst);
8d0b94af
MKL
363 rt6_uncached_list_del(rt);
364
365 idev = rt->rt6i_idev;
38308473 366 if (idev) {
1da177e4
LT
367 rt->rt6i_idev = NULL;
368 in6_dev_put(idev);
1ab1457c 369 }
1716a961 370
0e233874 371 from = xchg((__force struct fib6_info **)&rt->from, NULL);
93531c67 372 fib6_info_release(from);
b3419363
DM
373}
374
43c28172 375static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1da177e4 376{
e8dfd42c 377 struct rt6_info *rt = dst_rt6_info(dst);
1da177e4
LT
378 struct inet6_dev *idev = rt->rt6i_idev;
379
e5f80fcf
ED
380 if (idev && idev->dev != blackhole_netdev) {
381 struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
382
383 if (blackhole_idev) {
384 rt->rt6i_idev = blackhole_idev;
e5645f51 385 in6_dev_put(idev);
97cac082 386 }
1da177e4
LT
387 }
388}
389
5973fb1e
MKL
390static bool __rt6_check_expired(const struct rt6_info *rt)
391{
392 if (rt->rt6i_flags & RTF_EXPIRES)
393 return time_after(jiffies, rt->dst.expires);
394 else
395 return false;
396}
397
a50feda5 398static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 399{
a68886a6
DA
400 struct fib6_info *from;
401
402 from = rcu_dereference(rt->from);
403
1716a961
G
404 if (rt->rt6i_flags & RTF_EXPIRES) {
405 if (time_after(jiffies, rt->dst.expires))
a50feda5 406 return true;
a68886a6 407 } else if (from) {
1e2ea8ad 408 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 409 fib6_check_expired(from);
1716a961 410 }
a50feda5 411 return false;
1da177e4
LT
412}
413
b1d40991
DA
414void fib6_select_path(const struct net *net, struct fib6_result *res,
415 struct flowi6 *fl6, int oif, bool have_oif_match,
416 const struct sk_buff *skb, int strict)
51ebd318 417{
8d1c802b 418 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
419 struct fib6_info *match = res->f6i;
420
34fe5a1c 421 if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
b1d40991 422 goto out;
51ebd318 423
34fe5a1c
DA
424 if (match->nh && have_oif_match && res->nh)
425 return;
426
8423be89
SY
427 if (skb)
428 IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
429
b673d6cc
JS
430 /* We might have already computed the hash for ICMPv6 errors. In such
431 * case it will always be non-zero. Otherwise now is the time to do it.
432 */
f88d8ea6
DA
433 if (!fl6->mp_hash &&
434 (!match->nh || nexthop_is_multipath(match->nh)))
b4bac172 435 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 436
f88d8ea6
DA
437 if (unlikely(match->nh)) {
438 nexthop_path_fib6_result(res, fl6->mp_hash);
439 return;
440 }
441
1cf844c7 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
b1d40991 443 goto out;
3d709f69 444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
1cf844c7 447 const struct fib6_nh *nh = sibling->fib6_nh;
5e670d84
DA
448 int nh_upper_bound;
449
702cea56 450 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 451 if (fl6->mp_hash > nh_upper_bound)
3d709f69 452 continue;
702cea56 453 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
454 break;
455 match = sibling;
456 break;
457 }
458
b1d40991
DA
459out:
460 res->f6i = match;
1cf844c7 461 res->nh = match->fib6_nh;
51ebd318
ND
462}
463
1da177e4 464/*
66f5d6ce 465 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
466 */
467
0c59d006
DA
468static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
469 const struct in6_addr *saddr, int oif, int flags)
470{
471 const struct net_device *dev;
472
473 if (nh->fib_nh_flags & RTNH_F_DEAD)
474 return false;
475
476 dev = nh->fib_nh_dev;
477 if (oif) {
478 if (dev->ifindex == oif)
479 return true;
480 } else {
481 if (ipv6_chk_addr(net, saddr, dev,
482 flags & RT6_LOOKUP_F_IFACE))
483 return true;
484 }
485
486 return false;
487}
488
962b6803
DA
/* Argument bundle handed to __rt6_nh_dev_match() while iterating over the
 * fib6_nh entries of a nexthop object.
 */
struct fib6_nh_dm_arg {
	/* inputs forwarded to __rt6_device_match() */
	struct net		*net;
	const struct in6_addr	*saddr;
	int			oif;
	int			flags;
	/* output: the fib6_nh most recently visited */
	struct fib6_nh		*nh;
};
496
497static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
498{
499 struct fib6_nh_dm_arg *arg = _arg;
500
501 arg->nh = nh;
502 return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
503 arg->flags);
504}
505
506/* returns fib6_nh from nexthop or NULL */
507static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
508 struct fib6_result *res,
509 const struct in6_addr *saddr,
510 int oif, int flags)
511{
512 struct fib6_nh_dm_arg arg = {
513 .net = net,
514 .saddr = saddr,
515 .oif = oif,
516 .flags = flags,
517 };
518
519 if (nexthop_is_blackhole(nh))
520 return NULL;
521
522 if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
523 return arg.nh;
524
525 return NULL;
526}
527
75ef7389
DA
528static void rt6_device_match(struct net *net, struct fib6_result *res,
529 const struct in6_addr *saddr, int oif, int flags)
1da177e4 530{
75ef7389
DA
531 struct fib6_info *f6i = res->f6i;
532 struct fib6_info *spf6i;
533 struct fib6_nh *nh;
1da177e4 534
75ef7389 535 if (!oif && ipv6_addr_any(saddr)) {
f88d8ea6
DA
536 if (unlikely(f6i->nh)) {
537 nh = nexthop_fib6_nh(f6i->nh);
538 if (nexthop_is_blackhole(f6i->nh))
539 goto out_blackhole;
540 } else {
541 nh = f6i->fib6_nh;
542 }
7d21fec9
DA
543 if (!(nh->fib_nh_flags & RTNH_F_DEAD))
544 goto out;
75ef7389 545 }
dd3abc4e 546
75ef7389 547 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
962b6803
DA
548 bool matched = false;
549
550 if (unlikely(spf6i->nh)) {
551 nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
552 oif, flags);
553 if (nh)
554 matched = true;
555 } else {
556 nh = spf6i->fib6_nh;
557 if (__rt6_device_match(net, nh, saddr, oif, flags))
558 matched = true;
559 }
560 if (matched) {
75ef7389 561 res->f6i = spf6i;
7d21fec9 562 goto out;
75ef7389 563 }
dd3abc4e 564 }
1da177e4 565
75ef7389
DA
566 if (oif && flags & RT6_LOOKUP_F_IFACE) {
567 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 568 nh = res->f6i->fib6_nh;
7d21fec9 569 goto out;
75ef7389 570 }
8067bb8c 571
f88d8ea6
DA
572 if (unlikely(f6i->nh)) {
573 nh = nexthop_fib6_nh(f6i->nh);
574 if (nexthop_is_blackhole(f6i->nh))
575 goto out_blackhole;
576 } else {
577 nh = f6i->fib6_nh;
578 }
579
7d21fec9 580 if (nh->fib_nh_flags & RTNH_F_DEAD) {
75ef7389 581 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 582 nh = res->f6i->fib6_nh;
75ef7389 583 }
7d21fec9
DA
584out:
585 res->nh = nh;
586 res->fib6_type = res->f6i->fib6_type;
587 res->fib6_flags = res->f6i->fib6_flags;
f88d8ea6
DA
588 return;
589
590out_blackhole:
591 res->fib6_flags |= RTF_REJECT;
592 res->fib6_type = RTN_BLACKHOLE;
593 res->nh = nh;
1da177e4
LT
594}
595
27097255 596#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
597struct __rt6_probe_work {
598 struct work_struct work;
599 struct in6_addr target;
600 struct net_device *dev;
fb67510b 601 netdevice_tracker dev_tracker;
c2f17e82
HFS
602};
603
604static void rt6_probe_deferred(struct work_struct *w)
605{
606 struct in6_addr mcaddr;
607 struct __rt6_probe_work *work =
608 container_of(w, struct __rt6_probe_work, work);
609
610 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 611 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
d62607c3 612 netdev_put(work->dev, &work->dev_tracker);
662f5533 613 kfree(work);
c2f17e82
HFS
614}
615
cc3a86c8 616static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 617{
f547fac6 618 struct __rt6_probe_work *work = NULL;
5e670d84 619 const struct in6_addr *nh_gw;
1bef4c22 620 unsigned long last_probe;
f2c31e32 621 struct neighbour *neigh;
5e670d84 622 struct net_device *dev;
f547fac6 623 struct inet6_dev *idev;
5e670d84 624
27097255
YH
625 /*
626 * Okay, this does not seem to be appropriate
627 * for now, however, we need to check if it
628 * is really so; aka Router Reachability Probing.
629 *
630 * Router Reachability Probe MUST be rate-limited
631 * to no more than one per minute.
632 */
004b3942 633 if (!fib6_nh->fib_nh_gw_family)
7ff74a59 634 return;
5e670d84 635
cc3a86c8
DA
636 nh_gw = &fib6_nh->fib_nh_gw6;
637 dev = fib6_nh->fib_nh_dev;
09eed119 638 rcu_read_lock();
1bef4c22 639 last_probe = READ_ONCE(fib6_nh->last_probe);
f547fac6 640 idev = __in6_dev_get(dev);
5e670d84 641 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 642 if (neigh) {
b071af52 643 if (READ_ONCE(neigh->nud_state) & NUD_VALID)
8d6c31bf
MKL
644 goto out;
645
09eed119 646 write_lock_bh(&neigh->lock);
990edb42
MKL
647 if (!(neigh->nud_state & NUD_VALID) &&
648 time_after(jiffies,
e248948a
ED
649 neigh->updated +
650 READ_ONCE(idev->cnf.rtr_probe_interval))) {
990edb42
MKL
651 work = kmalloc(sizeof(*work), GFP_ATOMIC);
652 if (work)
653 __neigh_set_probe_once(neigh);
c2f17e82 654 }
09eed119 655 write_unlock_bh(&neigh->lock);
1bef4c22 656 } else if (time_after(jiffies, last_probe +
e248948a 657 READ_ONCE(idev->cnf.rtr_probe_interval))) {
990edb42 658 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 659 }
990edb42 660
1bef4c22
ED
661 if (!work || cmpxchg(&fib6_nh->last_probe,
662 last_probe, jiffies) != last_probe) {
663 kfree(work);
664 } else {
990edb42 665 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84 666 work->target = *nh_gw;
d62607c3 667 netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
5e670d84 668 work->dev = dev;
990edb42
MKL
669 schedule_work(&work->work);
670 }
671
8d6c31bf 672out:
09eed119 673 rcu_read_unlock();
27097255
YH
674}
675#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* intentionally empty */
}
679#endif
680
1da177e4 681/*
554cfb7e 682 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 683 */
1ba9a895 684static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 685{
afc154e9 686 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 687 struct neighbour *neigh;
f2c31e32 688
09eed119 689 rcu_read_lock();
1ba9a895
DA
690 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
691 &fib6_nh->fib_nh_gw6);
145a3621 692 if (neigh) {
c486640a
ED
693 u8 nud_state = READ_ONCE(neigh->nud_state);
694
695 if (nud_state & NUD_VALID)
afc154e9 696 ret = RT6_NUD_SUCCEED;
398bcbeb 697#ifdef CONFIG_IPV6_ROUTER_PREF
c486640a 698 else if (!(nud_state & NUD_FAILED))
afc154e9 699 ret = RT6_NUD_SUCCEED;
7e980569
JB
700 else
701 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 702#endif
afc154e9
HFS
703 } else {
704 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 705 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 706 }
09eed119 707 rcu_read_unlock();
145a3621 708
a5a81f0b 709 return ret;
1da177e4
LT
710}
711
702cea56
DA
712static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
713 int strict)
1da177e4 714{
6e1809a5
DA
715 int m = 0;
716
717 if (!oif || nh->fib_nh_dev->ifindex == oif)
718 m = 2;
1ab1457c 719
77d16f45 720 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 721 return RT6_NUD_FAIL_HARD;
ebacaaa0 722#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 723 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 724#endif
1ba9a895 725 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 726 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 727 int n = rt6_check_neigh(nh);
afc154e9
HFS
728 if (n < 0)
729 return n;
730 }
554cfb7e
YH
731 return m;
732}
733
28679ed1
DA
734static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
735 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 736{
afc154e9 737 bool match_do_rr = false;
28679ed1
DA
738 bool rc = false;
739 int m;
35103d11 740
28679ed1 741 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
742 goto out;
743
28679ed1
DA
744 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
745 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 746 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 747 goto out;
f11e6659 748
28679ed1 749 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 750 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
751 match_do_rr = true;
752 m = 0; /* lowest valid score */
7e980569 753 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 754 goto out;
afc154e9
HFS
755 }
756
757 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 758 rt6_probe(nh);
f11e6659 759
7e980569 760 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 761 if (m > *mpri) {
afc154e9 762 *do_rr = match_do_rr;
f11e6659 763 *mpri = m;
28679ed1 764 rc = true;
f11e6659 765 }
f11e6659 766out:
28679ed1 767 return rc;
f11e6659
DM
768}
769
17a5984e
DA
770struct fib6_nh_frl_arg {
771 u32 flags;
772 int oif;
773 int strict;
774 int *mpri;
775 bool *do_rr;
776 struct fib6_nh *nh;
777};
778
779static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
780{
781 struct fib6_nh_frl_arg *arg = _arg;
782
783 arg->nh = nh;
784 return find_match(nh, arg->flags, arg->oif, arg->strict,
785 arg->mpri, arg->do_rr);
786}
787
b7bc4b6a 788static void __find_rr_leaf(struct fib6_info *f6i_start,
30c15f03 789 struct fib6_info *nomatch, u32 metric,
b7bc4b6a 790 struct fib6_result *res, struct fib6_info **cont,
30c15f03 791 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 792{
b7bc4b6a 793 struct fib6_info *f6i;
1da177e4 794
b7bc4b6a
DA
795 for (f6i = f6i_start;
796 f6i && f6i != nomatch;
797 f6i = rcu_dereference(f6i->fib6_next)) {
17a5984e 798 bool matched = false;
30c15f03
DA
799 struct fib6_nh *nh;
800
b7bc4b6a
DA
801 if (cont && f6i->fib6_metric != metric) {
802 *cont = f6i;
30c15f03 803 return;
9fbdcfaf
SK
804 }
805
b7bc4b6a 806 if (fib6_check_expired(f6i))
28679ed1
DA
807 continue;
808
17a5984e
DA
809 if (unlikely(f6i->nh)) {
810 struct fib6_nh_frl_arg arg = {
811 .flags = f6i->fib6_flags,
812 .oif = oif,
813 .strict = strict,
814 .mpri = mpri,
815 .do_rr = do_rr
816 };
817
818 if (nexthop_is_blackhole(f6i->nh)) {
819 res->fib6_flags = RTF_REJECT;
820 res->fib6_type = RTN_BLACKHOLE;
821 res->f6i = f6i;
822 res->nh = nexthop_fib6_nh(f6i->nh);
823 return;
824 }
825 if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
826 &arg)) {
827 matched = true;
828 nh = arg.nh;
829 }
830 } else {
831 nh = f6i->fib6_nh;
832 if (find_match(nh, f6i->fib6_flags, oif, strict,
833 mpri, do_rr))
834 matched = true;
835 }
836 if (matched) {
b7bc4b6a
DA
837 res->f6i = f6i;
838 res->nh = nh;
7d21fec9
DA
839 res->fib6_flags = f6i->fib6_flags;
840 res->fib6_type = f6i->fib6_type;
b7bc4b6a 841 }
9fbdcfaf 842 }
30c15f03 843}
9fbdcfaf 844
b7bc4b6a
DA
845static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
846 struct fib6_info *rr_head, int oif, int strict,
847 bool *do_rr, struct fib6_result *res)
30c15f03 848{
b7bc4b6a
DA
849 u32 metric = rr_head->fib6_metric;
850 struct fib6_info *cont = NULL;
30c15f03 851 int mpri = -1;
9fbdcfaf 852
b7bc4b6a 853 __find_rr_leaf(rr_head, NULL, metric, res, &cont,
30c15f03 854 oif, strict, do_rr, &mpri);
28679ed1 855
b7bc4b6a 856 __find_rr_leaf(leaf, rr_head, metric, res, &cont,
30c15f03 857 oif, strict, do_rr, &mpri);
9fbdcfaf 858
b7bc4b6a
DA
859 if (res->f6i || !cont)
860 return;
9fbdcfaf 861
b7bc4b6a 862 __find_rr_leaf(cont, NULL, metric, res, NULL,
30c15f03 863 oif, strict, do_rr, &mpri);
f11e6659 864}
1da177e4 865
b7bc4b6a
DA
866static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
867 struct fib6_result *res, int strict)
f11e6659 868{
8d1c802b 869 struct fib6_info *leaf = rcu_dereference(fn->leaf);
b7bc4b6a 870 struct fib6_info *rt0;
afc154e9 871 bool do_rr = false;
17ecf590 872 int key_plen;
1da177e4 873
b7bc4b6a
DA
874 /* make sure this function or its helpers sets f6i */
875 res->f6i = NULL;
876
421842ed 877 if (!leaf || leaf == net->ipv6.fib6_null_entry)
b7bc4b6a 878 goto out;
8d1040e8 879
66f5d6ce 880 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 881 if (!rt0)
66f5d6ce 882 rt0 = leaf;
1da177e4 883
17ecf590
WW
884 /* Double check to make sure fn is not an intermediate node
885 * and fn->leaf does not points to its child's leaf
886 * (This might happen if all routes under fn are deleted from
887 * the tree and fib6_repair_tree() is called on the node.)
888 */
93c2fb25 889 key_plen = rt0->fib6_dst.plen;
17ecf590 890#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
891 if (rt0->fib6_src.plen)
892 key_plen = rt0->fib6_src.plen;
17ecf590
WW
893#endif
894 if (fn->fn_bit != key_plen)
b7bc4b6a 895 goto out;
1da177e4 896
b7bc4b6a 897 find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
afc154e9 898 if (do_rr) {
8fb11a9a 899 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 900
554cfb7e 901 /* no entries matched; do round-robin */
93c2fb25 902 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 903 next = leaf;
f11e6659 904
66f5d6ce 905 if (next != rt0) {
93c2fb25 906 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 907 /* make sure next is not being deleted from the tree */
93c2fb25 908 if (next->fib6_node)
66f5d6ce 909 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 910 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 911 }
1da177e4 912 }
1da177e4 913
b7bc4b6a
DA
914out:
915 if (!res->f6i) {
916 res->f6i = net->ipv6.fib6_null_entry;
1cf844c7 917 res->nh = res->f6i->fib6_nh;
7d21fec9
DA
918 res->fib6_flags = res->f6i->fib6_flags;
919 res->fib6_type = res->f6i->fib6_type;
b7bc4b6a 920 }
1da177e4
LT
921}
922
85bd05de 923static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8b9df265 924{
85bd05de
DA
925 return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
926 res->nh->fib_nh_gw_family;
8b9df265
MKL
927}
928
70ceb4f5
YH
929#ifdef CONFIG_IPV6_ROUTE_INFO
930int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 931 const struct in6_addr *gwaddr)
70ceb4f5 932{
c346dca1 933 struct net *net = dev_net(dev);
70ceb4f5
YH
934 struct route_info *rinfo = (struct route_info *) opt;
935 struct in6_addr prefix_buf, *prefix;
5eb902b8 936 struct fib6_table *table;
70ceb4f5 937 unsigned int pref;
4bed72e4 938 unsigned long lifetime;
8d1c802b 939 struct fib6_info *rt;
70ceb4f5
YH
940
941 if (len < sizeof(struct route_info)) {
942 return -EINVAL;
943 }
944
945 /* Sanity check for prefix_len and length */
946 if (rinfo->length > 3) {
947 return -EINVAL;
948 } else if (rinfo->prefix_len > 128) {
949 return -EINVAL;
950 } else if (rinfo->prefix_len > 64) {
951 if (rinfo->length < 2) {
952 return -EINVAL;
953 }
954 } else if (rinfo->prefix_len > 0) {
955 if (rinfo->length < 1) {
956 return -EINVAL;
957 }
958 }
959
960 pref = rinfo->route_pref;
961 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 962 return -EINVAL;
70ceb4f5 963
4bed72e4 964 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
965
966 if (rinfo->length == 3)
967 prefix = (struct in6_addr *)rinfo->prefix;
968 else {
969 /* this function is safe */
970 ipv6_addr_prefix(&prefix_buf,
971 (struct in6_addr *)rinfo->prefix,
972 rinfo->prefix_len);
973 prefix = &prefix_buf;
974 }
975
f104a567 976 if (rinfo->prefix_len == 0)
afb1d4b5 977 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
978 else
979 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 980 gwaddr, dev);
70ceb4f5
YH
981
982 if (rt && !lifetime) {
11dd74b3 983 ip6_del_rt(net, rt, false);
70ceb4f5
YH
984 rt = NULL;
985 }
986
987 if (!rt && lifetime)
830218c1
DA
988 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
989 dev, pref);
70ceb4f5 990 else if (rt)
93c2fb25
DA
991 rt->fib6_flags = RTF_ROUTEINFO |
992 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
993
994 if (rt) {
5eb902b8
KFL
995 table = rt->fib6_table;
996 spin_lock_bh(&table->tb6_lock);
997
998 if (!addrconf_finite_timeout(lifetime)) {
14895687 999 fib6_clean_expires(rt);
5eb902b8
KFL
1000 fib6_remove_gc_list(rt);
1001 } else {
14895687 1002 fib6_set_expires(rt, jiffies + HZ * lifetime);
5eb902b8
KFL
1003 fib6_add_gc_list(rt);
1004 }
1005
1006 spin_unlock_bh(&table->tb6_lock);
1716a961 1007
93531c67 1008 fib6_info_release(rt);
70ceb4f5
YH
1009 }
1010 return 0;
1011}
1012#endif
1013
ae90d867
DA
1014/*
1015 * Misc support functions
1016 */
1017
1018/* called with rcu_lock held */
0d161581 1019static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
ae90d867 1020{
0d161581 1021 struct net_device *dev = res->nh->fib_nh_dev;
ae90d867 1022
7d21fec9 1023 if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
1024 /* for copies of local routes, dst->dev needs to be the
1025 * device if it is a master device, the master device if
1026 * device is enslaved, and the loopback as the default
1027 */
1028 if (netif_is_l3_slave(dev) &&
7d21fec9 1029 !rt6_need_strict(&res->f6i->fib6_dst.addr))
ae90d867
DA
1030 dev = l3mdev_master_dev_rcu(dev);
1031 else if (!netif_is_l3_master(dev))
1032 dev = dev_net(dev)->loopback_dev;
1033 /* last case is netif_is_l3_master(dev) is true in which
1034 * case we want dev returned to be dev
1035 */
1036 }
1037
1038 return dev;
1039}
1040
6edb3c96
DA
1041static const int fib6_prop[RTN_MAX + 1] = {
1042 [RTN_UNSPEC] = 0,
1043 [RTN_UNICAST] = 0,
1044 [RTN_LOCAL] = 0,
1045 [RTN_BROADCAST] = 0,
1046 [RTN_ANYCAST] = 0,
1047 [RTN_MULTICAST] = 0,
1048 [RTN_BLACKHOLE] = -EINVAL,
1049 [RTN_UNREACHABLE] = -EHOSTUNREACH,
1050 [RTN_PROHIBIT] = -EACCES,
1051 [RTN_THROW] = -EAGAIN,
1052 [RTN_NAT] = -EINVAL,
1053 [RTN_XRESOLVE] = -EINVAL,
1054};
1055
1056static int ip6_rt_type_to_error(u8 fib6_type)
1057{
1058 return fib6_prop[fib6_type];
1059}
1060
8d1c802b 1061static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
1062{
1063 unsigned short flags = 0;
1064
1065 if (rt->dst_nocount)
1066 flags |= DST_NOCOUNT;
1067 if (rt->dst_nopolicy)
1068 flags |= DST_NOPOLICY;
3b6761d1
DA
1069
1070 return flags;
1071}
1072
7d21fec9 1073static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
6edb3c96 1074{
7d21fec9 1075 rt->dst.error = ip6_rt_type_to_error(fib6_type);
6edb3c96 1076
7d21fec9 1077 switch (fib6_type) {
6edb3c96
DA
1078 case RTN_BLACKHOLE:
1079 rt->dst.output = dst_discard_out;
1080 rt->dst.input = dst_discard;
1081 break;
1082 case RTN_PROHIBIT:
1083 rt->dst.output = ip6_pkt_prohibit_out;
1084 rt->dst.input = ip6_pkt_prohibit;
1085 break;
1086 case RTN_THROW:
1087 case RTN_UNREACHABLE:
1088 default:
1089 rt->dst.output = ip6_pkt_discard_out;
1090 rt->dst.input = ip6_pkt_discard;
1091 break;
1092 }
1093}
1094
0d161581 1095static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
6edb3c96 1096{
7d21fec9 1097 struct fib6_info *f6i = res->f6i;
0d161581 1098
7d21fec9
DA
1099 if (res->fib6_flags & RTF_REJECT) {
1100 ip6_rt_init_dst_reject(rt, res->fib6_type);
6edb3c96
DA
1101 return;
1102 }
1103
1104 rt->dst.error = 0;
1105 rt->dst.output = ip6_output;
1106
7d21fec9 1107 if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
6edb3c96 1108 rt->dst.input = ip6_input;
7d21fec9 1109 } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
1110 rt->dst.input = ip6_mc_input;
1111 } else {
1112 rt->dst.input = ip6_forward;
1113 }
1114
0d161581
DA
1115 if (res->nh->fib_nh_lws) {
1116 rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
6edb3c96
DA
1117 lwtunnel_set_redirect(&rt->dst);
1118 }
1119
1120 rt->dst.lastuse = jiffies;
1121}
1122
e873e4b9 1123/* Caller must already hold reference to @from */
8d1c802b 1124static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 1125{
ae90d867 1126 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 1127 rcu_assign_pointer(rt->from, from);
e1255ed4 1128 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
1129}
1130
0d161581
DA
1131/* Caller must already hold reference to f6i in result */
1132static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
ae90d867 1133{
0d161581
DA
1134 const struct fib6_nh *nh = res->nh;
1135 const struct net_device *dev = nh->fib_nh_dev;
1136 struct fib6_info *f6i = res->f6i;
dcd1f572 1137
0d161581 1138 ip6_rt_init_dst(rt, res);
6edb3c96 1139
0d161581 1140 rt->rt6i_dst = f6i->fib6_dst;
dcd1f572 1141 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
7d21fec9 1142 rt->rt6i_flags = res->fib6_flags;
0d161581
DA
1143 if (nh->fib_nh_gw_family) {
1144 rt->rt6i_gateway = nh->fib_nh_gw6;
2b2450ca
DA
1145 rt->rt6i_flags |= RTF_GATEWAY;
1146 }
0d161581 1147 rt6_set_from(rt, f6i);
ae90d867 1148#ifdef CONFIG_IPV6_SUBTREES
0d161581 1149 rt->rt6i_src = f6i->fib6_src;
ae90d867 1150#endif
ae90d867
DA
1151}
1152
a3c00e46
MKL
1153static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1154 struct in6_addr *saddr)
1155{
66f5d6ce 1156 struct fib6_node *pn, *sn;
a3c00e46
MKL
1157 while (1) {
1158 if (fn->fn_flags & RTN_TL_ROOT)
1159 return NULL;
66f5d6ce
WW
1160 pn = rcu_dereference(fn->parent);
1161 sn = FIB6_SUBTREE(pn);
1162 if (sn && sn != fn)
6454743b 1163 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1164 else
1165 fn = pn;
1166 if (fn->fn_flags & RTN_RTINFO)
1167 return fn;
1168 }
1169}
c71099ac 1170
10585b43 1171static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1172{
1173 struct rt6_info *rt = *prt;
1174
1175 if (dst_hold_safe(&rt->dst))
1176 return true;
10585b43 1177 if (net) {
d3843fe5
WW
1178 rt = net->ipv6.ip6_null_entry;
1179 dst_hold(&rt->dst);
1180 } else {
1181 rt = NULL;
1182 }
1183 *prt = rt;
1184 return false;
1185}
1186
dec9b0e2 1187/* called with rcu_lock held */
9b6b35ab 1188static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
dec9b0e2 1189{
9b6b35ab
DA
1190 struct net_device *dev = res->nh->fib_nh_dev;
1191 struct fib6_info *f6i = res->f6i;
1192 unsigned short flags;
dec9b0e2
DA
1193 struct rt6_info *nrt;
1194
9b6b35ab 1195 if (!fib6_info_hold_safe(f6i))
1c87e79a 1196 goto fallback;
e873e4b9 1197
9b6b35ab 1198 flags = fib6_info_dst_flags(f6i);
93531c67 1199 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1200 if (!nrt) {
9b6b35ab 1201 fib6_info_release(f6i);
1c87e79a
XL
1202 goto fallback;
1203 }
dec9b0e2 1204
0d161581 1205 ip6_rt_copy_init(nrt, res);
1c87e79a
XL
1206 return nrt;
1207
1208fallback:
1209 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1210 dst_hold(&nrt->dst);
dec9b0e2
DA
1211 return nrt;
1212}
1213
55cced4f 1214INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
8ed67789 1215 struct fib6_table *table,
b75cc8f9
DA
1216 struct flowi6 *fl6,
1217 const struct sk_buff *skb,
1218 int flags)
1da177e4 1219{
b1d40991 1220 struct fib6_result res = {};
1da177e4 1221 struct fib6_node *fn;
23fb93a4 1222 struct rt6_info *rt;
1da177e4 1223
66f5d6ce 1224 rcu_read_lock();
6454743b 1225 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1226restart:
b1d40991
DA
1227 res.f6i = rcu_dereference(fn->leaf);
1228 if (!res.f6i)
1229 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1230 else
75ef7389
DA
1231 rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1232 flags);
af52a52c 1233
b1d40991 1234 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1235 fn = fib6_backtrack(fn, &fl6->saddr);
1236 if (fn)
1237 goto restart;
2b760fcf 1238
af52a52c
DA
1239 rt = net->ipv6.ip6_null_entry;
1240 dst_hold(&rt->dst);
1241 goto out;
f88d8ea6
DA
1242 } else if (res.fib6_flags & RTF_REJECT) {
1243 goto do_create;
af52a52c 1244 }
d3843fe5 1245
b1d40991
DA
1246 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1247 fl6->flowi6_oif != 0, skb, flags);
1248
2b760fcf 1249 /* Search through exception table */
7e4b5128 1250 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1251 if (rt) {
10585b43 1252 if (ip6_hold_safe(net, &rt))
dec9b0e2 1253 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1254 } else {
f88d8ea6 1255do_create:
9b6b35ab 1256 rt = ip6_create_rt_rcu(&res);
dec9b0e2 1257 }
b811580d 1258
af52a52c 1259out:
8ff2e5b2 1260 trace_fib6_table_lookup(net, &res, table, fl6);
af52a52c 1261
66f5d6ce 1262 rcu_read_unlock();
b811580d 1263
c71099ac 1264 return rt;
c71099ac
TG
1265}
1266
67ba4152 1267struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1268 const struct sk_buff *skb, int flags)
ea6e574e 1269{
b75cc8f9 1270 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1271}
1272EXPORT_SYMBOL_GPL(ip6_route_lookup);
1273
9acd9f3a 1274struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1275 const struct in6_addr *saddr, int oif,
1276 const struct sk_buff *skb, int strict)
c71099ac 1277{
4c9483b2
DM
1278 struct flowi6 fl6 = {
1279 .flowi6_oif = oif,
1280 .daddr = *daddr,
c71099ac
TG
1281 };
1282 struct dst_entry *dst;
77d16f45 1283 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1284
adaa70bb 1285 if (saddr) {
4c9483b2 1286 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1287 flags |= RT6_LOOKUP_F_HAS_SADDR;
1288 }
1289
b75cc8f9 1290 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac 1291 if (dst->error == 0)
e8dfd42c 1292 return dst_rt6_info(dst);
c71099ac
TG
1293
1294 dst_release(dst);
1295
1da177e4
LT
1296 return NULL;
1297}
7159039a
YH
1298EXPORT_SYMBOL(rt6_lookup);
1299
c71099ac 1300/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1301 * It takes new route entry, the addition fails by any reason the
1302 * route is released.
1303 * Caller must hold dst before calling it.
1da177e4
LT
1304 */
1305
8d1c802b 1306static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1307 struct netlink_ext_ack *extack)
1da177e4
LT
1308{
1309 int err;
c71099ac 1310 struct fib6_table *table;
1da177e4 1311
93c2fb25 1312 table = rt->fib6_table;
66f5d6ce 1313 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1314 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1315 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1316
1317 return err;
1318}
1319
8d1c802b 1320int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1321{
afb1d4b5 1322 struct nl_info info = { .nl_net = net, };
e715b6d3 1323
d4ead6b3 1324 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1325}
1326
85bd05de 1327static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
8b9df265
MKL
1328 const struct in6_addr *daddr,
1329 const struct in6_addr *saddr)
1da177e4 1330{
85bd05de 1331 struct fib6_info *f6i = res->f6i;
4832c30d 1332 struct net_device *dev;
1da177e4
LT
1333 struct rt6_info *rt;
1334
1335 /*
1336 * Clone the route.
1337 */
1338
85bd05de 1339 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1340 return NULL;
1341
0d161581 1342 dev = ip6_rt_get_dev_rcu(res);
93531c67 1343 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9 1344 if (!rt) {
85bd05de 1345 fib6_info_release(f6i);
83a09abd 1346 return NULL;
e873e4b9 1347 }
83a09abd 1348
0d161581 1349 ip6_rt_copy_init(rt, res);
83a09abd 1350 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1351 rt->rt6i_dst.addr = *daddr;
1352 rt->rt6i_dst.plen = 128;
1da177e4 1353
85bd05de
DA
1354 if (!rt6_is_gw_or_nonexthop(res)) {
1355 if (f6i->fib6_dst.plen != 128 &&
1356 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
83a09abd 1357 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1358#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1359 if (rt->rt6i_src.plen && saddr) {
1360 rt->rt6i_src.addr = *saddr;
1361 rt->rt6i_src.plen = 128;
8b9df265 1362 }
83a09abd 1363#endif
95a9a5ba 1364 }
1da177e4 1365
95a9a5ba
YH
1366 return rt;
1367}
1da177e4 1368
db3fedee 1369static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
d52d3997 1370{
db3fedee
DA
1371 struct fib6_info *f6i = res->f6i;
1372 unsigned short flags = fib6_info_dst_flags(f6i);
4832c30d 1373 struct net_device *dev;
d52d3997
MKL
1374 struct rt6_info *pcpu_rt;
1375
db3fedee 1376 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1377 return NULL;
1378
4832c30d 1379 rcu_read_lock();
0d161581 1380 dev = ip6_rt_get_dev_rcu(res);
d8882935 1381 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
4832c30d 1382 rcu_read_unlock();
e873e4b9 1383 if (!pcpu_rt) {
db3fedee 1384 fib6_info_release(f6i);
d52d3997 1385 return NULL;
e873e4b9 1386 }
0d161581 1387 ip6_rt_copy_init(pcpu_rt, res);
d52d3997 1388 pcpu_rt->rt6i_flags |= RTF_PCPU;
8f34e53b
DA
1389
1390 if (f6i->nh)
1391 pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1392
d52d3997
MKL
1393 return pcpu_rt;
1394}
1395
8f34e53b
DA
1396static bool rt6_is_valid(const struct rt6_info *rt6)
1397{
1398 return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1399}
1400
66f5d6ce 1401/* It should be called with rcu_read_lock() acquired */
db3fedee 1402static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
d52d3997 1403{
c353071a 1404 struct rt6_info *pcpu_rt;
d52d3997 1405
c353071a 1406 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
d52d3997 1407
8f34e53b
DA
1408 if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1409 struct rt6_info *prev, **p;
1410
1411 p = this_cpu_ptr(res->nh->rt6i_pcpu);
b01e1c03 1412 /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
8f34e53b
DA
1413 prev = xchg(p, NULL);
1414 if (prev) {
1415 dst_dev_put(&prev->dst);
1416 dst_release(&prev->dst);
1417 }
1418
1419 pcpu_rt = NULL;
1420 }
1421
a73e4195
MKL
1422 return pcpu_rt;
1423}
1424
afb1d4b5 1425static struct rt6_info *rt6_make_pcpu_route(struct net *net,
db3fedee 1426 const struct fib6_result *res)
a73e4195
MKL
1427{
1428 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997 1429
db3fedee 1430 pcpu_rt = ip6_rt_pcpu_alloc(res);
0e09edcc
WW
1431 if (!pcpu_rt)
1432 return NULL;
d52d3997 1433
f40b6ae2 1434 p = this_cpu_ptr(res->nh->rt6i_pcpu);
a94b9367 1435 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1436 BUG_ON(prev);
a94b9367 1437
61fb0d01
ED
1438 if (res->f6i->fib6_destroying) {
1439 struct fib6_info *from;
1440
1441 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1442 fib6_info_release(from);
1443 }
1444
d52d3997
MKL
1445 return pcpu_rt;
1446}
1447
35732d01
WW
1448/* exception hash table implementation
1449 */
1450static DEFINE_SPINLOCK(rt6_exception_lock);
1451
1452/* Remove rt6_ex from hash table and free the memory
1453 * Caller must hold rt6_exception_lock
1454 */
1455static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1456 struct rt6_exception *rt6_ex)
1457{
f5b51fe8 1458 struct fib6_info *from;
b2427e67 1459 struct net *net;
81eb8447 1460
35732d01
WW
1461 if (!bucket || !rt6_ex)
1462 return;
b2427e67
CIK
1463
1464 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1465 net->ipv6.rt6_stats->fib_rt_cache--;
1466
1467 /* purge completely the exception to allow releasing the held resources:
1468 * some [sk] cache may keep the dst around for unlimited time
1469 */
0e233874 1470 from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
f5b51fe8
PA
1471 fib6_info_release(from);
1472 dst_dev_put(&rt6_ex->rt6i->dst);
1473
35732d01 1474 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1475 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1476 kfree_rcu(rt6_ex, rcu);
1477 WARN_ON_ONCE(!bucket->depth);
1478 bucket->depth--;
1479}
1480
1481/* Remove oldest rt6_ex in bucket and free the memory
1482 * Caller must hold rt6_exception_lock
1483 */
1484static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1485{
1486 struct rt6_exception *rt6_ex, *oldest = NULL;
1487
1488 if (!bucket)
1489 return;
1490
1491 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1492 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1493 oldest = rt6_ex;
1494 }
1495 rt6_remove_exception(bucket, oldest);
1496}
1497
1498static u32 rt6_exception_hash(const struct in6_addr *dst,
1499 const struct in6_addr *src)
1500{
49ecc2e9 1501 static siphash_aligned_key_t rt6_exception_key;
4785305c
ED
1502 struct {
1503 struct in6_addr dst;
1504 struct in6_addr src;
1505 } __aligned(SIPHASH_ALIGNMENT) combined = {
1506 .dst = *dst,
1507 };
1508 u64 val;
35732d01 1509
4785305c 1510 net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
35732d01
WW
1511
1512#ifdef CONFIG_IPV6_SUBTREES
1513 if (src)
4785305c 1514 combined.src = *src;
35732d01 1515#endif
4785305c
ED
1516 val = siphash(&combined, sizeof(combined), &rt6_exception_key);
1517
1518 return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
35732d01
WW
1519}
1520
1521/* Helper function to find the cached rt in the hash table
1522 * and update bucket pointer to point to the bucket for this
1523 * (daddr, saddr) pair
1524 * Caller must hold rt6_exception_lock
1525 */
1526static struct rt6_exception *
1527__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1528 const struct in6_addr *daddr,
1529 const struct in6_addr *saddr)
1530{
1531 struct rt6_exception *rt6_ex;
1532 u32 hval;
1533
1534 if (!(*bucket) || !daddr)
1535 return NULL;
1536
1537 hval = rt6_exception_hash(daddr, saddr);
1538 *bucket += hval;
1539
1540 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1541 struct rt6_info *rt6 = rt6_ex->rt6i;
1542 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1543
1544#ifdef CONFIG_IPV6_SUBTREES
1545 if (matched && saddr)
1546 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1547#endif
1548 if (matched)
1549 return rt6_ex;
1550 }
1551 return NULL;
1552}
1553
1554/* Helper function to find the cached rt in the hash table
1555 * and update bucket pointer to point to the bucket for this
1556 * (daddr, saddr) pair
1557 * Caller must hold rcu_read_lock()
1558 */
1559static struct rt6_exception *
1560__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1561 const struct in6_addr *daddr,
1562 const struct in6_addr *saddr)
1563{
1564 struct rt6_exception *rt6_ex;
1565 u32 hval;
1566
1567 WARN_ON_ONCE(!rcu_read_lock_held());
1568
1569 if (!(*bucket) || !daddr)
1570 return NULL;
1571
1572 hval = rt6_exception_hash(daddr, saddr);
1573 *bucket += hval;
1574
1575 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1576 struct rt6_info *rt6 = rt6_ex->rt6i;
1577 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1578
1579#ifdef CONFIG_IPV6_SUBTREES
1580 if (matched && saddr)
1581 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1582#endif
1583 if (matched)
1584 return rt6_ex;
1585 }
1586 return NULL;
1587}
1588
b748f260 1589static unsigned int fib6_mtu(const struct fib6_result *res)
d4ead6b3 1590{
b748f260 1591 const struct fib6_nh *nh = res->nh;
d4ead6b3
DA
1592 unsigned int mtu;
1593
b748f260
DA
1594 if (res->f6i->fib6_pmtu) {
1595 mtu = res->f6i->fib6_pmtu;
dcd1f572 1596 } else {
b748f260 1597 struct net_device *dev = nh->fib_nh_dev;
dcd1f572
DA
1598 struct inet6_dev *idev;
1599
1600 rcu_read_lock();
1601 idev = __in6_dev_get(dev);
e7135f48 1602 mtu = READ_ONCE(idev->cnf.mtu6);
dcd1f572
DA
1603 rcu_read_unlock();
1604 }
1605
d4ead6b3
DA
1606 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1607
b748f260 1608 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
d4ead6b3
DA
1609}
1610
cc5c073a
DA
1611#define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
1612
1613/* used when the flushed bit is not relevant, only access to the bucket
1614 * (ie., all bucket users except rt6_insert_exception);
1615 *
1616 * called under rcu lock; sometimes called with rt6_exception_lock held
1617 */
1618static
1619struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1620 spinlock_t *lock)
1621{
1622 struct rt6_exception_bucket *bucket;
1623
1624 if (lock)
1625 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1626 lockdep_is_held(lock));
1627 else
1628 bucket = rcu_dereference(nh->rt6i_exception_bucket);
1629
1630 /* remove bucket flushed bit if set */
1631 if (bucket) {
1632 unsigned long p = (unsigned long)bucket;
1633
1634 p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1635 bucket = (struct rt6_exception_bucket *)p;
1636 }
1637
1638 return bucket;
1639}
1640
1641static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1642{
1643 unsigned long p = (unsigned long)bucket;
1644
1645 return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1646}
1647
1648/* called with rt6_exception_lock held */
1649static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1650 spinlock_t *lock)
1651{
1652 struct rt6_exception_bucket *bucket;
1653 unsigned long p;
1654
1655 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1656 lockdep_is_held(lock));
1657
1658 p = (unsigned long)bucket;
1659 p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1660 bucket = (struct rt6_exception_bucket *)p;
1661 rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1662}
1663
35732d01 1664static int rt6_insert_exception(struct rt6_info *nrt,
5012f0a5 1665 const struct fib6_result *res)
35732d01 1666{
5e670d84 1667 struct net *net = dev_net(nrt->dst.dev);
35732d01 1668 struct rt6_exception_bucket *bucket;
cc5c073a 1669 struct fib6_info *f6i = res->f6i;
35732d01
WW
1670 struct in6_addr *src_key = NULL;
1671 struct rt6_exception *rt6_ex;
cc5c073a 1672 struct fib6_nh *nh = res->nh;
a00df2ca 1673 int max_depth;
35732d01
WW
1674 int err = 0;
1675
35732d01
WW
1676 spin_lock_bh(&rt6_exception_lock);
1677
cc5c073a
DA
1678 bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1679 lockdep_is_held(&rt6_exception_lock));
35732d01
WW
1680 if (!bucket) {
1681 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1682 GFP_ATOMIC);
1683 if (!bucket) {
1684 err = -ENOMEM;
1685 goto out;
1686 }
cc5c073a
DA
1687 rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1688 } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1689 err = -EINVAL;
1690 goto out;
35732d01
WW
1691 }
1692
1693#ifdef CONFIG_IPV6_SUBTREES
5012f0a5 1694 /* fib6_src.plen != 0 indicates f6i is in subtree
35732d01 1695 * and exception table is indexed by a hash of
5012f0a5 1696 * both fib6_dst and fib6_src.
35732d01 1697 * Otherwise, the exception table is indexed by
5012f0a5 1698 * a hash of only fib6_dst.
35732d01 1699 */
5012f0a5 1700 if (f6i->fib6_src.plen)
35732d01
WW
1701 src_key = &nrt->rt6i_src.addr;
1702#endif
5012f0a5 1703 /* rt6_mtu_change() might lower mtu on f6i.
f5bbe7ee 1704 * Only insert this exception route if its mtu
5012f0a5 1705 * is less than f6i's mtu value.
f5bbe7ee 1706 */
b748f260 1707 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
f5bbe7ee
WW
1708 err = -EINVAL;
1709 goto out;
1710 }
60006a48 1711
35732d01
WW
1712 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1713 src_key);
1714 if (rt6_ex)
1715 rt6_remove_exception(bucket, rt6_ex);
1716
1717 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1718 if (!rt6_ex) {
1719 err = -ENOMEM;
1720 goto out;
1721 }
1722 rt6_ex->rt6i = nrt;
1723 rt6_ex->stamp = jiffies;
35732d01
WW
1724 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1725 bucket->depth++;
81eb8447 1726 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01 1727
a00df2ca 1728 /* Randomize max depth to avoid some side channels attacks. */
8032bf12 1729 max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
a00df2ca 1730 while (bucket->depth > max_depth)
35732d01
WW
1731 rt6_exception_remove_oldest(bucket);
1732
1733out:
1734 spin_unlock_bh(&rt6_exception_lock);
1735
1736 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1737 if (!err) {
5012f0a5
DA
1738 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1739 fib6_update_sernum(net, f6i);
1740 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
b886d5f2
PA
1741 fib6_force_start_gc(net);
1742 }
35732d01
WW
1743
1744 return err;
1745}
1746
c0b220cf 1747static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
35732d01
WW
1748{
1749 struct rt6_exception_bucket *bucket;
1750 struct rt6_exception *rt6_ex;
1751 struct hlist_node *tmp;
1752 int i;
1753
1754 spin_lock_bh(&rt6_exception_lock);
35732d01 1755
cc5c073a 1756 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
35732d01
WW
1757 if (!bucket)
1758 goto out;
1759
cc5c073a
DA
1760 /* Prevent rt6_insert_exception() to recreate the bucket list */
1761 if (!from)
1762 fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1763
35732d01 1764 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
cc5c073a
DA
1765 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1766 if (!from ||
1767 rcu_access_pointer(rt6_ex->rt6i->from) == from)
1768 rt6_remove_exception(bucket, rt6_ex);
1769 }
1770 WARN_ON_ONCE(!from && bucket->depth);
35732d01
WW
1771 bucket++;
1772 }
35732d01
WW
1773out:
1774 spin_unlock_bh(&rt6_exception_lock);
1775}
1776
e659ba31
DA
/* nexthop_for_each_fib6_nh() callback: flush the exceptions of @nh that
 * belong to the fib entry passed in @arg.  Always returns 0.
 */
static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
{
	fib6_nh_flush_exceptions(nh, arg);

	return 0;
}
1785
c0b220cf
DA
1786void rt6_flush_exceptions(struct fib6_info *f6i)
1787{
e659ba31
DA
1788 if (f6i->nh)
1789 nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1790 f6i);
1791 else
1792 fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
c0b220cf
DA
1793}
1794
35732d01
WW
1795/* Find cached rt in the hash table inside passed in rt
1796 * Caller has to hold rcu_read_lock()
1797 */
7e4b5128 1798static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
1799 const struct in6_addr *daddr,
1800 const struct in6_addr *saddr)
35732d01 1801{
510e2ced 1802 const struct in6_addr *src_key = NULL;
35732d01 1803 struct rt6_exception_bucket *bucket;
35732d01 1804 struct rt6_exception *rt6_ex;
7e4b5128 1805 struct rt6_info *ret = NULL;
35732d01 1806
35732d01 1807#ifdef CONFIG_IPV6_SUBTREES
7e4b5128 1808 /* fib6i_src.plen != 0 indicates f6i is in subtree
35732d01 1809 * and exception table is indexed by a hash of
7e4b5128 1810 * both fib6_dst and fib6_src.
510e2ced
WW
1811 * However, the src addr used to create the hash
1812 * might not be exactly the passed in saddr which
1813 * is a /128 addr from the flow.
1814 * So we need to use f6i->fib6_src to redo lookup
1815 * if the passed in saddr does not find anything.
1816 * (See the logic in ip6_rt_cache_alloc() on how
1817 * rt->rt6i_src is updated.)
35732d01 1818 */
7e4b5128 1819 if (res->f6i->fib6_src.plen)
35732d01 1820 src_key = saddr;
510e2ced 1821find_ex:
35732d01 1822#endif
cc5c073a 1823 bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
35732d01
WW
1824 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1825
1826 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
7e4b5128 1827 ret = rt6_ex->rt6i;
35732d01 1828
510e2ced
WW
1829#ifdef CONFIG_IPV6_SUBTREES
1830 /* Use fib6_src as src_key and redo lookup */
1831 if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1832 src_key = &res->f6i->fib6_src.addr;
1833 goto find_ex;
1834 }
1835#endif
1836
7e4b5128 1837 return ret;
35732d01
WW
1838}
1839
1840/* Remove the passed in cached rt from the hash table that contains it */
cc5c073a 1841static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
c0b220cf 1842 const struct rt6_info *rt)
35732d01 1843{
c0b220cf 1844 const struct in6_addr *src_key = NULL;
35732d01 1845 struct rt6_exception_bucket *bucket;
35732d01
WW
1846 struct rt6_exception *rt6_ex;
1847 int err;
1848
cc5c073a 1849 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
35732d01
WW
1850 return -ENOENT;
1851
1852 spin_lock_bh(&rt6_exception_lock);
cc5c073a
DA
1853 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1854
35732d01 1855#ifdef CONFIG_IPV6_SUBTREES
cc5c073a
DA
1856 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1857 * and exception table is indexed by a hash of
1858 * both rt6i_dst and rt6i_src.
35732d01
WW
1859 * Otherwise, the exception table is indexed by
1860 * a hash of only rt6i_dst.
1861 */
c0b220cf 1862 if (plen)
35732d01
WW
1863 src_key = &rt->rt6i_src.addr;
1864#endif
1865 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1866 &rt->rt6i_dst.addr,
1867 src_key);
1868 if (rt6_ex) {
1869 rt6_remove_exception(bucket, rt6_ex);
1870 err = 0;
1871 } else {
1872 err = -ENOENT;
1873 }
1874
1875 spin_unlock_bh(&rt6_exception_lock);
1876 return err;
1877}
1878
e659ba31
DA
/* Argument block for rt6_nh_remove_exception_rt() */
struct fib6_nh_excptn_arg {
	struct rt6_info	*rt;	/* cached route to remove */
	int		plen;	/* source prefix length of its fib entry */
};

/* nexthop_for_each_fib6_nh() callback: try to remove arg->rt from the
 * exception table of @nh.  Returns 1 when an entry was removed and 0
 * otherwise.
 */
static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_excptn_arg *arg = _arg;

	return fib6_nh_remove_exception(nh, arg->plen, arg->rt) == 0 ? 1 : 0;
}
1895
c0b220cf
DA
1896static int rt6_remove_exception_rt(struct rt6_info *rt)
1897{
1898 struct fib6_info *from;
1899
1900 from = rcu_dereference(rt->from);
cc5c073a 1901 if (!from || !(rt->rt6i_flags & RTF_CACHE))
c0b220cf
DA
1902 return -EINVAL;
1903
e659ba31
DA
1904 if (from->nh) {
1905 struct fib6_nh_excptn_arg arg = {
1906 .rt = rt,
1907 .plen = from->fib6_src.plen
1908 };
1909 int rc;
1910
1911 /* rc = 1 means an entry was found */
1912 rc = nexthop_for_each_fib6_nh(from->nh,
1913 rt6_nh_remove_exception_rt,
1914 &arg);
1915 return rc ? 0 : -ENOENT;
1916 }
1917
1cf844c7 1918 return fib6_nh_remove_exception(from->fib6_nh,
cc5c073a 1919 from->fib6_src.plen, rt);
c0b220cf
DA
1920}
1921
35732d01
WW
1922/* Find rt6_ex which contains the passed in rt cache and
1923 * refresh its stamp
1924 */
cc5c073a 1925static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
c0b220cf 1926 const struct rt6_info *rt)
35732d01 1927{
c0b220cf 1928 const struct in6_addr *src_key = NULL;
35732d01 1929 struct rt6_exception_bucket *bucket;
35732d01 1930 struct rt6_exception *rt6_ex;
193f3685 1931
cc5c073a 1932 bucket = fib6_nh_get_excptn_bucket(nh, NULL);
35732d01 1933#ifdef CONFIG_IPV6_SUBTREES
cc5c073a
DA
1934 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1935 * and exception table is indexed by a hash of
1936 * both rt6i_dst and rt6i_src.
35732d01
WW
1937 * Otherwise, the exception table is indexed by
1938 * a hash of only rt6i_dst.
1939 */
c0b220cf 1940 if (plen)
35732d01
WW
1941 src_key = &rt->rt6i_src.addr;
1942#endif
cc5c073a 1943 rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
35732d01
WW
1944 if (rt6_ex)
1945 rt6_ex->stamp = jiffies;
c0b220cf
DA
1946}
1947
e659ba31
DA
1948struct fib6_nh_match_arg {
1949 const struct net_device *dev;
1950 const struct in6_addr *gw;
1951 struct fib6_nh *match;
1952};
1953
1954/* determine if fib6_nh has given device and gateway */
1955static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1956{
1957 struct fib6_nh_match_arg *arg = _arg;
1958
1959 if (arg->dev != nh->fib_nh_dev ||
1960 (arg->gw && !nh->fib_nh_gw_family) ||
1961 (!arg->gw && nh->fib_nh_gw_family) ||
1962 (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1963 return 0;
1964
1965 arg->match = nh;
1966
1967 /* found a match, break the loop */
1968 return 1;
1969}
1970
c0b220cf
DA
1971static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1972{
1973 struct fib6_info *from;
e659ba31 1974 struct fib6_nh *fib6_nh;
35732d01 1975
c0b220cf
DA
1976 rcu_read_lock();
1977
1978 from = rcu_dereference(rt->from);
1979 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1980 goto unlock;
1981
e659ba31
DA
1982 if (from->nh) {
1983 struct fib6_nh_match_arg arg = {
1984 .dev = rt->dst.dev,
1985 .gw = &rt->rt6i_gateway,
1986 };
1987
1988 nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1989
1990 if (!arg.match)
cff6a327 1991 goto unlock;
e659ba31
DA
1992 fib6_nh = arg.match;
1993 } else {
1994 fib6_nh = from->fib6_nh;
1995 }
1996 fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
193f3685 1997unlock:
35732d01
WW
1998 rcu_read_unlock();
1999}
2000
e9fa1495
SB
2001static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
2002 struct rt6_info *rt, int mtu)
2003{
2004 /* If the new MTU is lower than the route PMTU, this new MTU will be the
2005 * lowest MTU in the path: always allow updating the route PMTU to
2006 * reflect PMTU decreases.
2007 *
2008 * If the new MTU is higher, and the route PMTU is equal to the local
2009 * MTU, this means the old MTU is the lowest in the path, so allow
2010 * updating it: if other nodes now have lower MTUs, PMTU discovery will
2011 * handle this.
2012 */
2013
2014 if (dst_mtu(&rt->dst) >= mtu)
2015 return true;
2016
2017 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2018 return true;
2019
2020 return false;
2021}
2022
2023static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
cc5c073a 2024 const struct fib6_nh *nh, int mtu)
f5bbe7ee
WW
2025{
2026 struct rt6_exception_bucket *bucket;
2027 struct rt6_exception *rt6_ex;
2028 int i;
2029
cc5c073a 2030 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
e9fa1495
SB
2031 if (!bucket)
2032 return;
2033
2034 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2035 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2036 struct rt6_info *entry = rt6_ex->rt6i;
2037
2038 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 2039 * route), the metrics of its rt->from have already
e9fa1495
SB
2040 * been updated.
2041 */
d4ead6b3 2042 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 2043 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 2044 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 2045 }
e9fa1495 2046 bucket++;
f5bbe7ee
WW
2047 }
2048}
2049
b16cb459
WW
/* Both flag bits an exception entry must carry to be considered by
 * fib6_nh_exceptions_clean_tohost().
 */
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2051
cc5c073a
DA
2052static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2053 const struct in6_addr *gateway)
b16cb459
WW
2054{
2055 struct rt6_exception_bucket *bucket;
2056 struct rt6_exception *rt6_ex;
2057 struct hlist_node *tmp;
2058 int i;
2059
cc5c073a 2060 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
b16cb459
WW
2061 return;
2062
2063 spin_lock_bh(&rt6_exception_lock);
cc5c073a 2064 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
b16cb459
WW
2065 if (bucket) {
2066 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2067 hlist_for_each_entry_safe(rt6_ex, tmp,
2068 &bucket->chain, hlist) {
2069 struct rt6_info *entry = rt6_ex->rt6i;
2070
2071 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2072 RTF_CACHE_GATEWAY &&
2073 ipv6_addr_equal(gateway,
2074 &entry->rt6i_gateway)) {
2075 rt6_remove_exception(bucket, rt6_ex);
2076 }
2077 }
2078 bucket++;
2079 }
2080 }
2081
2082 spin_unlock_bh(&rt6_exception_lock);
2083}
2084
c757faa8
WW
2085static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2086 struct rt6_exception *rt6_ex,
2087 struct fib6_gc_args *gc_args,
2088 unsigned long now)
2089{
2090 struct rt6_info *rt = rt6_ex->rt6i;
2091
1859bac0
PA
2092 /* we are pruning and obsoleting aged-out and non gateway exceptions
2093 * even if others have still references to them, so that on next
2094 * dst_check() such references can be dropped.
2095 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
2096 * expired, independently from their aging, as per RFC 8201 section 4
2097 */
31afeb42
WW
2098 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2099 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
20df28fb 2100 pr_debug("aging clone %p\n", rt);
31afeb42
WW
2101 rt6_remove_exception(bucket, rt6_ex);
2102 return;
2103 }
2104 } else if (time_after(jiffies, rt->dst.expires)) {
20df28fb 2105 pr_debug("purging expired route %p\n", rt);
c757faa8
WW
2106 rt6_remove_exception(bucket, rt6_ex);
2107 return;
31afeb42
WW
2108 }
2109
2110 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8 2111 struct neighbour *neigh;
c757faa8 2112
1bfa26ff 2113 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1bfa26ff 2114
b7a320c3 2115 if (!(neigh && (neigh->flags & NTF_ROUTER))) {
20df28fb
BL
2116 pr_debug("purging route %p via non-router but gateway\n",
2117 rt);
c757faa8
WW
2118 rt6_remove_exception(bucket, rt6_ex);
2119 return;
2120 }
2121 }
31afeb42 2122
c757faa8
WW
2123 gc_args->more++;
2124}
2125
cc5c073a 2126static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
c0b220cf
DA
2127 struct fib6_gc_args *gc_args,
2128 unsigned long now)
c757faa8
WW
2129{
2130 struct rt6_exception_bucket *bucket;
2131 struct rt6_exception *rt6_ex;
2132 struct hlist_node *tmp;
2133 int i;
2134
cc5c073a 2135 if (!rcu_access_pointer(nh->rt6i_exception_bucket))
c757faa8
WW
2136 return;
2137
1bfa26ff
ED
2138 rcu_read_lock_bh();
2139 spin_lock(&rt6_exception_lock);
cc5c073a 2140 bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
c757faa8
WW
2141 if (bucket) {
2142 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2143 hlist_for_each_entry_safe(rt6_ex, tmp,
2144 &bucket->chain, hlist) {
2145 rt6_age_examine_exception(bucket, rt6_ex,
2146 gc_args, now);
2147 }
2148 bucket++;
2149 }
2150 }
1bfa26ff
ED
2151 spin_unlock(&rt6_exception_lock);
2152 rcu_read_unlock_bh();
c757faa8
WW
2153}
2154
e659ba31
DA
/* Argument bundle passed through nexthop_for_each_fib6_nh() to
 * rt6_nh_age_exceptions().
 */
struct fib6_nh_age_excptn_arg {
	struct fib6_gc_args	*gc_args;	/* forwarded to fib6_nh_age_exceptions() */
	unsigned long		now;		/* forwarded to fib6_nh_age_exceptions() */
};
2159
2160static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
2161{
2162 struct fib6_nh_age_excptn_arg *arg = _arg;
2163
2164 fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
2165 return 0;
2166}
2167
cc5c073a 2168void rt6_age_exceptions(struct fib6_info *f6i,
c0b220cf
DA
2169 struct fib6_gc_args *gc_args,
2170 unsigned long now)
2171{
e659ba31
DA
2172 if (f6i->nh) {
2173 struct fib6_nh_age_excptn_arg arg = {
2174 .gc_args = gc_args,
2175 .now = now
2176 };
2177
2178 nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2179 &arg);
2180 } else {
2181 fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2182 }
c0b220cf
DA
2183}
2184
1d053da9 2185/* must be called with rcu lock held */
effda4dd
DA
2186int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2187 struct flowi6 *fl6, struct fib6_result *res, int strict)
1da177e4 2188{
367efcb9 2189 struct fib6_node *fn, *saved_fn;
1da177e4 2190
6454743b 2191 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 2192 saved_fn = fn;
1da177e4 2193
a3c00e46 2194redo_rt6_select:
effda4dd
DA
2195 rt6_select(net, fn, oif, res, strict);
2196 if (res->f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2197 fn = fib6_backtrack(fn, &fl6->saddr);
2198 if (fn)
2199 goto redo_rt6_select;
367efcb9
MKL
2200 else if (strict & RT6_LOOKUP_F_REACHABLE) {
2201 /* also consider unreachable route */
2202 strict &= ~RT6_LOOKUP_F_REACHABLE;
2203 fn = saved_fn;
2204 goto redo_rt6_select;
367efcb9 2205 }
a3c00e46
MKL
2206 }
2207
effda4dd 2208 trace_fib6_table_lookup(net, res, table, fl6);
fb9de91e 2209
effda4dd 2210 return 0;
1d053da9
DA
2211}
2212
2213struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2214 int oif, struct flowi6 *fl6,
2215 const struct sk_buff *skb, int flags)
2216{
b1d40991 2217 struct fib6_result res = {};
0e09edcc 2218 struct rt6_info *rt = NULL;
1d053da9
DA
2219 int strict = 0;
2220
0e09edcc
WW
2221 WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2222 !rcu_read_lock_held());
2223
1d053da9
DA
2224 strict |= flags & RT6_LOOKUP_F_IFACE;
2225 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
32f75417 2226 if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
1d053da9
DA
2227 strict |= RT6_LOOKUP_F_REACHABLE;
2228
2229 rcu_read_lock();
2230
effda4dd 2231 fib6_table_lookup(net, table, oif, fl6, &res, strict);
0e09edcc
WW
2232 if (res.f6i == net->ipv6.fib6_null_entry)
2233 goto out;
23fb93a4 2234
b1d40991 2235 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 2236
23fb93a4 2237 /*Search through exception table */
7e4b5128 2238 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 2239 if (rt) {
0e09edcc 2240 goto out;
3da59bd9 2241 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 2242 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
2243 /* Create a RTF_CACHE clone which will not be
2244 * owned by the fib6 tree. It is for the special case where
2245 * the daddr in the skb during the neighbor look-up is different
2246 * from the fl6->daddr used to look-up route here.
2247 */
0e09edcc 2248 rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
c71099ac 2249
0e09edcc
WW
2250 if (rt) {
2251 /* 1 refcnt is taken during ip6_rt_cache_alloc().
2252 * As rt6_uncached_list_add() does not consume refcnt,
2253 * this refcnt is always returned to the caller even
2254 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
1cfb71ee 2255 */
0e09edcc 2256 rt6_uncached_list_add(rt);
0e09edcc 2257 rcu_read_unlock();
b811580d 2258
0e09edcc
WW
2259 return rt;
2260 }
d52d3997
MKL
2261 } else {
2262 /* Get a percpu copy */
951f788a 2263 local_bh_disable();
0e09edcc 2264 rt = rt6_get_pcpu_route(&res);
d52d3997 2265
0e09edcc
WW
2266 if (!rt)
2267 rt = rt6_make_pcpu_route(net, &res);
93531c67 2268
951f788a 2269 local_bh_enable();
d52d3997 2270 }
0e09edcc
WW
2271out:
2272 if (!rt)
2273 rt = net->ipv6.ip6_null_entry;
2274 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2275 ip6_hold_safe(net, &rt);
2276 rcu_read_unlock();
2277
2278 return rt;
1da177e4 2279}
9ff74384 2280EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 2281
55cced4f 2282INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
b75cc8f9
DA
2283 struct fib6_table *table,
2284 struct flowi6 *fl6,
2285 const struct sk_buff *skb,
2286 int flags)
4acad72d 2287{
b75cc8f9 2288 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
2289}
2290
d409b847
MB
2291struct dst_entry *ip6_route_input_lookup(struct net *net,
2292 struct net_device *dev,
b75cc8f9
DA
2293 struct flowi6 *fl6,
2294 const struct sk_buff *skb,
2295 int flags)
72331bc0
SL
2296{
2297 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2298 flags |= RT6_LOOKUP_F_IFACE;
2299
b75cc8f9 2300 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 2301}
d409b847 2302EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 2303
23aebdac 2304static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
2305 struct flow_keys *keys,
2306 struct flow_keys *flkeys)
23aebdac
JS
2307{
2308 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2309 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 2310 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
2311 const struct ipv6hdr *inner_iph;
2312 const struct icmp6hdr *icmph;
2313 struct ipv6hdr _inner_iph;
cea67a2d 2314 struct icmp6hdr _icmph;
23aebdac
JS
2315
2316 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2317 goto out;
2318
cea67a2d
ED
2319 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2320 sizeof(_icmph), &_icmph);
2321 if (!icmph)
2322 goto out;
2323
54074f1d 2324 if (!icmpv6_is_err(icmph->icmp6_type))
23aebdac
JS
2325 goto out;
2326
2327 inner_iph = skb_header_pointer(skb,
2328 skb_transport_offset(skb) + sizeof(*icmph),
2329 sizeof(_inner_iph), &_inner_iph);
2330 if (!inner_iph)
2331 goto out;
2332
2333 key_iph = inner_iph;
5e5d6fed 2334 _flkeys = NULL;
23aebdac 2335out:
5e5d6fed
RP
2336 if (_flkeys) {
2337 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2338 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2339 keys->tags.flow_label = _flkeys->tags.flow_label;
2340 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2341 } else {
2342 keys->addrs.v6addrs.src = key_iph->saddr;
2343 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 2344 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
2345 keys->basic.ip_proto = key_iph->nexthdr;
2346 }
23aebdac
JS
2347}
2348
73c2c5cb
IS
2349static u32 rt6_multipath_custom_hash_outer(const struct net *net,
2350 const struct sk_buff *skb,
2351 bool *p_has_inner)
2352{
2353 u32 hash_fields = ip6_multipath_hash_fields(net);
2354 struct flow_keys keys, hash_keys;
2355
2356 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2357 return 0;
2358
2359 memset(&hash_keys, 0, sizeof(hash_keys));
2360 skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
2361
2362 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2363 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2364 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2365 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2366 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2367 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2368 hash_keys.basic.ip_proto = keys.basic.ip_proto;
2369 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2370 hash_keys.tags.flow_label = keys.tags.flow_label;
2371 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2372 hash_keys.ports.src = keys.ports.src;
2373 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2374 hash_keys.ports.dst = keys.ports.dst;
2375
2376 *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
2377 return flow_hash_from_keys(&hash_keys);
2378}
2379
2380static u32 rt6_multipath_custom_hash_inner(const struct net *net,
2381 const struct sk_buff *skb,
2382 bool has_inner)
2383{
2384 u32 hash_fields = ip6_multipath_hash_fields(net);
2385 struct flow_keys keys, hash_keys;
2386
2387 /* We assume the packet carries an encapsulation, but if none was
2388 * encountered during dissection of the outer flow, then there is no
2389 * point in calling the flow dissector again.
2390 */
2391 if (!has_inner)
2392 return 0;
2393
2394 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
2395 return 0;
2396
2397 memset(&hash_keys, 0, sizeof(hash_keys));
2398 skb_flow_dissect_flow_keys(skb, &keys, 0);
2399
2400 if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
2401 return 0;
2402
2403 if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2404 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2405 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2406 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2407 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2408 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2409 } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2410 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2411 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2412 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2413 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2414 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2415 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
2416 hash_keys.tags.flow_label = keys.tags.flow_label;
2417 }
2418
2419 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
2420 hash_keys.basic.ip_proto = keys.basic.ip_proto;
2421 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
2422 hash_keys.ports.src = keys.ports.src;
2423 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
2424 hash_keys.ports.dst = keys.ports.dst;
2425
2426 return flow_hash_from_keys(&hash_keys);
2427}
2428
2429static u32 rt6_multipath_custom_hash_skb(const struct net *net,
2430 const struct sk_buff *skb)
2431{
2432 u32 mhash, mhash_inner;
2433 bool has_inner = true;
2434
2435 mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
2436 mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
2437
2438 return jhash_2words(mhash, mhash_inner, 0);
2439}
2440
2441static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
2442 const struct flowi6 *fl6)
2443{
2444 u32 hash_fields = ip6_multipath_hash_fields(net);
2445 struct flow_keys hash_keys;
2446
2447 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2448 return 0;
2449
2450 memset(&hash_keys, 0, sizeof(hash_keys));
2451 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2452 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2453 hash_keys.addrs.v6addrs.src = fl6->saddr;
2454 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2455 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2456 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2457 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2458 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2459 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2460 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2461 hash_keys.ports.src = fl6->fl6_sport;
2462 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2463 hash_keys.ports.dst = fl6->fl6_dport;
2464
2465 return flow_hash_from_keys(&hash_keys);
2466}
2467
23aebdac 2468/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2469u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2470 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2471{
2472 struct flow_keys hash_keys;
b95b6e07 2473 u32 mhash = 0;
23aebdac 2474
bbfa047a 2475 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2476 case 0:
2477 memset(&hash_keys, 0, sizeof(hash_keys));
2478 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2479 if (skb) {
2480 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2481 } else {
2482 hash_keys.addrs.v6addrs.src = fl6->saddr;
2483 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2484 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2485 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2486 }
b95b6e07 2487 mhash = flow_hash_from_keys(&hash_keys);
b4bac172
DA
2488 break;
2489 case 1:
2490 if (skb) {
2491 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2492 struct flow_keys keys;
2493
2494 /* short-circuit if we already have L4 hash present */
2495 if (skb->l4_hash)
2496 return skb_get_hash_raw(skb) >> 1;
2497
2498 memset(&hash_keys, 0, sizeof(hash_keys));
2499
13fdb940 2500 if (!flkeys) {
b4bac172
DA
2501 skb_flow_dissect_flow_keys(skb, &keys, flag);
2502 flkeys = &keys;
2503 }
2504 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2505 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2506 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2507 hash_keys.ports.src = flkeys->ports.src;
2508 hash_keys.ports.dst = flkeys->ports.dst;
2509 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2510 } else {
2511 memset(&hash_keys, 0, sizeof(hash_keys));
2512 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2513 hash_keys.addrs.v6addrs.src = fl6->saddr;
2514 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2515 hash_keys.ports.src = fl6->fl6_sport;
2516 hash_keys.ports.dst = fl6->fl6_dport;
2517 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2518 }
b95b6e07 2519 mhash = flow_hash_from_keys(&hash_keys);
b4bac172 2520 break;
d8f74f09
SS
2521 case 2:
2522 memset(&hash_keys, 0, sizeof(hash_keys));
2523 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2524 if (skb) {
2525 struct flow_keys keys;
2526
2527 if (!flkeys) {
2528 skb_flow_dissect_flow_keys(skb, &keys, 0);
2529 flkeys = &keys;
2530 }
2531
2532 /* Inner can be v4 or v6 */
2533 if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2534 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2535 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2536 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2537 } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2538 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2539 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2540 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2541 hash_keys.tags.flow_label = flkeys->tags.flow_label;
2542 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2543 } else {
2544 /* Same as case 0 */
2545 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2546 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2547 }
2548 } else {
2549 /* Same as case 0 */
2550 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2551 hash_keys.addrs.v6addrs.src = fl6->saddr;
2552 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2553 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2554 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2555 }
b95b6e07 2556 mhash = flow_hash_from_keys(&hash_keys);
d8f74f09 2557 break;
73c2c5cb
IS
2558 case 3:
2559 if (skb)
2560 mhash = rt6_multipath_custom_hash_skb(net, skb);
2561 else
2562 mhash = rt6_multipath_custom_hash_fl6(net, fl6);
2563 break;
23aebdac
JS
2564 }
2565
9a2a537a 2566 return mhash >> 1;
23aebdac
JS
2567}
2568
67f415dd 2569/* Called with rcu held */
c71099ac
TG
2570void ip6_route_input(struct sk_buff *skb)
2571{
b71d1d42 2572 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2573 struct net *net = dev_net(skb->dev);
67f415dd 2574 int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
904af04d 2575 struct ip_tunnel_info *tun_info;
4c9483b2 2576 struct flowi6 fl6 = {
e0d56fdd 2577 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2578 .daddr = iph->daddr,
2579 .saddr = iph->saddr,
6502ca52 2580 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2581 .flowi6_mark = skb->mark,
2582 .flowi6_proto = iph->nexthdr,
c71099ac 2583 };
5e5d6fed 2584 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2585
904af04d 2586 tun_info = skb_tunnel_info(skb);
46fa062a 2587 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2588 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2589
2590 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2591 flkeys = &_flkeys;
2592
23aebdac 2593 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2594 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2595 skb_dst_drop(skb);
67f415dd
WW
2596 skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2597 &fl6, skb, flags));
c71099ac
TG
2598}
2599
55cced4f 2600INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
b75cc8f9
DA
2601 struct fib6_table *table,
2602 struct flowi6 *fl6,
2603 const struct sk_buff *skb,
2604 int flags)
1da177e4 2605{
b75cc8f9 2606 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2607}
2608
90317bcd
GN
2609static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2610 const struct sock *sk,
2611 struct flowi6 *fl6,
2612 int flags)
c71099ac 2613{
d46a9d67 2614 bool any_src;
c71099ac 2615
3ede0bbc
RS
2616 if (ipv6_addr_type(&fl6->daddr) &
2617 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2618 struct dst_entry *dst;
2619
7d9e5f42 2620 /* This function does not take refcnt on the dst */
4c1feac5
DA
2621 dst = l3mdev_link_scope_lookup(net, fl6);
2622 if (dst)
2623 return dst;
2624 }
ca254490 2625
1fb9489b 2626 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2627
7d9e5f42 2628 flags |= RT6_LOOKUP_F_DST_NOREF;
d46a9d67 2629 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2630 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2631 (fl6->flowi6_oif && any_src))
77d16f45 2632 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2633
d46a9d67 2634 if (!any_src)
adaa70bb 2635 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1 2636 else if (sk)
fa17a6d8 2637 flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
adaa70bb 2638
b75cc8f9 2639 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2640}
7d9e5f42
WW
2641
2642struct dst_entry *ip6_route_output_flags(struct net *net,
2643 const struct sock *sk,
2644 struct flowi6 *fl6,
2645 int flags)
2646{
13fdb940
SK
2647 struct dst_entry *dst;
2648 struct rt6_info *rt6;
7d9e5f42 2649
13fdb940
SK
2650 rcu_read_lock();
2651 dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
e8dfd42c 2652 rt6 = dst_rt6_info(dst);
13fdb940 2653 /* For dst cached in uncached_list, refcnt is already taken. */
d288a162 2654 if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
13fdb940
SK
2655 dst = &net->ipv6.ip6_null_entry->dst;
2656 dst_hold(dst);
2657 }
2658 rcu_read_unlock();
7d9e5f42 2659
13fdb940 2660 return dst;
7d9e5f42 2661}
6f21c96a 2662EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2663
2774c131 2664struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2665{
e8dfd42c 2666 struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
1dbe3252 2667 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2668 struct dst_entry *new = NULL;
2669
762c8dc7 2670 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
62cf27e5 2671 DST_OBSOLETE_DEAD, 0);
14e50e57 2672 if (rt) {
0a1f5962 2673 rt6_info_init(rt);
81eb8447 2674 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2675
0a1f5962 2676 new = &rt->dst;
14e50e57 2677 new->__use = 1;
352e512c 2678 new->input = dst_discard;
ede2059d 2679 new->output = dst_discard_out;
14e50e57 2680
0a1f5962 2681 dst_copy_metrics(new, &ort->dst);
14e50e57 2682
1dbe3252 2683 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2684 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2685 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2686
2687 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2688#ifdef CONFIG_IPV6_SUBTREES
2689 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2690#endif
14e50e57
DM
2691 }
2692
69ead7af
DM
2693 dst_release(dst_orig);
2694 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2695}
14e50e57 2696
1da177e4
LT
/*
 *	Destination cache support functions
 */
2700
8d1c802b 2701static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2702{
93531c67
DA
2703 u32 rt_cookie = 0;
2704
8ae86971 2705 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2706 return false;
2707
2708 if (fib6_check_expired(f6i))
2709 return false;
2710
2711 return true;
4b32b5ad
MKL
2712}
2713
a68886a6
DA
2714static struct dst_entry *rt6_check(struct rt6_info *rt,
2715 struct fib6_info *from,
2716 u32 cookie)
3da59bd9 2717{
36143645 2718 u32 rt_cookie = 0;
c5cff856 2719
49d05fe2 2720 if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
93531c67 2721 rt_cookie != cookie)
3da59bd9
MKL
2722 return NULL;
2723
2724 if (rt6_check_expired(rt))
2725 return NULL;
2726
2727 return &rt->dst;
2728}
2729
a68886a6
DA
2730static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2731 struct fib6_info *from,
2732 u32 cookie)
3da59bd9 2733{
5973fb1e
MKL
2734 if (!__rt6_check_expired(rt) &&
2735 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2736 fib6_check(from, cookie))
3da59bd9
MKL
2737 return &rt->dst;
2738 else
2739 return NULL;
2740}
2741
bbd807df
BV
2742INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
2743 u32 cookie)
1da177e4 2744{
a87b7dc9 2745 struct dst_entry *dst_ret;
a68886a6 2746 struct fib6_info *from;
1da177e4
LT
2747 struct rt6_info *rt;
2748
e8dfd42c 2749 rt = dst_rt6_info(dst);
a87b7dc9 2750
8f34e53b
DA
2751 if (rt->sernum)
2752 return rt6_is_valid(rt) ? dst : NULL;
2753
a87b7dc9 2754 rcu_read_lock();
1da177e4 2755
6f3118b5
ND
2756 /* All IPV6 dsts are created with ->obsolete set to the value
2757 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2758 * into this function always.
2759 */
e3bc10bd 2760
a68886a6 2761 from = rcu_dereference(rt->from);
4b32b5ad 2762
a68886a6 2763 if (from && (rt->rt6i_flags & RTF_PCPU ||
d288a162 2764 unlikely(!list_empty(&rt->dst.rt_uncached))))
a68886a6 2765 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2766 else
a68886a6 2767 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2768
2769 rcu_read_unlock();
2770
2771 return dst_ret;
1da177e4 2772}
9c97921a 2773EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
1da177e4 2774
92f1655a
ED
2775static void ip6_negative_advice(struct sock *sk,
2776 struct dst_entry *dst)
1da177e4 2777{
e8dfd42c 2778 struct rt6_info *rt = dst_rt6_info(dst);
1da177e4 2779
92f1655a
ED
2780 if (rt->rt6i_flags & RTF_CACHE) {
2781 rcu_read_lock();
2782 if (rt6_check_expired(rt)) {
2783 /* counteract the dst_release() in sk_dst_reset() */
2784 dst_hold(dst);
2785 sk_dst_reset(sk);
2786
2787 rt6_remove_exception_rt(rt);
54c1a859 2788 }
92f1655a
ED
2789 rcu_read_unlock();
2790 return;
1da177e4 2791 }
92f1655a 2792 sk_dst_reset(sk);
1da177e4
LT
2793}
2794
2795static void ip6_link_failure(struct sk_buff *skb)
2796{
2797 struct rt6_info *rt;
2798
3ffe533c 2799 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2800
e8dfd42c 2801 rt = dst_rt6_info(skb_dst(skb));
1da177e4 2802 if (rt) {
8a14e46f 2803 rcu_read_lock();
1eb4f758 2804 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2805 rt6_remove_exception_rt(rt);
c5cff856 2806 } else {
a68886a6 2807 struct fib6_info *from;
c5cff856
WW
2808 struct fib6_node *fn;
2809
a68886a6
DA
2810 from = rcu_dereference(rt->from);
2811 if (from) {
2812 fn = rcu_dereference(from->fib6_node);
2813 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
aafc2e32 2814 WRITE_ONCE(fn->fn_sernum, -1);
a68886a6 2815 }
1eb4f758 2816 }
8a14e46f 2817 rcu_read_unlock();
1da177e4
LT
2818 }
2819}
2820
6a3e030f
DA
2821static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2822{
a68886a6
DA
2823 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2824 struct fib6_info *from;
2825
2826 rcu_read_lock();
2827 from = rcu_dereference(rt0->from);
2828 if (from)
2829 rt0->dst.expires = from->expires;
2830 rcu_read_unlock();
2831 }
6a3e030f
DA
2832
2833 dst_set_expires(&rt0->dst, timeout);
2834 rt0->rt6i_flags |= RTF_EXPIRES;
2835}
2836
45e4fd26
MKL
2837static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2838{
2839 struct net *net = dev_net(rt->dst.dev);
2840
d4ead6b3 2841 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2842 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2843 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2844}
2845
0d3f6d29
MKL
2846static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2847{
2848 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2849 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2850}
2851
45e4fd26 2852static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bd085ef6
HL
2853 const struct ipv6hdr *iph, u32 mtu,
2854 bool confirm_neigh)
1da177e4 2855{
0dec879f 2856 const struct in6_addr *daddr, *saddr;
e8dfd42c 2857 struct rt6_info *rt6 = dst_rt6_info(dst);
1da177e4 2858
09454fd0
2859 /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
2860 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2861 * [see also comment in rt6_mtu_change_route()]
2862 */
19bda36c 2863
0dec879f
JA
2864 if (iph) {
2865 daddr = &iph->daddr;
2866 saddr = &iph->saddr;
2867 } else if (sk) {
2868 daddr = &sk->sk_v6_daddr;
2869 saddr = &inet6_sk(sk)->saddr;
2870 } else {
2871 daddr = NULL;
2872 saddr = NULL;
2873 }
bd085ef6
HL
2874
2875 if (confirm_neigh)
2876 dst_confirm_neigh(dst, daddr);
2877
4a65dff8
GK
2878 if (mtu < IPV6_MIN_MTU)
2879 return;
45e4fd26
MKL
2880 if (mtu >= dst_mtu(dst))
2881 return;
9d289715 2882
0d3f6d29 2883 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2884 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2885 /* update rt6_ex->stamp for cache */
2886 if (rt6->rt6i_flags & RTF_CACHE)
2887 rt6_update_exception_stamp_rt(rt6);
0dec879f 2888 } else if (daddr) {
85bd05de 2889 struct fib6_result res = {};
45e4fd26
MKL
2890 struct rt6_info *nrt6;
2891
4d85cd0c 2892 rcu_read_lock();
85bd05de 2893 res.f6i = rcu_dereference(rt6->from);
43a4b60d
DA
2894 if (!res.f6i)
2895 goto out_unlock;
2896
7d21fec9
DA
2897 res.fib6_flags = res.f6i->fib6_flags;
2898 res.fib6_type = res.f6i->fib6_type;
2899
2d44234b
DA
2900 if (res.f6i->nh) {
2901 struct fib6_nh_match_arg arg = {
2902 .dev = dst->dev,
2903 .gw = &rt6->rt6i_gateway,
2904 };
2905
2906 nexthop_for_each_fib6_nh(res.f6i->nh,
2907 fib6_nh_find_match, &arg);
2908
2909 /* fib6_info uses a nexthop that does not have fib6_nh
2910 * using the dst->dev + gw. Should be impossible.
2911 */
43a4b60d
DA
2912 if (!arg.match)
2913 goto out_unlock;
2d44234b
DA
2914
2915 res.nh = arg.match;
2916 } else {
2917 res.nh = res.f6i->fib6_nh;
2918 }
2919
85bd05de 2920 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
45e4fd26
MKL
2921 if (nrt6) {
2922 rt6_do_update_pmtu(nrt6, mtu);
5012f0a5 2923 if (rt6_insert_exception(nrt6, &res))
2b760fcf 2924 dst_release_immediate(&nrt6->dst);
45e4fd26 2925 }
43a4b60d 2926out_unlock:
a68886a6 2927 rcu_read_unlock();
1da177e4
LT
2928 }
2929}
2930
45e4fd26 2931static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
bd085ef6
HL
2932 struct sk_buff *skb, u32 mtu,
2933 bool confirm_neigh)
45e4fd26 2934{
bd085ef6
HL
2935 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2936 confirm_neigh);
45e4fd26
MKL
2937}
2938
42ae66c8 2939void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2940 int oif, u32 mark, kuid_t uid)
81aded24
DM
2941{
2942 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2943 struct dst_entry *dst;
dc92095d
2944 struct flowi6 fl6 = {
2945 .flowi6_oif = oif,
2946 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2947 .daddr = iph->daddr,
2948 .saddr = iph->saddr,
2949 .flowlabel = ip6_flowinfo(iph),
2950 .flowi6_uid = uid,
2951 };
81aded24
DM
2952
2953 dst = ip6_route_output(net, NULL, &fl6);
2954 if (!dst->error)
bd085ef6 2955 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
81aded24
DM
2956 dst_release(dst);
2957}
2958EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2959
2960void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2961{
7ddacfa5 2962 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2963 struct dst_entry *dst;
2964
7ddacfa5
DA
2965 if (!oif && skb->dev)
2966 oif = l3mdev_master_ifindex(skb->dev);
2967
3c5b4d69
ED
2968 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
2969 sk->sk_uid);
33c162a9
MKL
2970
2971 dst = __sk_dst_get(sk);
2972 if (!dst || !dst->obsolete ||
2973 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2974 return;
2975
2976 bh_lock_sock(sk);
2977 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2978 ip6_datagram_dst_update(sk, false);
2979 bh_unlock_sock(sk);
81aded24
DM
2980}
2981EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2982
7d6850f7
AK
2983void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2984 const struct flowi6 *fl6)
2985{
2986#ifdef CONFIG_IPV6_SUBTREES
2987 struct ipv6_pinfo *np = inet6_sk(sk);
2988#endif
2989
2990 ip6_dst_store(sk, dst,
2991 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2992 &sk->sk_v6_daddr : NULL,
2993#ifdef CONFIG_IPV6_SUBTREES
2994 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2995 &np->saddr :
2996#endif
2997 NULL);
2998}
2999
9b6b35ab 3000static bool ip6_redirect_nh_match(const struct fib6_result *res,
0b34eb00
DA
3001 struct flowi6 *fl6,
3002 const struct in6_addr *gw,
3003 struct rt6_info **ret)
3004{
9b6b35ab
DA
3005 const struct fib6_nh *nh = res->nh;
3006
0b34eb00
DA
3007 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
3008 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
3009 return false;
3010
3011 /* rt_cache's gateway might be different from its 'parent'
3012 * in the case of an ip redirect.
3013 * So we keep searching in the exception table if the gateway
3014 * is different.
3015 */
3016 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
3017 struct rt6_info *rt_cache;
3018
9b6b35ab 3019 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
0b34eb00
DA
3020 if (rt_cache &&
3021 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
3022 *ret = rt_cache;
3023 return true;
3024 }
3025 return false;
3026 }
3027 return true;
3028}
3029
/* Argument bundle handed to fib6_nh_redirect_match() for each fib6_nh. */
struct fib6_nh_rd_arg {
	struct fib6_result	*res;	/* lookup result; ->nh is overwritten per nexthop */
	struct flowi6		*fl6;	/* flow being looked up */
	const struct in6_addr	*gw;	/* gateway the nexthop is matched against */
	struct rt6_info		**ret;	/* receives a matching cached route, if any */
};
3036
3037static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
3038{
3039 struct fib6_nh_rd_arg *arg = _arg;
3040
3041 arg->res->nh = nh;
3042 return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
3043}
3044
b55b76b2
DJ
3045/* Handle redirects */
3046struct ip6rd_flowi {
3047 struct flowi6 fl6;
3048 struct in6_addr gateway;
3049};
3050
55cced4f 3051INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
b55b76b2
DJ
3052 struct fib6_table *table,
3053 struct flowi6 *fl6,
b75cc8f9 3054 const struct sk_buff *skb,
b55b76b2
DJ
3055 int flags)
3056{
3057 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 3058 struct rt6_info *ret = NULL;
9b6b35ab 3059 struct fib6_result res = {};
c55c8988
DA
3060 struct fib6_nh_rd_arg arg = {
3061 .res = &res,
3062 .fl6 = fl6,
3063 .gw = &rdfl->gateway,
3064 .ret = &ret
3065 };
8d1c802b 3066 struct fib6_info *rt;
b55b76b2
DJ
3067 struct fib6_node *fn;
3068
3069 /* Get the "current" route for this destination and
67c408cf 3070 * check if the redirect has come from appropriate router.
b55b76b2
DJ
3071 *
3072 * RFC 4861 specifies that redirects should only be
3073 * accepted if they come from the nexthop to the target.
3074 * Due to the way the routes are chosen, this notion
3075 * is a bit fuzzy and one might need to check all possible
3076 * routes.
3077 */
3078
66f5d6ce 3079 rcu_read_lock();
6454743b 3080 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 3081restart:
66f5d6ce 3082 for_each_fib6_node_rt_rcu(fn) {
9b6b35ab 3083 res.f6i = rt;
14895687 3084 if (fib6_check_expired(rt))
b55b76b2 3085 continue;
93c2fb25 3086 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 3087 break;
c55c8988
DA
3088 if (unlikely(rt->nh)) {
3089 if (nexthop_is_blackhole(rt->nh))
3090 continue;
3091 /* on match, res->nh is filled in and potentially ret */
3092 if (nexthop_for_each_fib6_nh(rt->nh,
3093 fib6_nh_redirect_match,
3094 &arg))
3095 goto out;
3096 } else {
3097 res.nh = rt->fib6_nh;
3098 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
3099 &ret))
3100 goto out;
3101 }
b55b76b2
DJ
3102 }
3103
3104 if (!rt)
421842ed 3105 rt = net->ipv6.fib6_null_entry;
93c2fb25 3106 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 3107 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
3108 goto out;
3109 }
3110
421842ed 3111 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
3112 fn = fib6_backtrack(fn, &fl6->saddr);
3113 if (fn)
3114 goto restart;
b55b76b2 3115 }
a3c00e46 3116
9b6b35ab 3117 res.f6i = rt;
1cf844c7 3118 res.nh = rt->fib6_nh;
b0a1ba59 3119out:
7d21fec9 3120 if (ret) {
10585b43 3121 ip6_hold_safe(net, &ret);
7d21fec9
DA
3122 } else {
3123 res.fib6_flags = res.f6i->fib6_flags;
3124 res.fib6_type = res.f6i->fib6_type;
9b6b35ab 3125 ret = ip6_create_rt_rcu(&res);
7d21fec9 3126 }
b55b76b2 3127
66f5d6ce 3128 rcu_read_unlock();
b55b76b2 3129
8ff2e5b2 3130 trace_fib6_table_lookup(net, &res, table, fl6);
23fb93a4 3131 return ret;
b55b76b2
DJ
3132};
3133
3134static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
3135 const struct flowi6 *fl6,
3136 const struct sk_buff *skb,
3137 const struct in6_addr *gateway)
b55b76b2
DJ
3138{
3139 int flags = RT6_LOOKUP_F_HAS_SADDR;
3140 struct ip6rd_flowi rdfl;
3141
3142 rdfl.fl6 = *fl6;
3143 rdfl.gateway = *gateway;
3144
b75cc8f9 3145 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
3146 flags, __ip6_route_redirect);
3147}
3148
e2d118a1
LC
3149void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3150 kuid_t uid)
3a5ad2ee
DM
3151{
3152 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3153 struct dst_entry *dst;
1f7f10ac
3154 struct flowi6 fl6 = {
3155 .flowi6_iif = LOOPBACK_IFINDEX,
3156 .flowi6_oif = oif,
3157 .flowi6_mark = mark,
3158 .daddr = iph->daddr,
3159 .saddr = iph->saddr,
3160 .flowlabel = ip6_flowinfo(iph),
3161 .flowi6_uid = uid,
3162 };
3a5ad2ee 3163
b75cc8f9 3164 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 3165 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
3166 dst_release(dst);
3167}
3168EXPORT_SYMBOL_GPL(ip6_redirect);
3169
d456336d 3170void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
3171{
3172 const struct ipv6hdr *iph = ipv6_hdr(skb);
3173 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3174 struct dst_entry *dst;
0b26fb17
3175 struct flowi6 fl6 = {
3176 .flowi6_iif = LOOPBACK_IFINDEX,
3177 .flowi6_oif = oif,
0b26fb17
3178 .daddr = msg->dest,
3179 .saddr = iph->daddr,
3180 .flowi6_uid = sock_net_uid(net, NULL),
3181 };
c92a59ec 3182
b75cc8f9 3183 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 3184 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
3185 dst_release(dst);
3186}
3187
3a5ad2ee
DM
3188void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3189{
3c5b4d69
ED
3190 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
3191 READ_ONCE(sk->sk_mark), sk->sk_uid);
3a5ad2ee
DM
3192}
3193EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3194
0dbaee3b 3195static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 3196{
0dbaee3b
DM
3197 struct net_device *dev = dst->dev;
3198 unsigned int mtu = dst_mtu(dst);
3199 struct net *net = dev_net(dev);
3200
1da177e4
LT
3201 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3202
5578689a
DL
3203 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3204 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
3205
3206 /*
1ab1457c
YH
3207 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3208 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3209 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
3210 * rely only on pmtu discovery"
3211 */
3212 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3213 mtu = IPV6_MAXPLEN;
3214 return mtu;
3215}
3216
f67fbeae 3217INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 3218{
427faee1 3219 return ip6_dst_mtu_maybe_forward(dst, false);
d33e4553 3220}
9c97921a 3221EXPORT_INDIRECT_CALLABLE(ip6_mtu);
d33e4553 3222
901731b8
DA
3223/* MTU selection:
3224 * 1. mtu on route is locked - use it
3225 * 2. mtu from nexthop exception
3226 * 3. mtu from egress device
3227 *
3228 * based on ip6_dst_mtu_forward and exception logic of
3229 * rt6_find_cached_rt; called with rcu_read_lock
3230 */
b748f260
DA
3231u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3232 const struct in6_addr *daddr,
3233 const struct in6_addr *saddr)
901731b8 3234{
b748f260
DA
3235 const struct fib6_nh *nh = res->nh;
3236 struct fib6_info *f6i = res->f6i;
901731b8 3237 struct inet6_dev *idev;
510e2ced 3238 struct rt6_info *rt;
901731b8
DA
3239 u32 mtu = 0;
3240
3241 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3242 mtu = f6i->fib6_pmtu;
3243 if (mtu)
3244 goto out;
3245 }
3246
510e2ced
WW
3247 rt = rt6_find_cached_rt(res, daddr, saddr);
3248 if (unlikely(rt)) {
3249 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3250 } else {
b748f260 3251 struct net_device *dev = nh->fib_nh_dev;
901731b8
DA
3252
3253 mtu = IPV6_MIN_MTU;
3254 idev = __in6_dev_get(dev);
e7135f48
ED
3255 if (idev)
3256 mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
901731b8
DA
3257 }
3258
3259 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3260out:
b748f260 3261 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
901731b8
DA
3262}
3263
3b00944c 3264struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 3265 struct flowi6 *fl6)
1da177e4 3266{
87a11578 3267 struct dst_entry *dst;
1da177e4
LT
3268 struct rt6_info *rt;
3269 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 3270 struct net *net = dev_net(dev);
1da177e4 3271
38308473 3272 if (unlikely(!idev))
122bdf67 3273 return ERR_PTR(-ENODEV);
1da177e4 3274
ad706862 3275 rt = ip6_dst_alloc(net, dev, 0);
38308473 3276 if (unlikely(!rt)) {
1da177e4 3277 in6_dev_put(idev);
87a11578 3278 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
3279 goto out;
3280 }
3281
588753f1 3282 rt->dst.input = ip6_input;
8e2ec639 3283 rt->dst.output = ip6_output;
550bab42 3284 rt->rt6i_gateway = fl6->daddr;
87a11578 3285 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
3286 rt->rt6i_dst.plen = 128;
3287 rt->rt6i_idev = idev;
14edd87d 3288 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 3289
4c981e28 3290 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
3291 * do proper release of the net_device
3292 */
3293 rt6_uncached_list_add(rt);
1da177e4 3294
87a11578
DM
3295 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3296
1da177e4 3297out:
87a11578 3298 return dst;
1da177e4
LT
3299}
3300
af6d1034 3301static void ip6_dst_gc(struct dst_ops *ops)
1da177e4 3302{
86393e52 3303 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e 3304 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
7019b78e
DL
3305 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3306 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3307 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
9cb7c013 3308 unsigned int val;
fc66f95c 3309 int entries;
7019b78e 3310
af6d1034 3311 if (time_after(rt_last_gc + rt_min_interval, jiffies))
1da177e4
LT
3312 goto out;
3313
9cb7c013 3314 fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
fc66f95c
ED
3315 entries = dst_entries_get_slow(ops);
3316 if (entries < ops->gc_thresh)
9cb7c013 3317 atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
1da177e4 3318out:
9cb7c013
ED
3319 val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
3320 atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
1da177e4
LT
3321}
3322
b2c709cc
DA
3323static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3324 const struct in6_addr *gw_addr, u32 tbid,
3325 int flags, struct fib6_result *res)
8c14586f
DA
3326{
3327 struct flowi6 fl6 = {
3328 .flowi6_oif = cfg->fc_ifindex,
3329 .daddr = *gw_addr,
3330 .saddr = cfg->fc_prefsrc,
3331 };
3332 struct fib6_table *table;
b2c709cc 3333 int err;
8c14586f 3334
f4797b33 3335 table = fib6_get_table(net, tbid);
8c14586f 3336 if (!table)
b2c709cc 3337 return -EINVAL;
8c14586f
DA
3338
3339 if (!ipv6_addr_any(&cfg->fc_prefsrc))
3340 flags |= RT6_LOOKUP_F_HAS_SADDR;
3341
f4797b33 3342 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f 3343
b2c709cc
DA
3344 err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3345 if (!err && res->f6i != net->ipv6.fib6_null_entry)
3346 fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3347 cfg->fc_ifindex != 0, NULL, flags);
8c14586f 3348
b2c709cc 3349 return err;
8c14586f
DA
3350}
3351
fc1e64e1
DA
3352static int ip6_route_check_nh_onlink(struct net *net,
3353 struct fib6_config *cfg,
9fbb704c 3354 const struct net_device *dev,
fc1e64e1
DA
3355 struct netlink_ext_ack *extack)
3356{
b2c709cc 3357 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
fc1e64e1 3358 const struct in6_addr *gw_addr = &cfg->fc_gateway;
b2c709cc 3359 struct fib6_result res = {};
fc1e64e1
DA
3360 int err;
3361
b2c709cc
DA
3362 err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3363 if (!err && !(res.fib6_flags & RTF_REJECT) &&
3364 /* ignore match if it is the default route */
3365 !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3366 (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3367 NL_SET_ERR_MSG(extack,
3368 "Nexthop has invalid gateway or device mismatch");
3369 err = -EINVAL;
fc1e64e1
DA
3370 }
3371
3372 return err;
3373}
3374
1edce99f
DA
3375static int ip6_route_check_nh(struct net *net,
3376 struct fib6_config *cfg,
3377 struct net_device **_dev,
3515440d 3378 netdevice_tracker *dev_tracker,
1edce99f
DA
3379 struct inet6_dev **idev)
3380{
3381 const struct in6_addr *gw_addr = &cfg->fc_gateway;
3382 struct net_device *dev = _dev ? *_dev : NULL;
b2c709cc
DA
3383 int flags = RT6_LOOKUP_F_IFACE;
3384 struct fib6_result res = {};
1edce99f
DA
3385 int err = -EHOSTUNREACH;
3386
3387 if (cfg->fc_table) {
b2c709cc
DA
3388 err = ip6_nh_lookup_table(net, cfg, gw_addr,
3389 cfg->fc_table, flags, &res);
3390 /* gw_addr can not require a gateway or resolve to a reject
3391 * route. If a device is given, it must match the result.
3392 */
3393 if (err || res.fib6_flags & RTF_REJECT ||
3394 res.nh->fib_nh_gw_family ||
3395 (dev && dev != res.nh->fib_nh_dev))
3396 err = -EHOSTUNREACH;
1edce99f
DA
3397 }
3398
b2c709cc
DA
3399 if (err < 0) {
3400 struct flowi6 fl6 = {
3401 .flowi6_oif = cfg->fc_ifindex,
3402 .daddr = *gw_addr,
3403 };
1edce99f 3404
b2c709cc
DA
3405 err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3406 if (err || res.fib6_flags & RTF_REJECT ||
3407 res.nh->fib_nh_gw_family)
3408 err = -EHOSTUNREACH;
3409
3410 if (err)
3411 return err;
3412
3413 fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3414 cfg->fc_ifindex != 0, NULL, flags);
3415 }
1edce99f 3416
b2c709cc 3417 err = 0;
1edce99f 3418 if (dev) {
b2c709cc
DA
3419 if (dev != res.nh->fib_nh_dev)
3420 err = -EHOSTUNREACH;
1edce99f 3421 } else {
b2c709cc 3422 *_dev = dev = res.nh->fib_nh_dev;
3515440d 3423 netdev_hold(dev, dev_tracker, GFP_ATOMIC);
b2c709cc 3424 *idev = in6_dev_get(dev);
1edce99f
DA
3425 }
3426
1edce99f
DA
3427 return err;
3428}
3429
9fbb704c 3430static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3515440d
ED
3431 struct net_device **_dev,
3432 netdevice_tracker *dev_tracker,
3433 struct inet6_dev **idev,
9fbb704c
DA
3434 struct netlink_ext_ack *extack)
3435{
3436 const struct in6_addr *gw_addr = &cfg->fc_gateway;
3437 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 3438 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 3439 const struct net_device *dev = *_dev;
232378e8 3440 bool need_addr_check = !dev;
9fbb704c
DA
3441 int err = -EINVAL;
3442
3443 /* if gw_addr is local we will fail to detect this in case
3444 * address is still TENTATIVE (DAD in progress). rt6_lookup()
3445 * will return already-added prefix route via interface that
3446 * prefix route was assigned to, which might be non-loopback.
3447 */
232378e8
DA
3448 if (dev &&
3449 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3450 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
3451 goto out;
3452 }
3453
3454 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3455 /* IPv6 strictly inhibits using not link-local
3456 * addresses as nexthop address.
3457 * Otherwise, router will not able to send redirects.
3458 * It is very good, but in some (rare!) circumstances
3459 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3460 * some exceptions. --ANK
3461 * We allow IPv4-mapped nexthops to support RFC4798-type
3462 * addressing
3463 */
3464 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3465 NL_SET_ERR_MSG(extack, "Invalid gateway address");
3466 goto out;
3467 }
3468
b2c709cc
DA
3469 rcu_read_lock();
3470
9fbb704c
DA
3471 if (cfg->fc_flags & RTNH_F_ONLINK)
3472 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3473 else
3515440d
ED
3474 err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
3475 idev);
9fbb704c 3476
b2c709cc
DA
3477 rcu_read_unlock();
3478
9fbb704c
DA
3479 if (err)
3480 goto out;
3481 }
3482
3483 /* reload in case device was changed */
3484 dev = *_dev;
3485
3486 err = -EINVAL;
3487 if (!dev) {
3488 NL_SET_ERR_MSG(extack, "Egress device not specified");
3489 goto out;
3490 } else if (dev->flags & IFF_LOOPBACK) {
3491 NL_SET_ERR_MSG(extack,
3492 "Egress device can not be loopback device for this route");
3493 goto out;
3494 }
232378e8
DA
3495
3496 /* if we did not check gw_addr above, do so now that the
3497 * egress device has been resolved.
3498 */
3499 if (need_addr_check &&
3500 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3501 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3502 goto out;
3503 }
3504
9fbb704c
DA
3505 err = 0;
3506out:
3507 return err;
3508}
3509
83c44251
DA
3510static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3511{
3512 if ((flags & RTF_REJECT) ||
3513 (dev && (dev->flags & IFF_LOOPBACK) &&
3514 !(addr_type & IPV6_ADDR_LOOPBACK) &&
aea23c32 3515 !(flags & (RTF_ANYCAST | RTF_LOCAL))))
83c44251
DA
3516 return true;
3517
3518 return false;
3519}
3520
3521int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3522 struct fib6_config *cfg, gfp_t gfp_flags,
3523 struct netlink_ext_ack *extack)
3524{
70f7457a 3525 netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
83c44251
DA
3526 struct net_device *dev = NULL;
3527 struct inet6_dev *idev = NULL;
3528 int addr_type;
3529 int err;
3530
f1741730 3531 fib6_nh->fib_nh_family = AF_INET6;
1bef4c22
ED
3532#ifdef CONFIG_IPV6_ROUTER_PREF
3533 fib6_nh->last_probe = jiffies;
3534#endif
38428d68
RP
3535 if (cfg->fc_is_fdb) {
3536 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3537 fib6_nh->fib_nh_gw_family = AF_INET6;
3538 return 0;
3539 }
f1741730 3540
83c44251
DA
3541 err = -ENODEV;
3542 if (cfg->fc_ifindex) {
70f7457a
JK
3543 dev = netdev_get_by_index(net, cfg->fc_ifindex,
3544 dev_tracker, gfp_flags);
83c44251
DA
3545 if (!dev)
3546 goto out;
3547 idev = in6_dev_get(dev);
3548 if (!idev)
3549 goto out;
3550 }
3551
3552 if (cfg->fc_flags & RTNH_F_ONLINK) {
3553 if (!dev) {
3554 NL_SET_ERR_MSG(extack,
3555 "Nexthop device required for onlink");
3556 goto out;
3557 }
3558
3559 if (!(dev->flags & IFF_UP)) {
3560 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3561 err = -ENETDOWN;
3562 goto out;
3563 }
3564
ad1601ae 3565 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
3566 }
3567
ad1601ae 3568 fib6_nh->fib_nh_weight = 1;
83c44251
DA
3569
3570 /* We cannot add true routes via loopback here,
3571 * they would result in kernel looping; promote them to reject routes
3572 */
3573 addr_type = ipv6_addr_type(&cfg->fc_dst);
3574 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3575 /* hold loopback dev/idev if we haven't done so. */
3576 if (dev != net->loopback_dev) {
3577 if (dev) {
70f7457a 3578 netdev_put(dev, dev_tracker);
83c44251
DA
3579 in6_dev_put(idev);
3580 }
3581 dev = net->loopback_dev;
70f7457a 3582 netdev_hold(dev, dev_tracker, gfp_flags);
83c44251
DA
3583 idev = in6_dev_get(dev);
3584 if (!idev) {
3585 err = -ENODEV;
3586 goto out;
3587 }
3588 }
7dd73168 3589 goto pcpu_alloc;
83c44251
DA
3590 }
3591
3592 if (cfg->fc_flags & RTF_GATEWAY) {
3515440d
ED
3593 err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
3594 &idev, extack);
83c44251
DA
3595 if (err)
3596 goto out;
3597
ad1601ae 3598 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 3599 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
3600 }
3601
3602 err = -ENODEV;
3603 if (!dev)
3604 goto out;
3605
3606 if (idev->cnf.disable_ipv6) {
3607 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3608 err = -EACCES;
3609 goto out;
3610 }
3611
3612 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3613 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3614 err = -ENETDOWN;
3615 goto out;
3616 }
3617
3618 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3619 !netif_carrier_ok(dev))
ad1601ae 3620 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3621
faee6769 3622 err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
7dd73168
DA
3623 cfg->fc_encap_type, cfg, gfp_flags, extack);
3624 if (err)
3625 goto out;
3626
3627pcpu_alloc:
f40b6ae2
DA
3628 fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3629 if (!fib6_nh->rt6i_pcpu) {
3630 err = -ENOMEM;
3631 goto out;
3632 }
3633
ad1601ae 3634 fib6_nh->fib_nh_dev = dev;
f1741730 3635 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3636 err = 0;
3637out:
3638 if (idev)
3639 in6_dev_put(idev);
3640
3641 if (err) {
ad1601ae
DA
3642 lwtstate_put(fib6_nh->fib_nh_lws);
3643 fib6_nh->fib_nh_lws = NULL;
70f7457a 3644 netdev_put(dev, dev_tracker);
83c44251
DA
3645 }
3646
3647 return err;
3648}
3649
dac7d0f2
DA
3650void fib6_nh_release(struct fib6_nh *fib6_nh)
3651{
cc5c073a
DA
3652 struct rt6_exception_bucket *bucket;
3653
3654 rcu_read_lock();
3655
3656 fib6_nh_flush_exceptions(fib6_nh, NULL);
3657 bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3658 if (bucket) {
3659 rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3660 kfree(bucket);
3661 }
3662
3663 rcu_read_unlock();
3664
61308050
NA
3665 fib6_nh_release_dsts(fib6_nh);
3666 free_percpu(fib6_nh->rt6i_pcpu);
f40b6ae2 3667
979e276e 3668 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3669}
3670
8837cbbf
NA
3671void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
3672{
3673 int cpu;
3674
3675 if (!fib6_nh->rt6i_pcpu)
3676 return;
3677
3678 for_each_possible_cpu(cpu) {
3679 struct rt6_info *pcpu_rt, **ppcpu_rt;
3680
3681 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3682 pcpu_rt = xchg(ppcpu_rt, NULL);
3683 if (pcpu_rt) {
3684 dst_dev_put(&pcpu_rt->dst);
3685 dst_release(&pcpu_rt->dst);
3686 }
3687 }
3688}
3689
8d1c802b 3690static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3691 gfp_t gfp_flags,
333c4301 3692 struct netlink_ext_ack *extack)
1da177e4 3693{
5578689a 3694 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3695 struct fib6_info *rt = NULL;
f88d8ea6 3696 struct nexthop *nh = NULL;
c71099ac 3697 struct fib6_table *table;
f88d8ea6 3698 struct fib6_nh *fib6_nh;
8c5b83f0 3699 int err = -EINVAL;
83c44251 3700 int addr_type;
1da177e4 3701
557c44be 3702 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3703 if (cfg->fc_flags & RTF_PCPU) {
3704 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3705 goto out;
d5d531cb 3706 }
557c44be 3707
2ea2352e
WW
3708 /* RTF_CACHE is an internal flag; can not be set by userspace */
3709 if (cfg->fc_flags & RTF_CACHE) {
3710 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3711 goto out;
3712 }
3713
e8478e80
DA
3714 if (cfg->fc_type > RTN_MAX) {
3715 NL_SET_ERR_MSG(extack, "Invalid route type");
3716 goto out;
3717 }
3718
d5d531cb
DA
3719 if (cfg->fc_dst_len > 128) {
3720 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3721 goto out;
3722 }
3723 if (cfg->fc_src_len > 128) {
3724 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3725 goto out;
d5d531cb 3726 }
1da177e4 3727#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3728 if (cfg->fc_src_len) {
3729 NL_SET_ERR_MSG(extack,
3730 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3731 goto out;
d5d531cb 3732 }
1da177e4 3733#endif
5b98324e
DA
3734 if (cfg->fc_nh_id) {
3735 nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3736 if (!nh) {
3737 NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3738 goto out;
3739 }
3740 err = fib6_check_nexthop(nh, cfg, extack);
3741 if (err)
3742 goto out;
3743 }
fc1e64e1 3744
d71314b4 3745 err = -ENOBUFS;
38308473
DM
3746 if (cfg->fc_nlinfo.nlh &&
3747 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3748 table = fib6_get_table(net, cfg->fc_table);
38308473 3749 if (!table) {
f3213831 3750 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3751 table = fib6_new_table(net, cfg->fc_table);
3752 }
3753 } else {
3754 table = fib6_new_table(net, cfg->fc_table);
3755 }
38308473
DM
3756
3757 if (!table)
c71099ac 3758 goto out;
c71099ac 3759
93531c67 3760 err = -ENOMEM;
f88d8ea6 3761 rt = fib6_info_alloc(gfp_flags, !nh);
93531c67 3762 if (!rt)
1da177e4 3763 goto out;
93531c67 3764
d7e774f3
DA
3765 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3766 extack);
767a2217
DA
3767 if (IS_ERR(rt->fib6_metrics)) {
3768 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3769 /* Do not leave garbage there. */
3770 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
8fb4792f 3771 goto out_free;
767a2217
DA
3772 }
3773
93531c67
DA
3774 if (cfg->fc_flags & RTF_ADDRCONF)
3775 rt->dst_nocount = true;
1da177e4 3776
1716a961 3777 if (cfg->fc_flags & RTF_EXPIRES)
dade3f6a
DA
3778 fib6_set_expires(rt, jiffies +
3779 clock_t_to_jiffies(cfg->fc_expires));
1da177e4 3780
86872cb5
TG
3781 if (cfg->fc_protocol == RTPROT_UNSPEC)
3782 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3783 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3784
83c44251
DA
3785 rt->fib6_table = table;
3786 rt->fib6_metric = cfg->fc_metric;
c7036d97 3787 rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
2b2450ca 3788 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3789
93c2fb25
DA
3790 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3791 rt->fib6_dst.plen = cfg->fc_dst_len;
e5fd387a 3792
1da177e4 3793#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3794 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3795 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3796#endif
f88d8ea6 3797 if (nh) {
f88d8ea6 3798 if (rt->fib6_src.plen) {
4daa95af 3799 NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
821bbf79 3800 goto out_free;
f88d8ea6 3801 }
706ec919
XY
3802 if (!nexthop_get(nh)) {
3803 NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
821bbf79 3804 goto out_free;
706ec919 3805 }
f88d8ea6
DA
3806 rt->nh = nh;
3807 fib6_nh = nexthop_fib6_nh(rt->nh);
3808 } else {
3809 err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3810 if (err)
3811 goto out;
1da177e4 3812
f88d8ea6
DA
3813 fib6_nh = rt->fib6_nh;
3814
3815 /* We cannot add true routes via loopback here, they would
3816 * result in kernel looping; promote them to reject routes
3817 */
3818 addr_type = ipv6_addr_type(&cfg->fc_dst);
3819 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3820 addr_type))
3821 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3822 }
955ec4cb 3823
c3968a85 3824 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
f88d8ea6 3825 struct net_device *dev = fib6_nh->fib_nh_dev;
83c44251 3826
c3968a85 3827 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3828 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3829 err = -EINVAL;
3830 goto out;
3831 }
93c2fb25
DA
3832 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3833 rt->fib6_prefsrc.plen = 128;
c3968a85 3834 } else
93c2fb25 3835 rt->fib6_prefsrc.plen = 0;
c3968a85 3836
8c5b83f0 3837 return rt;
6b9ea5a6 3838out:
93531c67 3839 fib6_info_release(rt);
8c5b83f0 3840 return ERR_PTR(err);
821bbf79
CL
3841out_free:
3842 ip_fib_metrics_put(rt->fib6_metrics);
3843 kfree(rt);
3844 return ERR_PTR(err);
6b9ea5a6
RP
3845}
3846
acb54e3c 3847int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3848 struct netlink_ext_ack *extack)
6b9ea5a6 3849{
8d1c802b 3850 struct fib6_info *rt;
6b9ea5a6
RP
3851 int err;
3852
acb54e3c 3853 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3854 if (IS_ERR(rt))
3855 return PTR_ERR(rt);
6b9ea5a6 3856
d4ead6b3 3857 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3858 fib6_info_release(rt);
6b9ea5a6 3859
1da177e4
LT
3860 return err;
3861}
3862
8d1c802b 3863static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3864{
afb1d4b5 3865 struct net *net = info->nl_net;
c71099ac 3866 struct fib6_table *table;
afb1d4b5 3867 int err;
1da177e4 3868
421842ed 3869 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3870 err = -ENOENT;
3871 goto out;
3872 }
6c813a72 3873
93c2fb25 3874 table = rt->fib6_table;
66f5d6ce 3875 spin_lock_bh(&table->tb6_lock);
86872cb5 3876 err = fib6_del(rt, info);
66f5d6ce 3877 spin_unlock_bh(&table->tb6_lock);
1da177e4 3878
6825a26c 3879out:
93531c67 3880 fib6_info_release(rt);
1da177e4
LT
3881 return err;
3882}
3883
11dd74b3 3884int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
e0a1ad73 3885{
11dd74b3
RP
3886 struct nl_info info = {
3887 .nl_net = net,
3888 .skip_notify = skip_notify
3889 };
afb1d4b5 3890
528c4ceb 3891 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3892}
3893
8d1c802b 3894static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3895{
3896 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3897 struct net *net = info->nl_net;
16a16cd3 3898 struct sk_buff *skb = NULL;
0ae81335 3899 struct fib6_table *table;
e3330039 3900 int err = -ENOENT;
0ae81335 3901
421842ed 3902 if (rt == net->ipv6.fib6_null_entry)
e3330039 3903 goto out_put;
93c2fb25 3904 table = rt->fib6_table;
66f5d6ce 3905 spin_lock_bh(&table->tb6_lock);
0ae81335 3906
93c2fb25 3907 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3908 struct fib6_info *sibling, *next_sibling;
0284696b 3909 struct fib6_node *fn;
0ae81335 3910
16a16cd3
DA
3911 /* prefer to send a single notification with all hops */
3912 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3913 if (skb) {
3914 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3915
d4ead6b3 3916 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3917 NULL, NULL, 0, RTM_DELROUTE,
3918 info->portid, seq, 0) < 0) {
3919 kfree_skb(skb);
3920 skb = NULL;
3921 } else
3922 info->skip_notify = 1;
3923 }
3924
0284696b
IS
3925 /* 'rt' points to the first sibling route. If it is not the
3926 * leaf, then we do not need to send a notification. Otherwise,
3927 * we need to check if the last sibling has a next route or not
3928 * and emit a replace or delete notification, respectively.
3929 */
2881fd61 3930 info->skip_notify_kernel = 1;
0284696b
IS
3931 fn = rcu_dereference_protected(rt->fib6_node,
3932 lockdep_is_held(&table->tb6_lock));
3933 if (rcu_access_pointer(fn->leaf) == rt) {
3934 struct fib6_info *last_sibling, *replace_rt;
3935
3936 last_sibling = list_last_entry(&rt->fib6_siblings,
3937 struct fib6_info,
3938 fib6_siblings);
3939 replace_rt = rcu_dereference_protected(
3940 last_sibling->fib6_next,
3941 lockdep_is_held(&table->tb6_lock));
3942 if (replace_rt)
3943 call_fib6_entry_notifiers_replace(net,
3944 replace_rt);
3945 else
3946 call_fib6_multipath_entry_notifiers(net,
caafb250 3947 FIB_EVENT_ENTRY_DEL,
0284696b
IS
3948 rt, rt->fib6_nsiblings,
3949 NULL);
3950 }
0ae81335 3951 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3952 &rt->fib6_siblings,
3953 fib6_siblings) {
0ae81335
DA
3954 err = fib6_del(sibling, info);
3955 if (err)
e3330039 3956 goto out_unlock;
0ae81335
DA
3957 }
3958 }
3959
3960 err = fib6_del(rt, info);
e3330039 3961out_unlock:
66f5d6ce 3962 spin_unlock_bh(&table->tb6_lock);
e3330039 3963out_put:
93531c67 3964 fib6_info_release(rt);
16a16cd3
DA
3965
3966 if (skb) {
e3330039 3967 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3968 info->nlh, gfp_any());
3969 }
0ae81335
DA
3970 return err;
3971}
3972
0fa6efc5 3973static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
23fb93a4
DA
3974{
3975 int rc = -ESRCH;
3976
3977 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3978 goto out;
3979
3980 if (cfg->fc_flags & RTF_GATEWAY &&
3981 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3982 goto out;
761f6026
XL
3983
3984 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3985out:
3986 return rc;
3987}
3988
0fa6efc5
DA
3989static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3990 struct fib6_nh *nh)
3991{
3992 struct fib6_result res = {
3993 .f6i = rt,
3994 .nh = nh,
3995 };
3996 struct rt6_info *rt_cache;
3997
3998 rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3999 if (rt_cache)
4000 return __ip6_del_cached_rt(rt_cache, cfg);
4001
4002 return 0;
4003}
4004
5b98324e
DA
/* Context handed to fib6_nh_del_cached_rt() for each fib6_nh visited. */
struct fib6_nh_del_cached_rt_arg {
	struct fib6_config *cfg;	/* the delete request */
	struct fib6_info *f6i;		/* fib entry owning the nexthop */
};
4009
4010static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
4011{
4012 struct fib6_nh_del_cached_rt_arg *arg = _arg;
4013 int rc;
4014
4015 rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
4016 return rc != -ESRCH ? rc : 0;
4017}
4018
4019static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
4020{
4021 struct fib6_nh_del_cached_rt_arg arg = {
4022 .cfg = cfg,
4023 .f6i = f6i
4024 };
4025
4026 return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
4027}
4028
333c4301
DA
4029static int ip6_route_del(struct fib6_config *cfg,
4030 struct netlink_ext_ack *extack)
1da177e4 4031{
c71099ac 4032 struct fib6_table *table;
8d1c802b 4033 struct fib6_info *rt;
1da177e4 4034 struct fib6_node *fn;
1da177e4
LT
4035 int err = -ESRCH;
4036
5578689a 4037 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
4038 if (!table) {
4039 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 4040 return err;
d5d531cb 4041 }
c71099ac 4042
66f5d6ce 4043 rcu_read_lock();
1da177e4 4044
c71099ac 4045 fn = fib6_locate(&table->tb6_root,
86872cb5 4046 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 4047 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 4048 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 4049
1da177e4 4050 if (fn) {
66f5d6ce 4051 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
4052 struct fib6_nh *nh;
4053
3401bfb1
SB
4054 if (rt->nh && cfg->fc_nh_id &&
4055 rt->nh->id != cfg->fc_nh_id)
5b98324e 4056 continue;
23fb93a4 4057
5b98324e
DA
4058 if (cfg->fc_flags & RTF_CACHE) {
4059 int rc = 0;
4060
4061 if (rt->nh) {
4062 rc = ip6_del_cached_rt_nh(cfg, rt);
4063 } else if (cfg->fc_nh_id) {
4064 continue;
4065 } else {
4066 nh = rt->fib6_nh;
4067 rc = ip6_del_cached_rt(cfg, rt, nh);
4068 }
0fa6efc5
DA
4069 if (rc != -ESRCH) {
4070 rcu_read_unlock();
4071 return rc;
23fb93a4
DA
4072 }
4073 continue;
2b760fcf 4074 }
ad1601ae 4075
5b98324e
DA
4076 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
4077 continue;
4078 if (cfg->fc_protocol &&
4079 cfg->fc_protocol != rt->fib6_protocol)
4080 continue;
4081
4082 if (rt->nh) {
4083 if (!fib6_info_hold_safe(rt))
4084 continue;
4085 rcu_read_unlock();
4086
4087 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4088 }
4089 if (cfg->fc_nh_id)
4090 continue;
4091
4092 nh = rt->fib6_nh;
86872cb5 4093 if (cfg->fc_ifindex &&
ad1601ae
DA
4094 (!nh->fib_nh_dev ||
4095 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 4096 continue;
86872cb5 4097 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 4098 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 4099 continue;
e873e4b9
WW
4100 if (!fib6_info_hold_safe(rt))
4101 continue;
66f5d6ce 4102 rcu_read_unlock();
1da177e4 4103
0ae81335
DA
4104 /* if gateway was specified only delete the one hop */
4105 if (cfg->fc_flags & RTF_GATEWAY)
4106 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4107
4108 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
4109 }
4110 }
66f5d6ce 4111 rcu_read_unlock();
1da177e4
LT
4112
4113 return err;
4114}
4115
6700c270 4116static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 4117{
a6279458 4118 struct netevent_redirect netevent;
e8599ff4 4119 struct rt6_info *rt, *nrt = NULL;
85bd05de 4120 struct fib6_result res = {};
e8599ff4
DM
4121 struct ndisc_options ndopts;
4122 struct inet6_dev *in6_dev;
4123 struct neighbour *neigh;
71bcdba0 4124 struct rd_msg *msg;
6e157b6a
DM
4125 int optlen, on_link;
4126 u8 *lladdr;
e8599ff4 4127
29a3cad5 4128 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 4129 optlen -= sizeof(*msg);
e8599ff4
DM
4130
4131 if (optlen < 0) {
6e157b6a 4132 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
4133 return;
4134 }
4135
71bcdba0 4136 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 4137
71bcdba0 4138 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 4139 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
4140 return;
4141 }
4142
6e157b6a 4143 on_link = 0;
71bcdba0 4144 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 4145 on_link = 1;
71bcdba0 4146 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 4147 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 4148 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
4149 return;
4150 }
4151
4152 in6_dev = __in6_dev_get(skb->dev);
4153 if (!in6_dev)
4154 return;
2aba913f
ED
4155 if (READ_ONCE(in6_dev->cnf.forwarding) ||
4156 !READ_ONCE(in6_dev->cnf.accept_redirects))
e8599ff4
DM
4157 return;
4158
4159 /* RFC2461 8.1:
4160 * The IP source address of the Redirect MUST be the same as the current
4161 * first-hop router for the specified ICMP Destination Address.
4162 */
4163
f997c55c 4164 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
4165 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4166 return;
4167 }
6e157b6a
DM
4168
4169 lladdr = NULL;
e8599ff4
DM
4170 if (ndopts.nd_opts_tgt_lladdr) {
4171 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4172 skb->dev);
4173 if (!lladdr) {
4174 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4175 return;
4176 }
4177 }
4178
e8dfd42c 4179 rt = dst_rt6_info(dst);
ec13ad1d 4180 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 4181 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 4182 return;
6e157b6a 4183 }
e8599ff4 4184
6e157b6a
DM
4185 /* Redirect received -> path was valid.
4186 * Look, redirects are sent only in response to data packets,
4187 * so that this nexthop apparently is reachable. --ANK
4188 */
0dec879f 4189 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 4190
71bcdba0 4191 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
4192 if (!neigh)
4193 return;
a6279458 4194
1da177e4
LT
4195 /*
4196 * We have finally decided to accept it.
4197 */
4198
f997c55c 4199 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
4200 NEIGH_UPDATE_F_WEAK_OVERRIDE|
4201 NEIGH_UPDATE_F_OVERRIDE|
4202 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
4203 NEIGH_UPDATE_F_ISROUTER)),
4204 NDISC_REDIRECT, &ndopts);
1da177e4 4205
4d85cd0c 4206 rcu_read_lock();
85bd05de 4207 res.f6i = rcu_dereference(rt->from);
ff24e498 4208 if (!res.f6i)
886b7a50 4209 goto out;
8a14e46f 4210
49d5b8ef
DA
4211 if (res.f6i->nh) {
4212 struct fib6_nh_match_arg arg = {
4213 .dev = dst->dev,
4214 .gw = &rt->rt6i_gateway,
4215 };
4216
4217 nexthop_for_each_fib6_nh(res.f6i->nh,
4218 fib6_nh_find_match, &arg);
4219
4220 /* fib6_info uses a nexthop that does not have fib6_nh
4221 * using the dst->dev. Should be impossible
4222 */
4223 if (!arg.match)
4224 goto out;
4225 res.nh = arg.match;
4226 } else {
4227 res.nh = res.f6i->fib6_nh;
4228 }
4229
7d21fec9
DA
4230 res.fib6_flags = res.f6i->fib6_flags;
4231 res.fib6_type = res.f6i->fib6_type;
85bd05de 4232 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
38308473 4233 if (!nrt)
1da177e4
LT
4234 goto out;
4235
4236 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4237 if (on_link)
4238 nrt->rt6i_flags &= ~RTF_GATEWAY;
4239
4e3fd7a0 4240 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 4241
886b7a50 4242 /* rt6_insert_exception() will take care of duplicated exceptions */
5012f0a5 4243 if (rt6_insert_exception(nrt, &res)) {
2b760fcf
WW
4244 dst_release_immediate(&nrt->dst);
4245 goto out;
4246 }
1da177e4 4247
d8d1f30b
CG
4248 netevent.old = &rt->dst;
4249 netevent.new = &nrt->dst;
71bcdba0 4250 netevent.daddr = &msg->dest;
60592833 4251 netevent.neigh = neigh;
8d71740c
TT
4252 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4253
1da177e4 4254out:
886b7a50 4255 rcu_read_unlock();
e8599ff4 4256 neigh_release(neigh);
6e157b6a
DM
4257}
4258
70ceb4f5 4259#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 4260static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 4261 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
4262 const struct in6_addr *gwaddr,
4263 struct net_device *dev)
70ceb4f5 4264{
830218c1
DA
4265 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4266 int ifindex = dev->ifindex;
70ceb4f5 4267 struct fib6_node *fn;
8d1c802b 4268 struct fib6_info *rt = NULL;
c71099ac
TG
4269 struct fib6_table *table;
4270
830218c1 4271 table = fib6_get_table(net, tb_id);
38308473 4272 if (!table)
c71099ac 4273 return NULL;
70ceb4f5 4274
66f5d6ce 4275 rcu_read_lock();
38fbeeee 4276 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
4277 if (!fn)
4278 goto out;
4279
66f5d6ce 4280 for_each_fib6_node_rt_rcu(fn) {
f88d8ea6
DA
4281 /* these routes do not use nexthops */
4282 if (rt->nh)
4283 continue;
1cf844c7 4284 if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
70ceb4f5 4285 continue;
2b2450ca 4286 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
1cf844c7 4287 !rt->fib6_nh->fib_nh_gw_family)
70ceb4f5 4288 continue;
1cf844c7 4289 if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
70ceb4f5 4290 continue;
e873e4b9
WW
4291 if (!fib6_info_hold_safe(rt))
4292 continue;
70ceb4f5
YH
4293 break;
4294 }
4295out:
66f5d6ce 4296 rcu_read_unlock();
70ceb4f5
YH
4297 return rt;
4298}
4299
8d1c802b 4300static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 4301 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
4302 const struct in6_addr *gwaddr,
4303 struct net_device *dev,
95c96174 4304 unsigned int pref)
70ceb4f5 4305{
86872cb5 4306 struct fib6_config cfg = {
238fc7ea 4307 .fc_metric = IP6_RT_PRIO_USER,
830218c1 4308 .fc_ifindex = dev->ifindex,
86872cb5
TG
4309 .fc_dst_len = prefixlen,
4310 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4311 RTF_UP | RTF_PREF(pref),
b91d5329 4312 .fc_protocol = RTPROT_RA,
e8478e80 4313 .fc_type = RTN_UNICAST,
15e47304 4314 .fc_nlinfo.portid = 0,
efa2cea0
DL
4315 .fc_nlinfo.nlh = NULL,
4316 .fc_nlinfo.nl_net = net,
86872cb5
TG
4317 };
4318
91b2c9a0 4319 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
4320 cfg.fc_dst = *prefix;
4321 cfg.fc_gateway = *gwaddr;
70ceb4f5 4322
e317da96
YH
4323 /* We should treat it as a default route if prefix length is 0. */
4324 if (!prefixlen)
86872cb5 4325 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 4326
acb54e3c 4327 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 4328
830218c1 4329 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
4330}
4331#endif
4332
8d1c802b 4333struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
4334 const struct in6_addr *addr,
4335 struct net_device *dev)
1ab1457c 4336{
830218c1 4337 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 4338 struct fib6_info *rt;
c71099ac 4339 struct fib6_table *table;
1da177e4 4340
afb1d4b5 4341 table = fib6_get_table(net, tb_id);
38308473 4342 if (!table)
c71099ac 4343 return NULL;
1da177e4 4344
66f5d6ce
WW
4345 rcu_read_lock();
4346 for_each_fib6_node_rt_rcu(&table->tb6_root) {
f88d8ea6 4347 struct fib6_nh *nh;
ad1601ae 4348
f88d8ea6
DA
4349 /* RA routes do not use nexthops */
4350 if (rt->nh)
4351 continue;
4352
4353 nh = rt->fib6_nh;
ad1601ae 4354 if (dev == nh->fib_nh_dev &&
93c2fb25 4355 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 4356 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
4357 break;
4358 }
e873e4b9
WW
4359 if (rt && !fib6_info_hold_safe(rt))
4360 rt = NULL;
66f5d6ce 4361 rcu_read_unlock();
1da177e4
LT
4362 return rt;
4363}
4364
8d1c802b 4365struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 4366 const struct in6_addr *gwaddr,
ebacaaa0 4367 struct net_device *dev,
6b2e04bc 4368 unsigned int pref,
129e406e
KFL
4369 u32 defrtr_usr_metric,
4370 int lifetime)
1da177e4 4371{
86872cb5 4372 struct fib6_config cfg = {
ca254490 4373 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
6b2e04bc 4374 .fc_metric = defrtr_usr_metric,
86872cb5
TG
4375 .fc_ifindex = dev->ifindex,
4376 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4377 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 4378 .fc_protocol = RTPROT_RA,
e8478e80 4379 .fc_type = RTN_UNICAST,
15e47304 4380 .fc_nlinfo.portid = 0,
5578689a 4381 .fc_nlinfo.nlh = NULL,
afb1d4b5 4382 .fc_nlinfo.nl_net = net,
129e406e 4383 .fc_expires = jiffies_to_clock_t(lifetime * HZ),
86872cb5 4384 };
1da177e4 4385
4e3fd7a0 4386 cfg.fc_gateway = *gwaddr;
1da177e4 4387
acb54e3c 4388 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
4389 struct fib6_table *table;
4390
4391 table = fib6_get_table(dev_net(dev), cfg.fc_table);
4392 if (table)
4393 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4394 }
1da177e4 4395
afb1d4b5 4396 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
4397}
4398
afb1d4b5
DA
4399static void __rt6_purge_dflt_routers(struct net *net,
4400 struct fib6_table *table)
1da177e4 4401{
8d1c802b 4402 struct fib6_info *rt;
1da177e4
LT
4403
4404restart:
66f5d6ce
WW
4405 rcu_read_lock();
4406 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
4407 struct net_device *dev = fib6_info_nh_dev(rt);
4408 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4409
93c2fb25 4410 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
4411 (!idev || idev->cnf.accept_ra != 2) &&
4412 fib6_info_hold_safe(rt)) {
93531c67 4413 rcu_read_unlock();
11dd74b3 4414 ip6_del_rt(net, rt, false);
1da177e4
LT
4415 goto restart;
4416 }
4417 }
66f5d6ce 4418 rcu_read_unlock();
830218c1
DA
4419
4420 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4421}
4422
4423void rt6_purge_dflt_routers(struct net *net)
4424{
4425 struct fib6_table *table;
4426 struct hlist_head *head;
4427 unsigned int h;
4428
4429 rcu_read_lock();
4430
4431 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4432 head = &net->ipv6.fib_table_hash[h];
4433 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4434 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 4435 __rt6_purge_dflt_routers(net, table);
830218c1
DA
4436 }
4437 }
4438
4439 rcu_read_unlock();
1da177e4
LT
4440}
4441
5578689a
DL
4442static void rtmsg_to_fib6_config(struct net *net,
4443 struct in6_rtmsg *rtmsg,
86872cb5
TG
4444 struct fib6_config *cfg)
4445{
8823a3ac
4446 *cfg = (struct fib6_config){
4447 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4448 : RT6_TABLE_MAIN,
4449 .fc_ifindex = rtmsg->rtmsg_ifindex,
bb487272 4450 .fc_metric = rtmsg->rtmsg_metric,
8823a3ac
4451 .fc_expires = rtmsg->rtmsg_info,
4452 .fc_dst_len = rtmsg->rtmsg_dst_len,
4453 .fc_src_len = rtmsg->rtmsg_src_len,
4454 .fc_flags = rtmsg->rtmsg_flags,
4455 .fc_type = rtmsg->rtmsg_type,
4456
4457 .fc_nlinfo.nl_net = net,
4458
4459 .fc_dst = rtmsg->rtmsg_dst,
4460 .fc_src = rtmsg->rtmsg_src,
4461 .fc_gateway = rtmsg->rtmsg_gateway,
4462 };
86872cb5
TG
4463}
4464
7c1552da 4465int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
1da177e4 4466{
86872cb5 4467 struct fib6_config cfg;
1da177e4
LT
4468 int err;
4469
7c1552da
CH
4470 if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4471 return -EINVAL;
4472 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4473 return -EPERM;
86872cb5 4474
7c1552da 4475 rtmsg_to_fib6_config(net, rtmsg, &cfg);
1da177e4 4476
7c1552da
CH
4477 rtnl_lock();
4478 switch (cmd) {
4479 case SIOCADDRT:
bb487272 4480 /* Only do the default setting of fc_metric in route adding */
4481 if (cfg.fc_metric == 0)
4482 cfg.fc_metric = IP6_RT_PRIO_USER;
7c1552da
CH
4483 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4484 break;
4485 case SIOCDELRT:
4486 err = ip6_route_del(&cfg, NULL);
4487 break;
3ff50b79 4488 }
7c1552da
CH
4489 rtnl_unlock();
4490 return err;
1da177e4
LT
4491}
4492
4493/*
4494 * Drop the packet on the floor
4495 */
4496
d5fdd6ba 4497static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 4498{
adf30907 4499 struct dst_entry *dst = skb_dst(skb);
1d3fd8a1
SS
4500 struct net *net = dev_net(dst->dev);
4501 struct inet6_dev *idev;
3ae42cc8 4502 SKB_DR(reason);
1d3fd8a1
SS
4503 int type;
4504
1158f79f 4505 if (netif_is_l3_master(skb->dev) ||
1d3fd8a1
SS
4506 dst->dev == net->loopback_dev)
4507 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4508 else
4509 idev = ip6_dst_idev(dst);
4510
612f09e8
YH
4511 switch (ipstats_mib_noroutes) {
4512 case IPSTATS_MIB_INNOROUTES:
0660e03f 4513 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 4514 if (type == IPV6_ADDR_ANY) {
3ae42cc8 4515 SKB_DR_SET(reason, IP_INADDRERRORS);
1d3fd8a1 4516 IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
4517 break;
4518 }
3ae42cc8 4519 SKB_DR_SET(reason, IP_INNOROUTES);
a8eceea8 4520 fallthrough;
612f09e8 4521 case IPSTATS_MIB_OUTNOROUTES:
3ae42cc8 4522 SKB_DR_OR(reason, IP_OUTNOROUTES);
1d3fd8a1 4523 IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
612f09e8
YH
4524 break;
4525 }
1d3fd8a1
SS
4526
4527 /* Start over by dropping the dst for l3mdev case */
4528 if (netif_is_l3_master(skb->dev))
4529 skb_dst_drop(skb);
4530
3ffe533c 4531 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3ae42cc8 4532 kfree_skb_reason(skb, reason);
1da177e4
LT
4533 return 0;
4534}
4535
9ce8ade0
TG
4536static int ip6_pkt_discard(struct sk_buff *skb)
4537{
612f09e8 4538 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
4539}
4540
ede2059d 4541static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 4542{
adf30907 4543 skb->dev = skb_dst(skb)->dev;
612f09e8 4544 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
4545}
4546
9ce8ade0
TG
4547static int ip6_pkt_prohibit(struct sk_buff *skb)
4548{
612f09e8 4549 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
4550}
4551
ede2059d 4552static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 4553{
adf30907 4554 skb->dev = skb_dst(skb)->dev;
612f09e8 4555 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
4556}
4557
1da177e4
LT
4558/*
4559 * Allocate a dst for local (unicast / anycast) address.
4560 */
4561
360a9887
DA
4562struct fib6_info *addrconf_f6i_alloc(struct net *net,
4563 struct inet6_dev *idev,
4564 const struct in6_addr *addr,
7f6c4039
HL
4565 bool anycast, gfp_t gfp_flags,
4566 struct netlink_ext_ack *extack)
1da177e4 4567{
c7a1ce39
DA
4568 struct fib6_config cfg = {
4569 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4570 .fc_ifindex = idev->dev->ifindex,
d55a2e37 4571 .fc_flags = RTF_UP | RTF_NONEXTHOP,
c7a1ce39
DA
4572 .fc_dst = *addr,
4573 .fc_dst_len = 128,
4574 .fc_protocol = RTPROT_KERNEL,
4575 .fc_nlinfo.nl_net = net,
4576 .fc_ignore_dev_down = true,
4577 };
d55a2e37 4578 struct fib6_info *f6i;
1da177e4 4579
e8478e80 4580 if (anycast) {
c7a1ce39
DA
4581 cfg.fc_type = RTN_ANYCAST;
4582 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 4583 } else {
c7a1ce39
DA
4584 cfg.fc_type = RTN_LOCAL;
4585 cfg.fc_flags |= RTF_LOCAL;
e8478e80 4586 }
1da177e4 4587
7f6c4039 4588 f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
3b0dc529 4589 if (!IS_ERR(f6i)) {
d55a2e37 4590 f6i->dst_nocount = true;
3b0dc529
ND
4591
4592 if (!anycast &&
624d5aec
ED
4593 (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
4594 READ_ONCE(idev->cnf.disable_policy)))
3b0dc529
ND
4595 f6i->dst_nopolicy = true;
4596 }
4597
d55a2e37 4598 return f6i;
1da177e4
LT
4599}
4600
c3968a85
DW
/* remove deleted ip from prefsrc entries: walker context */
struct arg_dev_net_ip {
	struct net *net;	/* namespace being walked */
	struct in6_addr *addr;	/* address that was deleted */
};
4606
8d1c802b 4607static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85 4608{
c3968a85
DW
4609 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4610 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4611
f88d8ea6 4612 if (!rt->nh &&
421842ed 4613 rt != net->ipv6.fib6_null_entry &&
b358f57f
HL
4614 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
4615 !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
60006a48 4616 spin_lock_bh(&rt6_exception_lock);
c3968a85 4617 /* remove prefsrc entry */
93c2fb25 4618 rt->fib6_prefsrc.plen = 0;
60006a48 4619 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
4620 }
4621 return 0;
4622}
4623
4624void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4625{
4626 struct net *net = dev_net(ifp->idev->dev);
4627 struct arg_dev_net_ip adni = {
c3968a85
DW
4628 .net = net,
4629 .addr = &ifp->addr,
4630 };
0c3584d5 4631 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
4632}
4633
2b2450ca 4634#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
4635
4636/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 4637static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
4638{
4639 struct in6_addr *gateway = (struct in6_addr *)arg;
f88d8ea6
DA
4640 struct fib6_nh *nh;
4641
4642 /* RA routes do not use nexthops */
4643 if (rt->nh)
4644 return 0;
be7a010d 4645
f88d8ea6 4646 nh = rt->fib6_nh;
93c2fb25 4647 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
cc5c073a 4648 nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
be7a010d 4649 return -1;
b16cb459
WW
4650
4651 /* Further clean up cached routes in exception table.
4652 * This is needed because cached route may have a different
4653 * gateway than its 'parent' in the case of an ip redirect.
4654 */
cc5c073a 4655 fib6_nh_exceptions_clean_tohost(nh, gateway);
b16cb459 4656
be7a010d
DJ
4657 return 0;
4658}
4659
4660void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4661{
4662 fib6_clean_all(net, fib6_clean_tohost, gateway);
4663}
4664
2127d95a
IS
/*
 * Walker context shared by the device up/down callbacks: fib6_ifup()
 * reads nh_flags, fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned char nh_flags;	/* flags to clear on "up" */
		unsigned long event;	/* NETDEV_* event on "down" */
	};
};
4672
8d1c802b 4673static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 4674{
8d1c802b 4675 struct fib6_info *iter;
d7dedee1
IS
4676 struct fib6_node *fn;
4677
93c2fb25
DA
4678 fn = rcu_dereference_protected(rt->fib6_node,
4679 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 4680 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 4681 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 4682 while (iter) {
93c2fb25 4683 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 4684 rt6_qualify_for_ecmp(iter))
d7dedee1 4685 return iter;
8fb11a9a 4686 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 4687 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
4688 }
4689
4690 return NULL;
4691}
4692
f88d8ea6 4693/* only called for fib entries with builtin fib6_nh */
8d1c802b 4694static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 4695{
1cf844c7
DA
4696 if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4697 (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4698 ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
d7dedee1
IS
4699 return true;
4700
4701 return false;
4702}
4703
8d1c802b 4704static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 4705{
8d1c802b 4706 struct fib6_info *iter;
d7dedee1
IS
4707 int total = 0;
4708
4709 if (!rt6_is_dead(rt))
1cf844c7 4710 total += rt->fib6_nh->fib_nh_weight;
d7dedee1 4711
93c2fb25 4712 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 4713 if (!rt6_is_dead(iter))
1cf844c7 4714 total += iter->fib6_nh->fib_nh_weight;
d7dedee1
IS
4715 }
4716
4717 return total;
4718}
4719
8d1c802b 4720static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
4721{
4722 int upper_bound = -1;
4723
4724 if (!rt6_is_dead(rt)) {
1cf844c7 4725 *weight += rt->fib6_nh->fib_nh_weight;
d7dedee1
IS
4726 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4727 total) - 1;
4728 }
1cf844c7 4729 atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
d7dedee1
IS
4730}
4731
8d1c802b 4732static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 4733{
8d1c802b 4734 struct fib6_info *iter;
d7dedee1
IS
4735 int weight = 0;
4736
4737 rt6_upper_bound_set(rt, &weight, total);
4738
93c2fb25 4739 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
4740 rt6_upper_bound_set(iter, &weight, total);
4741}
4742
8d1c802b 4743void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 4744{
8d1c802b 4745 struct fib6_info *first;
d7dedee1
IS
4746 int total;
4747
4748 /* In case the entire multipath route was marked for flushing,
4749 * then there is no need to rebalance upon the removal of every
4750 * sibling route.
4751 */
93c2fb25 4752 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
4753 return;
4754
4755 /* During lookup routes are evaluated in order, so we need to
4756 * make sure upper bounds are assigned from the first sibling
4757 * onwards.
4758 */
4759 first = rt6_multipath_first_sibling(rt);
4760 if (WARN_ON_ONCE(!first))
4761 return;
4762
4763 total = rt6_multipath_total_weight(first);
4764 rt6_multipath_upper_bound_set(first, total);
4765}
4766
8d1c802b 4767static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
4768{
4769 const struct arg_netdev_event *arg = p_arg;
7aef6859 4770 struct net *net = dev_net(arg->dev);
2127d95a 4771
f88d8ea6 4772 if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
1cf844c7
DA
4773 rt->fib6_nh->fib_nh_dev == arg->dev) {
4774 rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
7aef6859 4775 fib6_update_sernum_upto_root(net, rt);
d7dedee1 4776 rt6_multipath_rebalance(rt);
1de178ed 4777 }
2127d95a
IS
4778
4779 return 0;
4780}
4781
ecc5663c 4782void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
2127d95a
IS
4783{
4784 struct arg_netdev_event arg = {
4785 .dev = dev,
6802f3ad
IS
4786 {
4787 .nh_flags = nh_flags,
4788 },
2127d95a
IS
4789 };
4790
4791 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4792 arg.nh_flags |= RTNH_F_LINKDOWN;
4793
4794 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4795}
4796
f88d8ea6 4797/* only called for fib entries with inline fib6_nh */
8d1c802b 4798static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
4799 const struct net_device *dev)
4800{
8d1c802b 4801 struct fib6_info *iter;
1de178ed 4802
1cf844c7 4803 if (rt->fib6_nh->fib_nh_dev == dev)
1de178ed 4804 return true;
93c2fb25 4805 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7 4806 if (iter->fib6_nh->fib_nh_dev == dev)
1de178ed
IS
4807 return true;
4808
4809 return false;
4810}
4811
8d1c802b 4812static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4813{
8d1c802b 4814 struct fib6_info *iter;
1de178ed
IS
4815
4816 rt->should_flush = 1;
93c2fb25 4817 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4818 iter->should_flush = 1;
4819}
4820
8d1c802b 4821static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4822 const struct net_device *down_dev)
4823{
8d1c802b 4824 struct fib6_info *iter;
1de178ed
IS
4825 unsigned int dead = 0;
4826
1cf844c7
DA
4827 if (rt->fib6_nh->fib_nh_dev == down_dev ||
4828 rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
1de178ed 4829 dead++;
93c2fb25 4830 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7
DA
4831 if (iter->fib6_nh->fib_nh_dev == down_dev ||
4832 iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4833 dead++;
4834
4835 return dead;
4836}
4837
8d1c802b 4838static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed 4839 const struct net_device *dev,
ecc5663c 4840 unsigned char nh_flags)
1de178ed 4841{
8d1c802b 4842 struct fib6_info *iter;
1de178ed 4843
1cf844c7
DA
4844 if (rt->fib6_nh->fib_nh_dev == dev)
4845 rt->fib6_nh->fib_nh_flags |= nh_flags;
93c2fb25 4846 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1cf844c7
DA
4847 if (iter->fib6_nh->fib_nh_dev == dev)
4848 iter->fib6_nh->fib_nh_flags |= nh_flags;
1de178ed
IS
4849}
4850
a1a22c12 4851/* called with write lock held for table with rt */
8d1c802b 4852static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4853{
4c981e28
IS
4854 const struct arg_netdev_event *arg = p_arg;
4855 const struct net_device *dev = arg->dev;
7aef6859 4856 struct net *net = dev_net(dev);
8ed67789 4857
f88d8ea6 4858 if (rt == net->ipv6.fib6_null_entry || rt->nh)
27c6fa73
IS
4859 return 0;
4860
4861 switch (arg->event) {
4862 case NETDEV_UNREGISTER:
1cf844c7 4863 return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
27c6fa73 4864 case NETDEV_DOWN:
1de178ed 4865 if (rt->should_flush)
27c6fa73 4866 return -1;
93c2fb25 4867 if (!rt->fib6_nsiblings)
1cf844c7 4868 return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4869 if (rt6_multipath_uses_dev(rt, dev)) {
4870 unsigned int count;
4871
4872 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4873 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4874 rt6_multipath_flush(rt);
4875 return -1;
4876 }
4877 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4878 RTNH_F_LINKDOWN);
7aef6859 4879 fib6_update_sernum(net, rt);
d7dedee1 4880 rt6_multipath_rebalance(rt);
1de178ed
IS
4881 }
4882 return -2;
27c6fa73 4883 case NETDEV_CHANGE:
1cf844c7 4884 if (rt->fib6_nh->fib_nh_dev != dev ||
93c2fb25 4885 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4886 break;
1cf844c7 4887 rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4888 rt6_multipath_rebalance(rt);
27c6fa73 4889 break;
2b241361 4890 }
c159d30c 4891
1da177e4
LT
4892 return 0;
4893}
4894
27c6fa73 4895void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4896{
4c981e28 4897 struct arg_netdev_event arg = {
8ed67789 4898 .dev = dev,
6802f3ad
IS
4899 {
4900 .event = event,
4901 },
8ed67789 4902 };
7c6bb7d2 4903 struct net *net = dev_net(dev);
8ed67789 4904
7c6bb7d2
DA
4905 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4906 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4907 else
4908 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4909}
4910
4911void rt6_disable_ip(struct net_device *dev, unsigned long event)
4912{
4913 rt6_sync_down_dev(dev, event);
e5f80fcf 4914 rt6_uncached_list_flush_dev(dev);
4c981e28 4915 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4916}
4917
/* Walker context for propagating a device MTU change into the FIB. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
	struct fib6_info *f6i;	/* entry being visited; set per route */
};
4923
cc5c073a 4924static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
c0b220cf
DA
4925{
4926 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
cc5c073a 4927 struct fib6_info *f6i = arg->f6i;
c0b220cf
DA
4928
4929 /* For administrative MTU increase, there is no way to discover
4930 * IPv6 PMTU increase, so PMTU increase should be updated here.
4931 * Since RFC 1981 doesn't include administrative MTU increase
4932 * update PMTU increase is a MUST. (i.e. jumbo frame)
4933 */
4934 if (nh->fib_nh_dev == arg->dev) {
4935 struct inet6_dev *idev = __in6_dev_get(arg->dev);
4936 u32 mtu = f6i->fib6_pmtu;
4937
4938 if (mtu >= arg->mtu ||
4939 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4940 fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4941
4942 spin_lock_bh(&rt6_exception_lock);
cc5c073a 4943 rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
c0b220cf
DA
4944 spin_unlock_bh(&rt6_exception_lock);
4945 }
4946
4947 return 0;
4948}
4949
4950static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
1da177e4
LT
4951{
4952 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4953 struct inet6_dev *idev;
4954
4955 /* In IPv6 pmtu discovery is not optional,
4956 so that RTAX_MTU lock cannot disable it.
4957 We still use this lock to block changes
4958 caused by addrconf/ndisc.
4959 */
4960
4961 idev = __in6_dev_get(arg->dev);
38308473 4962 if (!idev)
1da177e4
LT
4963 return 0;
4964
c0b220cf
DA
4965 if (fib6_metric_locked(f6i, RTAX_MTU))
4966 return 0;
d4ead6b3 4967
c0b220cf 4968 arg->f6i = f6i;
2d44234b
DA
4969 if (f6i->nh) {
4970 /* fib6_nh_mtu_change only returns 0, so this is safe */
4971 return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4972 arg);
4973 }
4974
1cf844c7 4975 return fib6_nh_mtu_change(f6i->fib6_nh, arg);
1da177e4
LT
4976}
4977
95c96174 4978void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4979{
c71099ac
TG
4980 struct rt6_mtu_change_arg arg = {
4981 .dev = dev,
4982 .mtu = mtu,
4983 };
1da177e4 4984
0c3584d5 4985 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4986}
4987
/*
 * Netlink attribute policy for IPv6 route messages, indexed by RTA_*
 * attribute type.  It is handed to nlmsg_parse_deprecated() by
 * rtm_to_fib6_config() and by inet6_rtm_valid_getroute_req().
 *
 * RTA_DST and RTA_SRC have no entry here; rtm_to_fib6_config() checks
 * their length against the prefix length from the rtmsg header itself.
 */
ef7c79ed 4988static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
75425657 4989 [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
5176f91e 4990 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4991 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4992 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4993 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4994 [RTA_PRIORITY] = { .type = NLA_U32 },
4995 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4996 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4997 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4998 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4999 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 5000 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 5001 [RTA_UID] = { .type = NLA_U32 },
3b45a410 5002 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 5003 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
5004 [RTA_IP_PROTO] = { .type = NLA_U8 },
5005 [RTA_SPORT] = { .type = NLA_U16 },
5006 [RTA_DPORT] = { .type = NLA_U16 },
5b98324e 5007 [RTA_NH_ID] = { .type = NLA_U32 },
86872cb5
TG
5008};
5009
5010static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
5011 struct fib6_config *cfg,
5012 struct netlink_ext_ack *extack)
1da177e4 5013{
86872cb5
TG
5014 struct rtmsg *rtm;
5015 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 5016 unsigned int pref;
86872cb5 5017 int err;
1da177e4 5018
8cb08174
JB
5019 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5020 rtm_ipv6_policy, extack);
86872cb5
TG
5021 if (err < 0)
5022 goto errout;
1da177e4 5023
86872cb5
TG
5024 err = -EINVAL;
5025 rtm = nlmsg_data(nlh);
86872cb5 5026
b9605161
GN
5027 if (rtm->rtm_tos) {
5028 NL_SET_ERR_MSG(extack,
5029 "Invalid dsfield (tos): option not available for IPv6");
5030 goto errout;
5031 }
5032
84db8407
5033 *cfg = (struct fib6_config){
5034 .fc_table = rtm->rtm_table,
5035 .fc_dst_len = rtm->rtm_dst_len,
5036 .fc_src_len = rtm->rtm_src_len,
5037 .fc_flags = RTF_UP,
5038 .fc_protocol = rtm->rtm_protocol,
5039 .fc_type = rtm->rtm_type,
5040
5041 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
5042 .fc_nlinfo.nlh = nlh,
5043 .fc_nlinfo.nl_net = sock_net(skb->sk),
5044 };
86872cb5 5045
ef2c7d7b
ND
5046 if (rtm->rtm_type == RTN_UNREACHABLE ||
5047 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
5048 rtm->rtm_type == RTN_PROHIBIT ||
5049 rtm->rtm_type == RTN_THROW)
86872cb5
TG
5050 cfg->fc_flags |= RTF_REJECT;
5051
ab79ad14
5052 if (rtm->rtm_type == RTN_LOCAL)
5053 cfg->fc_flags |= RTF_LOCAL;
5054
1f56a01f
MKL
5055 if (rtm->rtm_flags & RTM_F_CLONED)
5056 cfg->fc_flags |= RTF_CACHE;
5057
fc1e64e1
DA
5058 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
5059
5b98324e
DA
5060 if (tb[RTA_NH_ID]) {
5061 if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
5062 tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
5063 NL_SET_ERR_MSG(extack,
5064 "Nexthop specification and nexthop id are mutually exclusive");
5065 goto errout;
5066 }
5067 cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
5068 }
5069
86872cb5 5070 if (tb[RTA_GATEWAY]) {
67b61f6c 5071 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 5072 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 5073 }
e3818541
DA
5074 if (tb[RTA_VIA]) {
5075 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
5076 goto errout;
5077 }
86872cb5
TG
5078
5079 if (tb[RTA_DST]) {
5080 int plen = (rtm->rtm_dst_len + 7) >> 3;
5081
5082 if (nla_len(tb[RTA_DST]) < plen)
5083 goto errout;
5084
5085 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 5086 }
86872cb5
TG
5087
5088 if (tb[RTA_SRC]) {
5089 int plen = (rtm->rtm_src_len + 7) >> 3;
5090
5091 if (nla_len(tb[RTA_SRC]) < plen)
5092 goto errout;
5093
5094 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 5095 }
86872cb5 5096
c3968a85 5097 if (tb[RTA_PREFSRC])
67b61f6c 5098 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 5099
86872cb5
TG
5100 if (tb[RTA_OIF])
5101 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
5102
5103 if (tb[RTA_PRIORITY])
5104 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
5105
5106 if (tb[RTA_METRICS]) {
5107 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
5108 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 5109 }
86872cb5
TG
5110
5111 if (tb[RTA_TABLE])
5112 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
5113
51ebd318
ND
5114 if (tb[RTA_MULTIPATH]) {
5115 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
5116 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
5117
5118 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 5119 cfg->fc_mp_len, extack);
9ed59592
DA
5120 if (err < 0)
5121 goto errout;
51ebd318
ND
5122 }
5123
c78ba6d6
LR
5124 if (tb[RTA_PREF]) {
5125 pref = nla_get_u8(tb[RTA_PREF]);
5126 if (pref != ICMPV6_ROUTER_PREF_LOW &&
5127 pref != ICMPV6_ROUTER_PREF_HIGH)
5128 pref = ICMPV6_ROUTER_PREF_MEDIUM;
5129 cfg->fc_flags |= RTF_PREF(pref);
5130 }
5131
19e42e45
RP
5132 if (tb[RTA_ENCAP])
5133 cfg->fc_encap = tb[RTA_ENCAP];
5134
9ed59592 5135 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
5136 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5137
c255bd68 5138 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
5139 if (err < 0)
5140 goto errout;
5141 }
5142
32bc201e
XL
5143 if (tb[RTA_EXPIRES]) {
5144 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5145
5146 if (addrconf_finite_timeout(timeout)) {
5147 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5148 cfg->fc_flags |= RTF_EXPIRES;
5149 }
5150 }
5151
86872cb5
TG
5152 err = 0;
5153errout:
5154 return err;
1da177e4
LT
5155}
5156
/*
 * One nexthop of a multipath request while it is being processed by
 * ip6_route_multipath_add(): the route created for the nexthop, a copy
 * of the per-nexthop config (passed to ip6_route_del() when the add has
 * to be rolled back) and the linkage into the temporary nexthop list.
 */
6b9ea5a6 5157struct rt6_nh {
8d1c802b 5158 struct fib6_info *fib6_info;
6b9ea5a6 5159 struct fib6_config r_cfg;
6b9ea5a6
RP
5160 struct list_head next;
5161};
5162
d4ead6b3
DA
5163static int ip6_route_info_append(struct net *net,
5164 struct list_head *rt6_nh_list,
8d1c802b
DA
5165 struct fib6_info *rt,
5166 struct fib6_config *r_cfg)
6b9ea5a6
RP
5167{
5168 struct rt6_nh *nh;
6b9ea5a6
RP
5169 int err = -EEXIST;
5170
5171 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
5172 /* check if fib6_info already exists */
5173 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
5174 return err;
5175 }
5176
5177 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5178 if (!nh)
5179 return -ENOMEM;
8d1c802b 5180 nh->fib6_info = rt;
6b9ea5a6
RP
5181 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5182 list_add_tail(&nh->next, rt6_nh_list);
5183
5184 return 0;
5185}
5186
8d1c802b
DA
5187static void ip6_route_mpath_notify(struct fib6_info *rt,
5188 struct fib6_info *rt_last,
3b1137fe
DA
5189 struct nl_info *info,
5190 __u16 nlflags)
5191{
5192 /* if this is an APPEND route, then rt points to the first route
5193 * inserted and rt_last points to last route inserted. Userspace
5194 * wants a consistent dump of the route which starts at the first
5195 * nexthop. Since sibling routes are always added at the end of
5196 * the list, find the first sibling of the last route appended
5197 */
93c2fb25
DA
5198 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5199 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 5200 struct fib6_info,
93c2fb25 5201 fib6_siblings);
3b1137fe
DA
5202 }
5203
5204 if (rt)
5205 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5206}
5207
0ee0f47c
IS
5208static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5209{
5210 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5211 bool should_notify = false;
5212 struct fib6_info *leaf;
5213 struct fib6_node *fn;
5214
5215 rcu_read_lock();
5216 fn = rcu_dereference(rt->fib6_node);
5217 if (!fn)
5218 goto out;
5219
5220 leaf = rcu_dereference(fn->leaf);
5221 if (!leaf)
5222 goto out;
5223
5224 if (rt == leaf ||
5225 (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5226 rt6_qualify_for_ecmp(leaf)))
5227 should_notify = true;
5228out:
5229 rcu_read_unlock();
5230
5231 return should_notify;
5232}
5233
4619bcf9
DA
5234static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
5235 struct netlink_ext_ack *extack)
5236{
5237 if (nla_len(nla) < sizeof(*gw)) {
5238 NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
5239 return -EINVAL;
5240 }
5241
5242 *gw = nla_get_in6_addr(nla);
5243
5244 return 0;
5245}
5246
333c4301
DA
5247static int ip6_route_multipath_add(struct fib6_config *cfg,
5248 struct netlink_ext_ack *extack)
51ebd318 5249{
8d1c802b 5250 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 5251 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
5252 struct fib6_config r_cfg;
5253 struct rtnexthop *rtnh;
8d1c802b 5254 struct fib6_info *rt;
6b9ea5a6
RP
5255 struct rt6_nh *err_nh;
5256 struct rt6_nh *nh, *nh_safe;
3b1137fe 5257 __u16 nlflags;
51ebd318
ND
5258 int remaining;
5259 int attrlen;
6b9ea5a6
RP
5260 int err = 1;
5261 int nhn = 0;
5262 int replace = (cfg->fc_nlinfo.nlh &&
5263 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5264 LIST_HEAD(rt6_nh_list);
51ebd318 5265
3b1137fe
DA
5266 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5267 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5268 nlflags |= NLM_F_APPEND;
5269
35f1b4e9 5270 remaining = cfg->fc_mp_len;
51ebd318 5271 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 5272
6b9ea5a6 5273 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 5274 * fib6_info structs per nexthop
6b9ea5a6 5275 */
51ebd318
ND
5276 while (rtnh_ok(rtnh, remaining)) {
5277 memcpy(&r_cfg, cfg, sizeof(*cfg));
5278 if (rtnh->rtnh_ifindex)
5279 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5280
5281 attrlen = rtnh_attrlen(rtnh);
5282 if (attrlen > 0) {
5283 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5284
5285 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5286 if (nla) {
95bdba23 5287 err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
4619bcf9 5288 extack);
95bdba23
DA
5289 if (err)
5290 goto cleanup;
4619bcf9 5291
51ebd318
ND
5292 r_cfg.fc_flags |= RTF_GATEWAY;
5293 }
19e42e45 5294 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
8bda81a4
DA
5295
5296 /* RTA_ENCAP_TYPE length checked in
5297 * lwtunnel_valid_encap_type_attr
5298 */
19e42e45
RP
5299 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5300 if (nla)
5301 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 5302 }
6b9ea5a6 5303
68e2ffde 5304 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 5305 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
5306 if (IS_ERR(rt)) {
5307 err = PTR_ERR(rt);
5308 rt = NULL;
6b9ea5a6 5309 goto cleanup;
8c5b83f0 5310 }
b5d2d75e
DA
5311 if (!rt6_qualify_for_ecmp(rt)) {
5312 err = -EINVAL;
5313 NL_SET_ERR_MSG(extack,
5314 "Device only routes can not be added for IPv6 using the multipath API.");
5315 fib6_info_release(rt);
5316 goto cleanup;
5317 }
6b9ea5a6 5318
1cf844c7 5319 rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 5320
d4ead6b3
DA
5321 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5322 rt, &r_cfg);
51ebd318 5323 if (err) {
93531c67 5324 fib6_info_release(rt);
6b9ea5a6
RP
5325 goto cleanup;
5326 }
5327
5328 rtnh = rtnh_next(rtnh, &remaining);
5329 }
5330
9eee3b49
IS
5331 if (list_empty(&rt6_nh_list)) {
5332 NL_SET_ERR_MSG(extack,
5333 "Invalid nexthop configuration - no valid nexthops");
5334 return -EINVAL;
5335 }
5336
3b1137fe
DA
5337 /* for add and replace send one notification with all nexthops.
5338 * Skip the notification in fib6_add_rt2node and send one with
5339 * the full route when done
5340 */
5341 info->skip_notify = 1;
5342
ebee3cad
IS
5343 /* For add and replace, send one notification with all nexthops. For
5344 * append, send one notification with all appended nexthops.
5345 */
5346 info->skip_notify_kernel = 1;
5347
6b9ea5a6
RP
5348 err_nh = NULL;
5349 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b 5350 err = __ip6_ins_rt(nh->fib6_info, info, extack);
3b1137fe 5351
6b9ea5a6
RP
5352 if (err) {
5353 if (replace && nhn)
a5a82d84
JK
5354 NL_SET_ERR_MSG_MOD(extack,
5355 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
5356 err_nh = nh;
5357 goto add_errout;
51ebd318 5358 }
685f7d53
ED
5359 /* save reference to last route successfully inserted */
5360 rt_last = nh->fib6_info;
5361
5362 /* save reference to first route for notification */
5363 if (!rt_notif)
5364 rt_notif = nh->fib6_info;
6b9ea5a6 5365
1a72418b 5366 /* Because each route is added like a single route we remove
27596472
MK
5367 * these flags after the first nexthop: if there is a collision,
5368 * we have already failed to add the first nexthop:
5369 * fib6_add_rt2node() has rejected it; when replacing, old
5370 * nexthops have been replaced by first new, the rest should
5371 * be added to it.
1a72418b 5372 */
864db232
MUA
5373 if (cfg->fc_nlinfo.nlh) {
5374 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5375 NLM_F_REPLACE);
5376 cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5377 }
6b9ea5a6
RP
5378 nhn++;
5379 }
5380
0ee0f47c
IS
5381 /* An in-kernel notification should only be sent in case the new
5382 * multipath route is added as the first route in the node, or if
5383 * it was appended to it. We pass 'rt_notif' since it is the first
5384 * sibling and might allow us to skip some checks in the replace case.
5385 */
5386 if (ip6_route_mpath_should_notify(rt_notif)) {
5387 enum fib_event_type fib_event;
5388
5389 if (rt_notif->fib6_nsiblings != nhn - 1)
5390 fib_event = FIB_EVENT_ENTRY_APPEND;
5391 else
caafb250 5392 fib_event = FIB_EVENT_ENTRY_REPLACE;
0ee0f47c
IS
5393
5394 err = call_fib6_multipath_entry_notifiers(info->nl_net,
5395 fib_event, rt_notif,
5396 nhn - 1, extack);
5397 if (err) {
5398 /* Delete all the siblings that were just added */
5399 err_nh = NULL;
5400 goto add_errout;
5401 }
5402 }
ebee3cad 5403
3b1137fe
DA
5404 /* success ... tell user about new route */
5405 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
5406 goto cleanup;
5407
5408add_errout:
3b1137fe
DA
5409 /* send notification for routes that were added so that
5410 * the delete notifications sent by ip6_route_del are
5411 * coherent
5412 */
5413 if (rt_notif)
5414 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5415
6b9ea5a6
RP
5416 /* Delete routes that were already added */
5417 list_for_each_entry(nh, &rt6_nh_list, next) {
5418 if (err_nh == nh)
5419 break;
333c4301 5420 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
5421 }
5422
5423cleanup:
5424 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
685f7d53 5425 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
5426 list_del(&nh->next);
5427 kfree(nh);
5428 }
5429
5430 return err;
5431}
5432
333c4301
DA
5433static int ip6_route_multipath_del(struct fib6_config *cfg,
5434 struct netlink_ext_ack *extack)
6b9ea5a6
RP
5435{
5436 struct fib6_config r_cfg;
5437 struct rtnexthop *rtnh;
2291267e 5438 int last_err = 0;
6b9ea5a6
RP
5439 int remaining;
5440 int attrlen;
2291267e 5441 int err;
6b9ea5a6
RP
5442
5443 remaining = cfg->fc_mp_len;
5444 rtnh = (struct rtnexthop *)cfg->fc_mp;
5445
5446 /* Parse a Multipath Entry */
5447 while (rtnh_ok(rtnh, remaining)) {
5448 memcpy(&r_cfg, cfg, sizeof(*cfg));
5449 if (rtnh->rtnh_ifindex)
5450 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5451
5452 attrlen = rtnh_attrlen(rtnh);
5453 if (attrlen > 0) {
5454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5455
5456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5457 if (nla) {
1ff15a71
DA
5458 err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5459 extack);
e30a845b
DA
5460 if (err) {
5461 last_err = err;
5462 goto next_rtnh;
5463 }
1ff15a71 5464
6b9ea5a6
RP
5465 r_cfg.fc_flags |= RTF_GATEWAY;
5466 }
5467 }
333c4301 5468 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
5469 if (err)
5470 last_err = err;
5471
e30a845b 5472next_rtnh:
51ebd318
ND
5473 rtnh = rtnh_next(rtnh, &remaining);
5474 }
5475
5476 return last_err;
5477}
5478
c21ef3e3
DA
5479static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5480 struct netlink_ext_ack *extack)
1da177e4 5481{
86872cb5
TG
5482 struct fib6_config cfg;
5483 int err;
1da177e4 5484
333c4301 5485 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
5486 if (err < 0)
5487 return err;
5488
5b98324e
DA
5489 if (cfg.fc_nh_id &&
5490 !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5491 NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5492 return -EINVAL;
5493 }
5494
51ebd318 5495 if (cfg.fc_mp)
333c4301 5496 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
5497 else {
5498 cfg.fc_delete_all_nh = 1;
333c4301 5499 return ip6_route_del(&cfg, extack);
0ae81335 5500 }
1da177e4
LT
5501}
5502
c21ef3e3
DA
5503static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5504 struct netlink_ext_ack *extack)
1da177e4 5505{
86872cb5
TG
5506 struct fib6_config cfg;
5507 int err;
1da177e4 5508
333c4301 5509 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
5510 if (err < 0)
5511 return err;
5512
67f69513
DA
5513 if (cfg.fc_metric == 0)
5514 cfg.fc_metric = IP6_RT_PRIO_USER;
5515
51ebd318 5516 if (cfg.fc_mp)
333c4301 5517 return ip6_route_multipath_add(&cfg, extack);
51ebd318 5518 else
acb54e3c 5519 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
5520}
5521
a1b7a1f0
DA
5522/* add the overhead of this fib6_nh to nexthop_len */
5523static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
339bf98f 5524{
a1b7a1f0 5525 int *nexthop_len = arg;
beb1afac 5526
a1b7a1f0
DA
5527 *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
5528 + NLA_ALIGN(sizeof(struct rtnexthop))
5529 + nla_total_size(16); /* RTA_GATEWAY */
5530
5531 if (nh->fib_nh_lws) {
5532 /* RTA_ENCAP_TYPE */
5533 *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5534 /* RTA_ENCAP */
5535 *nexthop_len += nla_total_size(2);
5536 }
f88d8ea6 5537
a1b7a1f0
DA
5538 return 0;
5539}
beb1afac 5540
a1b7a1f0
DA
5541static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5542{
5543 int nexthop_len;
5544
5545 if (f6i->nh) {
5546 nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5547 nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5548 &nexthop_len);
5549 } else {
4cc59f38 5550 struct fib6_info *sibling, *next_sibling;
a1b7a1f0
DA
5551 struct fib6_nh *nh = f6i->fib6_nh;
5552
5553 nexthop_len = 0;
5554 if (f6i->fib6_nsiblings) {
4cc59f38 5555 rt6_nh_nlmsg_size(nh, &nexthop_len);
a1b7a1f0 5556
4cc59f38
LW
5557 list_for_each_entry_safe(sibling, next_sibling,
5558 &f6i->fib6_siblings, fib6_siblings) {
5559 rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
5560 }
a1b7a1f0
DA
5561 }
5562 nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
beb1afac
DA
5563 }
5564
339bf98f
TG
5565 return NLMSG_ALIGN(sizeof(struct rtmsg))
5566 + nla_total_size(16) /* RTA_SRC */
5567 + nla_total_size(16) /* RTA_DST */
5568 + nla_total_size(16) /* RTA_GATEWAY */
5569 + nla_total_size(16) /* RTA_PREFSRC */
5570 + nla_total_size(4) /* RTA_TABLE */
5571 + nla_total_size(4) /* RTA_IIF */
5572 + nla_total_size(4) /* RTA_OIF */
5573 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 5574 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 5575 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 5576 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 5577 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
5578 + nexthop_len;
5579}
5580
f88d8ea6
DA
5581static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5582 unsigned char *flags)
5583{
5584 if (nexthop_is_multipath(nh)) {
5585 struct nlattr *mp;
5586
4255ff05 5587 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
f88d8ea6
DA
5588 if (!mp)
5589 goto nla_put_failure;
5590
7bdf4de1 5591 if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
f88d8ea6
DA
5592 goto nla_put_failure;
5593
5594 nla_nest_end(skb, mp);
5595 } else {
5596 struct fib6_nh *fib6_nh;
5597
5598 fib6_nh = nexthop_fib6_nh(nh);
7bdf4de1 5599 if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
f88d8ea6
DA
5600 flags, false) < 0)
5601 goto nla_put_failure;
5602 }
5603
5604 return 0;
5605
5606nla_put_failure:
5607 return -EMSGSIZE;
5608}
5609
d4ead6b3 5610static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 5611 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 5612 struct in6_addr *dest, struct in6_addr *src,
15e47304 5613 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 5614 unsigned int flags)
1da177e4 5615{
e8dfd42c 5616 struct rt6_info *rt6 = dst_rt6_info(dst);
22d0bd82
XL
5617 struct rt6key *rt6_dst, *rt6_src;
5618 u32 *pmetrics, table, rt6_flags;
f88d8ea6 5619 unsigned char nh_flags = 0;
2d7202bf 5620 struct nlmsghdr *nlh;
22d0bd82 5621 struct rtmsg *rtm;
d4ead6b3 5622 long expires = 0;
1da177e4 5623
15e47304 5624 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 5625 if (!nlh)
26932566 5626 return -EMSGSIZE;
2d7202bf 5627
22d0bd82
XL
5628 if (rt6) {
5629 rt6_dst = &rt6->rt6i_dst;
5630 rt6_src = &rt6->rt6i_src;
5631 rt6_flags = rt6->rt6i_flags;
5632 } else {
5633 rt6_dst = &rt->fib6_dst;
5634 rt6_src = &rt->fib6_src;
5635 rt6_flags = rt->fib6_flags;
5636 }
5637
2d7202bf 5638 rtm = nlmsg_data(nlh);
1da177e4 5639 rtm->rtm_family = AF_INET6;
22d0bd82
XL
5640 rtm->rtm_dst_len = rt6_dst->plen;
5641 rtm->rtm_src_len = rt6_src->plen;
1da177e4 5642 rtm->rtm_tos = 0;
93c2fb25
DA
5643 if (rt->fib6_table)
5644 table = rt->fib6_table->tb6_id;
c71099ac 5645 else
9e762a4a 5646 table = RT6_TABLE_UNSPEC;
97f0082a 5647 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
5648 if (nla_put_u32(skb, RTA_TABLE, table))
5649 goto nla_put_failure;
e8478e80
DA
5650
5651 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
5652 rtm->rtm_flags = 0;
5653 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 5654 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 5655
22d0bd82 5656 if (rt6_flags & RTF_CACHE)
1da177e4
LT
5657 rtm->rtm_flags |= RTM_F_CLONED;
5658
d4ead6b3
DA
5659 if (dest) {
5660 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 5661 goto nla_put_failure;
1ab1457c 5662 rtm->rtm_dst_len = 128;
1da177e4 5663 } else if (rtm->rtm_dst_len)
22d0bd82 5664 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 5665 goto nla_put_failure;
1da177e4
LT
5666#ifdef CONFIG_IPV6_SUBTREES
5667 if (src) {
930345ea 5668 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 5669 goto nla_put_failure;
1ab1457c 5670 rtm->rtm_src_len = 128;
c78679e8 5671 } else if (rtm->rtm_src_len &&
22d0bd82 5672 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 5673 goto nla_put_failure;
1da177e4 5674#endif
7bc570c8
YH
5675 if (iif) {
5676#ifdef CONFIG_IPV6_MROUTE
22d0bd82 5677 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
5678 int err = ip6mr_get_route(net, skb, rtm, portid);
5679
5680 if (err == 0)
5681 return 0;
5682 if (err < 0)
5683 goto nla_put_failure;
7bc570c8
YH
5684 } else
5685#endif
c78679e8
DM
5686 if (nla_put_u32(skb, RTA_IIF, iif))
5687 goto nla_put_failure;
d4ead6b3 5688 } else if (dest) {
1da177e4 5689 struct in6_addr saddr_buf;
d4ead6b3 5690 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 5691 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 5692 goto nla_put_failure;
1da177e4 5693 }
2d7202bf 5694
93c2fb25 5695 if (rt->fib6_prefsrc.plen) {
c3968a85 5696 struct in6_addr saddr_buf;
93c2fb25 5697 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 5698 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 5699 goto nla_put_failure;
c3968a85
DW
5700 }
5701
d4ead6b3
DA
5702 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5703 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
5704 goto nla_put_failure;
5705
93c2fb25 5706 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 5707 goto nla_put_failure;
8253947e 5708
beb1afac
DA
5709 /* For multipath routes, walk the siblings list and add
5710 * each as a nexthop within RTA_MULTIPATH.
5711 */
22d0bd82
XL
5712 if (rt6) {
5713 if (rt6_flags & RTF_GATEWAY &&
5714 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5715 goto nla_put_failure;
5716
5717 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5718 goto nla_put_failure;
6b13d8f7
OH
5719
5720 if (dst->lwtstate &&
5721 lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
5722 goto nla_put_failure;
22d0bd82 5723 } else if (rt->fib6_nsiblings) {
8d1c802b 5724 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
5725 struct nlattr *mp;
5726
ae0be8de 5727 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
beb1afac
DA
5728 if (!mp)
5729 goto nla_put_failure;
5730
1cf844c7 5731 if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
597aa16c
XL
5732 rt->fib6_nh->fib_nh_weight, AF_INET6,
5733 0) < 0)
beb1afac
DA
5734 goto nla_put_failure;
5735
5736 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 5737 &rt->fib6_siblings, fib6_siblings) {
1cf844c7 5738 if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
7bdf4de1 5739 sibling->fib6_nh->fib_nh_weight,
597aa16c 5740 AF_INET6, 0) < 0)
beb1afac
DA
5741 goto nla_put_failure;
5742 }
5743
5744 nla_nest_end(skb, mp);
f88d8ea6
DA
5745 } else if (rt->nh) {
5746 if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5747 goto nla_put_failure;
ecc5663c 5748
f88d8ea6
DA
5749 if (nexthop_is_blackhole(rt->nh))
5750 rtm->rtm_type = RTN_BLACKHOLE;
5751
bdf00bf2 5752 if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
4f80116d 5753 rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
f88d8ea6
DA
5754 goto nla_put_failure;
5755
5756 rtm->rtm_flags |= nh_flags;
5757 } else {
7bdf4de1 5758 if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
ecc5663c 5759 &nh_flags, false) < 0)
beb1afac 5760 goto nla_put_failure;
ecc5663c
DA
5761
5762 rtm->rtm_flags |= nh_flags;
beb1afac
DA
5763 }
5764
22d0bd82 5765 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
5766 expires = dst ? dst->expires : rt->expires;
5767 expires -= jiffies;
5768 }
69cdf8f9 5769
bb3c4ab9 5770 if (!dst) {
d95d6320 5771 if (READ_ONCE(rt->offload))
bb3c4ab9 5772 rtm->rtm_flags |= RTM_F_OFFLOAD;
d95d6320 5773 if (READ_ONCE(rt->trap))
bb3c4ab9 5774 rtm->rtm_flags |= RTM_F_TRAP;
d95d6320 5775 if (READ_ONCE(rt->offload_failed))
0c5fcf9e 5776 rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
bb3c4ab9
IS
5777 }
5778
d4ead6b3 5779 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 5780 goto nla_put_failure;
2d7202bf 5781
22d0bd82 5782 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
5783 goto nla_put_failure;
5784
19e42e45 5785
053c095a
JB
5786 nlmsg_end(skb, nlh);
5787 return 0;
2d7202bf
TG
5788
5789nla_put_failure:
26932566
PM
5790 nlmsg_cancel(skb, nlh);
5791 return -EMSGSIZE;
1da177e4
LT
5792}
5793
2c170e07
DA
5794static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5795{
5796 const struct net_device *dev = arg;
5797
5798 if (nh->fib_nh_dev == dev)
5799 return 1;
5800
5801 return 0;
5802}
5803
13e38901
DA
5804static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5805 const struct net_device *dev)
5806{
2c170e07
DA
5807 if (f6i->nh) {
5808 struct net_device *_dev = (struct net_device *)dev;
5809
5810 return !!nexthop_for_each_fib6_nh(f6i->nh,
5811 fib6_info_nh_uses_dev,
5812 _dev);
5813 }
5814
1cf844c7 5815 if (f6i->fib6_nh->fib_nh_dev == dev)
13e38901
DA
5816 return true;
5817
5818 if (f6i->fib6_nsiblings) {
5819 struct fib6_info *sibling, *next_sibling;
5820
5821 list_for_each_entry_safe(sibling, next_sibling,
5822 &f6i->fib6_siblings, fib6_siblings) {
1cf844c7 5823 if (sibling->fib6_nh->fib_nh_dev == dev)
13e38901
DA
5824 return true;
5825 }
5826 }
5827
5828 return false;
5829}
5830
/*
 * State shared between rt6_dump_route() and rt6_nh_dump_exceptions()
 * while dumping the exception entries of one route:
 *   dump  - the rtnl dump argument (net, skb, callback)
 *   rt    - route whose exceptions are being dumped
 *   flags - netlink message flags for every emitted entry
 *   skip  - entries still to be skipped before emitting
 *   count - entries handled so far (emitted or found expired)
 */
1e47b483
SB
5831struct fib6_nh_exception_dump_walker {
5832 struct rt6_rtnl_dump_arg *dump;
5833 struct fib6_info *rt;
5834 unsigned int flags;
5835 unsigned int skip;
5836 unsigned int count;
5837};
5838
5839static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5840{
5841 struct fib6_nh_exception_dump_walker *w = arg;
5842 struct rt6_rtnl_dump_arg *dump = w->dump;
5843 struct rt6_exception_bucket *bucket;
5844 struct rt6_exception *rt6_ex;
5845 int i, err;
5846
5847 bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5848 if (!bucket)
5849 return 0;
5850
5851 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5852 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5853 if (w->skip) {
5854 w->skip--;
5855 continue;
5856 }
5857
5858 /* Expiration of entries doesn't bump sernum, insertion
5859 * does. Removal is triggered by insertion, so we can
5860 * rely on the fact that if entries change between two
5861 * partial dumps, this node is scanned again completely,
5862 * see rt6_insert_exception() and fib6_dump_table().
5863 *
5864 * Count expired entries we go through as handled
5865 * entries that we'll skip next time, in case of partial
5866 * node dump. Otherwise, if entries expire meanwhile,
5867 * we'll skip the wrong amount.
5868 */
5869 if (rt6_check_expired(rt6_ex->rt6i)) {
5870 w->count++;
5871 continue;
5872 }
5873
5874 err = rt6_fill_node(dump->net, dump->skb, w->rt,
5875 &rt6_ex->rt6i->dst, NULL, NULL, 0,
5876 RTM_NEWROUTE,
5877 NETLINK_CB(dump->cb->skb).portid,
5878 dump->cb->nlh->nlmsg_seq, w->flags);
5879 if (err)
5880 return err;
5881
5882 w->count++;
5883 }
5884 bucket++;
5885 }
5886
5887 return 0;
5888}
5889
bf9a8a06 5890/* Return -1 if done with node, number of handled routes on partial dump */
1e47b483 5891int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
1da177e4
LT
5892{
5893 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
5894 struct fib_dump_filter *filter = &arg->filter;
5895 unsigned int flags = NLM_F_MULTI;
1f17e2f2 5896 struct net *net = arg->net;
1e47b483 5897 int count = 0;
1f17e2f2 5898
421842ed 5899 if (rt == net->ipv6.fib6_null_entry)
bf9a8a06 5900 return -1;
1da177e4 5901
13e38901
DA
5902 if ((filter->flags & RTM_F_PREFIX) &&
5903 !(rt->fib6_flags & RTF_PREFIX_RT)) {
5904 /* success since this is not a prefix route */
bf9a8a06 5905 return -1;
13e38901 5906 }
1e47b483
SB
5907 if (filter->filter_set &&
5908 ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
5909 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
5910 (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5911 return -1;
5912 }
5913
5914 if (filter->filter_set ||
5915 !filter->dump_routes || !filter->dump_exceptions) {
13e38901 5916 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 5917 }
1da177e4 5918
1e47b483
SB
5919 if (filter->dump_routes) {
5920 if (skip) {
5921 skip--;
5922 } else {
5923 if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5924 0, RTM_NEWROUTE,
5925 NETLINK_CB(arg->cb->skb).portid,
5926 arg->cb->nlh->nlmsg_seq, flags)) {
5927 return 0;
5928 }
5929 count++;
5930 }
5931 }
5932
5933 if (filter->dump_exceptions) {
5934 struct fib6_nh_exception_dump_walker w = { .dump = arg,
5935 .rt = rt,
5936 .flags = flags,
5937 .skip = skip,
5938 .count = 0 };
5939 int err;
5940
3b525691 5941 rcu_read_lock();
1e47b483
SB
5942 if (rt->nh) {
5943 err = nexthop_for_each_fib6_nh(rt->nh,
5944 rt6_nh_dump_exceptions,
5945 &w);
5946 } else {
5947 err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5948 }
3b525691 5949 rcu_read_unlock();
1e47b483
SB
5950
5951 if (err)
74fd304f 5952 return count + w.count;
1e47b483 5953 }
bf9a8a06
SB
5954
5955 return -1;
1da177e4
LT
5956}
5957
0eff0a27
JK
5958static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5959 const struct nlmsghdr *nlh,
5960 struct nlattr **tb,
5961 struct netlink_ext_ack *extack)
5962{
5963 struct rtmsg *rtm;
5964 int i, err;
5965
5966 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5967 NL_SET_ERR_MSG_MOD(extack,
5968 "Invalid header for get route request");
5969 return -EINVAL;
5970 }
5971
5972 if (!netlink_strict_get_check(skb))
8cb08174
JB
5973 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5974 rtm_ipv6_policy, extack);
0eff0a27
JK
5975
5976 rtm = nlmsg_data(nlh);
5977 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5978 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5979 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5980 rtm->rtm_type) {
5981 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5982 return -EINVAL;
5983 }
5984 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5985 NL_SET_ERR_MSG_MOD(extack,
5986 "Invalid flags for get route request");
5987 return -EINVAL;
5988 }
5989
8cb08174
JB
5990 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5991 rtm_ipv6_policy, extack);
0eff0a27
JK
5992 if (err)
5993 return err;
5994
5995 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5996 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5997 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5998 return -EINVAL;
5999 }
6000
6001 for (i = 0; i <= RTA_MAX; i++) {
6002 if (!tb[i])
6003 continue;
6004
6005 switch (i) {
6006 case RTA_SRC:
6007 case RTA_DST:
6008 case RTA_IIF:
6009 case RTA_OIF:
6010 case RTA_MARK:
6011 case RTA_UID:
6012 case RTA_SPORT:
6013 case RTA_DPORT:
6014 case RTA_IP_PROTO:
6015 break;
6016 default:
6017 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
6018 return -EINVAL;
6019 }
6020 }
6021
6022 return 0;
6023}
6024
c21ef3e3
DA
6025static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
6026 struct netlink_ext_ack *extack)
1da177e4 6027{
3b1e0a65 6028 struct net *net = sock_net(in_skb->sk);
ab364a6f 6029 struct nlattr *tb[RTA_MAX+1];
18c3a61c 6030 int err, iif = 0, oif = 0;
a68886a6 6031 struct fib6_info *from;
18c3a61c 6032 struct dst_entry *dst;
ab364a6f 6033 struct rt6_info *rt;
1da177e4 6034 struct sk_buff *skb;
ab364a6f 6035 struct rtmsg *rtm;
744486d4 6036 struct flowi6 fl6 = {};
18c3a61c 6037 bool fibmatch;
1da177e4 6038
0eff0a27 6039 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
6040 if (err < 0)
6041 goto errout;
1da177e4 6042
ab364a6f 6043 err = -EINVAL;
38b7097b
HFS
6044 rtm = nlmsg_data(nlh);
6045 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 6046 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 6047
ab364a6f
TG
6048 if (tb[RTA_SRC]) {
6049 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
6050 goto errout;
6051
4e3fd7a0 6052 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
6053 }
6054
6055 if (tb[RTA_DST]) {
6056 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
6057 goto errout;
6058
4e3fd7a0 6059 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
6060 }
6061
6062 if (tb[RTA_IIF])
6063 iif = nla_get_u32(tb[RTA_IIF]);
6064
6065 if (tb[RTA_OIF])
72331bc0 6066 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 6067
2e47b291
LC
6068 if (tb[RTA_MARK])
6069 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
6070
622ec2c9
LC
6071 if (tb[RTA_UID])
6072 fl6.flowi6_uid = make_kuid(current_user_ns(),
6073 nla_get_u32(tb[RTA_UID]));
6074 else
6075 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
6076
eacb9384
RP
6077 if (tb[RTA_SPORT])
6078 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
6079
6080 if (tb[RTA_DPORT])
6081 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
6082
6083 if (tb[RTA_IP_PROTO]) {
6084 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
6085 &fl6.flowi6_proto, AF_INET6,
6086 extack);
eacb9384
RP
6087 if (err)
6088 goto errout;
6089 }
6090
1da177e4
LT
6091 if (iif) {
6092 struct net_device *dev;
72331bc0
SL
6093 int flags = 0;
6094
121622db
FW
6095 rcu_read_lock();
6096
6097 dev = dev_get_by_index_rcu(net, iif);
1da177e4 6098 if (!dev) {
121622db 6099 rcu_read_unlock();
1da177e4 6100 err = -ENODEV;
ab364a6f 6101 goto errout;
1da177e4 6102 }
72331bc0
SL
6103
6104 fl6.flowi6_iif = iif;
6105
6106 if (!ipv6_addr_any(&fl6.saddr))
6107 flags |= RT6_LOOKUP_F_HAS_SADDR;
6108
b75cc8f9 6109 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
6110
6111 rcu_read_unlock();
72331bc0
SL
6112 } else {
6113 fl6.flowi6_oif = oif;
6114
58acfd71 6115 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
6116 }
6117
18c3a61c 6118
e8dfd42c 6119 rt = dst_rt6_info(dst);
18c3a61c
RP
6120 if (rt->dst.error) {
6121 err = rt->dst.error;
6122 ip6_rt_put(rt);
6123 goto errout;
1da177e4
LT
6124 }
6125
9d6acb3b
WC
6126 if (rt == net->ipv6.ip6_null_entry) {
6127 err = rt->dst.error;
6128 ip6_rt_put(rt);
6129 goto errout;
6130 }
6131
ab364a6f 6132 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 6133 if (!skb) {
94e187c0 6134 ip6_rt_put(rt);
ab364a6f
TG
6135 err = -ENOBUFS;
6136 goto errout;
6137 }
1da177e4 6138
d8d1f30b 6139 skb_dst_set(skb, &rt->dst);
a68886a6
DA
6140
6141 rcu_read_lock();
6142 from = rcu_dereference(rt->from);
886b7a50
MKL
6143 if (from) {
6144 if (fibmatch)
6145 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
6146 iif, RTM_NEWROUTE,
6147 NETLINK_CB(in_skb).portid,
6148 nlh->nlmsg_seq, 0);
6149 else
6150 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
6151 &fl6.saddr, iif, RTM_NEWROUTE,
6152 NETLINK_CB(in_skb).portid,
6153 nlh->nlmsg_seq, 0);
6154 } else {
6155 err = -ENETUNREACH;
6156 }
a68886a6
DA
6157 rcu_read_unlock();
6158
1da177e4 6159 if (err < 0) {
ab364a6f
TG
6160 kfree_skb(skb);
6161 goto errout;
1da177e4
LT
6162 }
6163
15e47304 6164 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 6165errout:
1da177e4 6166 return err;
1da177e4
LT
6167}
6168
8d1c802b 6169void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 6170 unsigned int nlm_flags)
1da177e4
LT
6171{
6172 struct sk_buff *skb;
5578689a 6173 struct net *net = info->nl_net;
528c4ceb
DL
6174 u32 seq;
6175 int err;
6176
6177 err = -ENOBUFS;
38308473 6178 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 6179
19e42e45 6180 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 6181 if (!skb)
21713ebc
TG
6182 goto errout;
6183
d4ead6b3
DA
6184 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6185 event, info->portid, seq, nlm_flags);
26932566
PM
6186 if (err < 0) {
6187 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6188 WARN_ON(err == -EMSGSIZE);
6189 kfree_skb(skb);
6190 goto errout;
6191 }
15e47304 6192 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
6193 info->nlh, gfp_any());
6194 return;
21713ebc
TG
6195errout:
6196 if (err < 0)
5578689a 6197 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
6198}
6199
19a3b7ee
DA
6200void fib6_rt_update(struct net *net, struct fib6_info *rt,
6201 struct nl_info *info)
6202{
6203 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6204 struct sk_buff *skb;
6205 int err = -ENOBUFS;
6206
19a3b7ee
DA
6207 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6208 if (!skb)
6209 goto errout;
6210
6211 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6212 RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6213 if (err < 0) {
6214 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6215 WARN_ON(err == -EMSGSIZE);
6216 kfree_skb(skb);
6217 goto errout;
6218 }
6219 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6220 info->nlh, gfp_any());
6221 return;
6222errout:
6223 if (err < 0)
6224 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6225}
6226
907eea48 6227void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
0c5fcf9e 6228 bool offload, bool trap, bool offload_failed)
907eea48
AC
6229{
6230 struct sk_buff *skb;
6231 int err;
6232
d95d6320
ED
6233 if (READ_ONCE(f6i->offload) == offload &&
6234 READ_ONCE(f6i->trap) == trap &&
6235 READ_ONCE(f6i->offload_failed) == offload_failed)
907eea48
AC
6236 return;
6237
d95d6320
ED
6238 WRITE_ONCE(f6i->offload, offload);
6239 WRITE_ONCE(f6i->trap, trap);
6fad361a
AC
6240
6241 /* 2 means send notifications only if offload_failed was changed. */
6242 if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
d95d6320 6243 READ_ONCE(f6i->offload_failed) == offload_failed)
6fad361a
AC
6244 return;
6245
d95d6320 6246 WRITE_ONCE(f6i->offload_failed, offload_failed);
907eea48
AC
6247
6248 if (!rcu_access_pointer(f6i->fib6_node))
6249 /* The route was removed from the tree, do not send
89e8347f 6250 * notification.
907eea48
AC
6251 */
6252 return;
6253
6254 if (!net->ipv6.sysctl.fib_notify_on_flag_change)
6255 return;
6256
6257 skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
6258 if (!skb) {
6259 err = -ENOBUFS;
6260 goto errout;
6261 }
6262
6263 err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
6264 0, 0);
6265 if (err < 0) {
6266 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6267 WARN_ON(err == -EMSGSIZE);
6268 kfree_skb(skb);
6269 goto errout;
6270 }
6271
6272 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
6273 return;
6274
6275errout:
6276 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6277}
6278EXPORT_SYMBOL(fib6_info_hw_flags_set);
6279
8ed67789 6280static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 6281 unsigned long event, void *ptr)
8ed67789 6282{
351638e7 6283 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 6284 struct net *net = dev_net(dev);
8ed67789 6285
242d3a49
WC
6286 if (!(dev->flags & IFF_LOOPBACK))
6287 return NOTIFY_OK;
6288
6289 if (event == NETDEV_REGISTER) {
1cf844c7 6290 net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
d8d1f30b 6291 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
6292 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6293#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 6294 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 6295 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 6296 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 6297 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 6298#endif
76da0704
WC
6299 } else if (event == NETDEV_UNREGISTER &&
6300 dev->reg_state != NETREG_UNREGISTERED) {
6301 /* NETDEV_UNREGISTER could be fired for multiple times by
6302 * netdev_wait_allrefs(). Make sure we only call this once.
6303 */
12d94a80 6304 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 6305#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
6306 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6307 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
6308#endif
6309 }
6310
6311 return NOTIFY_OK;
6312}
6313
1da177e4
LT
6314/*
6315 * /proc
6316 */
6317
6318#ifdef CONFIG_PROC_FS
1da177e4
LT
6319static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6320{
69ddb805 6321 struct net *net = (struct net *)seq->private;
1da177e4 6322 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
6323 net->ipv6.rt6_stats->fib_nodes,
6324 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 6325 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
6326 net->ipv6.rt6_stats->fib_rt_entries,
6327 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 6328 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 6329 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
6330
6331 return 0;
6332}
1da177e4
LT
6333#endif /* CONFIG_PROC_FS */
6334
6335#ifdef CONFIG_SYSCTL
6336
32927393
CH
6337static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6338 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 6339{
c486da34
LAG
6340 struct net *net;
6341 int delay;
f0fb9b28 6342 int ret;
c486da34 6343 if (!write)
1da177e4 6344 return -EINVAL;
c486da34
LAG
6345
6346 net = (struct net *)ctl->extra1;
6347 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
6348 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6349 if (ret)
6350 return ret;
6351
2ac3ac8f 6352 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 6353 return 0;
1da177e4
LT
6354}
6355
ed792e28 6356static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 6357 {
06e6c88f
AK
6358 .procname = "max_size",
6359 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4 6360 .maxlen = sizeof(int),
06e6c88f
AK
6361 .mode = 0644,
6362 .proc_handler = proc_dointvec,
1da177e4
LT
6363 },
6364 {
1da177e4 6365 .procname = "gc_thresh",
9a7ec3a9 6366 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
6367 .maxlen = sizeof(int),
6368 .mode = 0644,
6d9f239a 6369 .proc_handler = proc_dointvec,
1da177e4
LT
6370 },
6371 {
06e6c88f
AK
6372 .procname = "flush",
6373 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 6374 .maxlen = sizeof(int),
06e6c88f
AK
6375 .mode = 0200,
6376 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
6377 },
6378 {
1da177e4 6379 .procname = "gc_min_interval",
4990509f 6380 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
6381 .maxlen = sizeof(int),
6382 .mode = 0644,
6d9f239a 6383 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
6384 },
6385 {
1da177e4 6386 .procname = "gc_timeout",
4990509f 6387 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
6388 .maxlen = sizeof(int),
6389 .mode = 0644,
6d9f239a 6390 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
6391 },
6392 {
1da177e4 6393 .procname = "gc_interval",
4990509f 6394 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
6395 .maxlen = sizeof(int),
6396 .mode = 0644,
6d9f239a 6397 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
6398 },
6399 {
1da177e4 6400 .procname = "gc_elasticity",
4990509f 6401 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
6402 .maxlen = sizeof(int),
6403 .mode = 0644,
f3d3f616 6404 .proc_handler = proc_dointvec,
1da177e4
LT
6405 },
6406 {
1da177e4 6407 .procname = "mtu_expires",
4990509f 6408 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
6409 .maxlen = sizeof(int),
6410 .mode = 0644,
6d9f239a 6411 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
6412 },
6413 {
1da177e4 6414 .procname = "min_adv_mss",
4990509f 6415 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
6416 .maxlen = sizeof(int),
6417 .mode = 0644,
f3d3f616 6418 .proc_handler = proc_dointvec,
1da177e4
LT
6419 },
6420 {
1da177e4 6421 .procname = "gc_min_interval_ms",
4990509f 6422 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
6423 .maxlen = sizeof(int),
6424 .mode = 0644,
6d9f239a 6425 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 6426 },
7c6bb7d2
DA
6427 {
6428 .procname = "skip_notify_on_dev_down",
6429 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
ef62c0ae 6430 .maxlen = sizeof(u8),
7c6bb7d2 6431 .mode = 0644,
ef62c0ae 6432 .proc_handler = proc_dou8vec_minmax,
eec4844f
MC
6433 .extra1 = SYSCTL_ZERO,
6434 .extra2 = SYSCTL_ONE,
7c6bb7d2 6435 },
1da177e4
LT
6436};
6437
2c8c1e72 6438struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
6439{
6440 struct ctl_table *table;
6441
6442 table = kmemdup(ipv6_route_table_template,
6443 sizeof(ipv6_route_table_template),
6444 GFP_KERNEL);
5ee09105
YH
6445
6446 if (table) {
06e6c88f 6447 table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
86393e52 6448 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
06e6c88f
AK
6449 table[2].data = &net->ipv6.sysctl.flush_delay;
6450 table[2].extra1 = net;
5ee09105
YH
6451 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6452 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6453 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6454 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6455 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6456 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 6457 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 6458 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5ee09105
YH
6459 }
6460
760f2d01
DL
6461 return table;
6462}
c899710f
JG
6463
6464size_t ipv6_route_sysctl_table_size(struct net *net)
6465{
6466 /* Don't export sysctls to unprivileged users */
6467 if (net->user_ns != &init_user_ns)
6468 return 1;
6469
6470 return ARRAY_SIZE(ipv6_route_table_template);
6471}
1da177e4
LT
6472#endif
6473
2c8c1e72 6474static int __net_init ip6_route_net_init(struct net *net)
cdb18761 6475{
633d424b 6476 int ret = -ENOMEM;
8ed67789 6477
86393e52
AD
6478 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6479 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 6480
fc66f95c
ED
6481 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6482 goto out_ip6_dst_ops;
6483
1cf844c7 6484 net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
421842ed
DA
6485 if (!net->ipv6.fib6_null_entry)
6486 goto out_ip6_dst_entries;
1cf844c7
DA
6487 memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6488 sizeof(*net->ipv6.fib6_null_entry));
421842ed 6489
8ed67789
DL
6490 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6491 sizeof(*net->ipv6.ip6_null_entry),
6492 GFP_KERNEL);
6493 if (!net->ipv6.ip6_null_entry)
421842ed 6494 goto out_fib6_null_entry;
d8d1f30b 6495 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
6496 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6497 ip6_template_metrics, true);
d288a162 6498 INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
8ed67789
DL
6499
6500#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 6501 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
6502 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6503 sizeof(*net->ipv6.ip6_prohibit_entry),
6504 GFP_KERNEL);
68fffc67
PZ
6505 if (!net->ipv6.ip6_prohibit_entry)
6506 goto out_ip6_null_entry;
d8d1f30b 6507 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
6508 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6509 ip6_template_metrics, true);
d288a162 6510 INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
8ed67789
DL
6511
6512 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6513 sizeof(*net->ipv6.ip6_blk_hole_entry),
6514 GFP_KERNEL);
68fffc67
PZ
6515 if (!net->ipv6.ip6_blk_hole_entry)
6516 goto out_ip6_prohibit_entry;
d8d1f30b 6517 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
6518 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6519 ip6_template_metrics, true);
d288a162 6520 INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
b9b33e7c
PA
6521#ifdef CONFIG_IPV6_SUBTREES
6522 net->ipv6.fib6_routes_require_src = 0;
6523#endif
8ed67789
DL
6524#endif
6525
b339a47c 6526 net->ipv6.sysctl.flush_delay = 0;
af6d1034 6527 net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
b339a47c
PZ
6528 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6529 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6530 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6531 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6532 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6533 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 6534 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 6535
9cb7c013 6536 atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
6891a346 6537
8ed67789
DL
6538 ret = 0;
6539out:
6540 return ret;
f2fc6a54 6541
68fffc67
PZ
6542#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6543out_ip6_prohibit_entry:
6544 kfree(net->ipv6.ip6_prohibit_entry);
6545out_ip6_null_entry:
6546 kfree(net->ipv6.ip6_null_entry);
6547#endif
421842ed
DA
6548out_fib6_null_entry:
6549 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
6550out_ip6_dst_entries:
6551 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 6552out_ip6_dst_ops:
f2fc6a54 6553 goto out;
cdb18761
DL
6554}
6555
2c8c1e72 6556static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 6557{
421842ed 6558 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
6559 kfree(net->ipv6.ip6_null_entry);
6560#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6561 kfree(net->ipv6.ip6_prohibit_entry);
6562 kfree(net->ipv6.ip6_blk_hole_entry);
6563#endif
41bb78b4 6564 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
6565}
6566
d189634e
TG
6567static int __net_init ip6_route_net_init_late(struct net *net)
6568{
6569#ifdef CONFIG_PROC_FS
768b3c74
ZS
6570 if (!proc_create_net("ipv6_route", 0, net->proc_net,
6571 &ipv6_route_seq_ops,
6572 sizeof(struct ipv6_route_iter)))
6573 return -ENOMEM;
6574
6575 if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
6576 rt6_stats_seq_show, NULL)) {
6577 remove_proc_entry("ipv6_route", net->proc_net);
6578 return -ENOMEM;
6579 }
d189634e
TG
6580#endif
6581 return 0;
6582}
6583
6584static void __net_exit ip6_route_net_exit_late(struct net *net)
6585{
6586#ifdef CONFIG_PROC_FS
ece31ffd
G
6587 remove_proc_entry("ipv6_route", net->proc_net);
6588 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
6589#endif
6590}
6591
cdb18761
DL
6592static struct pernet_operations ip6_route_net_ops = {
6593 .init = ip6_route_net_init,
6594 .exit = ip6_route_net_exit,
6595};
6596
c3426b47
DM
6597static int __net_init ipv6_inetpeer_init(struct net *net)
6598{
6599 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6600
6601 if (!bp)
6602 return -ENOMEM;
6603 inet_peer_base_init(bp);
6604 net->ipv6.peers = bp;
6605 return 0;
6606}
6607
6608static void __net_exit ipv6_inetpeer_exit(struct net *net)
6609{
6610 struct inet_peer_base *bp = net->ipv6.peers;
6611
6612 net->ipv6.peers = NULL;
56a6b248 6613 inetpeer_invalidate_tree(bp);
c3426b47
DM
6614 kfree(bp);
6615}
6616
2b823f72 6617static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
6618 .init = ipv6_inetpeer_init,
6619 .exit = ipv6_inetpeer_exit,
6620};
6621
d189634e
TG
6622static struct pernet_operations ip6_route_net_late_ops = {
6623 .init = ip6_route_net_init_late,
6624 .exit = ip6_route_net_exit_late,
6625};
6626
8ed67789
DL
6627static struct notifier_block ip6_route_dev_notifier = {
6628 .notifier_call = ip6_route_dev_notify,
242d3a49 6629 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
6630};
6631
2f460933
WC
6632void __init ip6_route_init_special_entries(void)
6633{
6634 /* Registering of the loopback is done before this portion of code,
6635 * the loopback reference in rt6_info will not be taken, do it
6636 * manually for init_net */
1cf844c7 6637 init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
2f460933
WC
6638 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6639 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6640 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6641 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6642 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6643 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6644 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6645 #endif
6646}
6647
138d0be3
YS
6648#if IS_BUILTIN(CONFIG_IPV6)
6649#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6650DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
6651
951cf368
YS
6652BTF_ID_LIST(btf_fib6_info_id)
6653BTF_ID(struct, fib6_info)
6654
14fc6bd6 6655static const struct bpf_iter_seq_info ipv6_route_seq_info = {
15172a46
YS
6656 .seq_ops = &ipv6_route_seq_ops,
6657 .init_seq_private = bpf_iter_init_seq_net,
6658 .fini_seq_private = bpf_iter_fini_seq_net,
6659 .seq_priv_size = sizeof(struct ipv6_route_iter),
14fc6bd6
YS
6660};
6661
6662static struct bpf_iter_reg ipv6_route_reg_info = {
6663 .target = "ipv6_route",
3c32cc1b
YS
6664 .ctx_arg_info_size = 1,
6665 .ctx_arg_info = {
6666 { offsetof(struct bpf_iter__ipv6_route, rt),
6667 PTR_TO_BTF_ID_OR_NULL },
6668 },
14fc6bd6 6669 .seq_info = &ipv6_route_seq_info,
15172a46
YS
6670};
6671
138d0be3
YS
6672static int __init bpf_iter_register(void)
6673{
951cf368 6674 ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
15172a46 6675 return bpf_iter_reg_target(&ipv6_route_reg_info);
138d0be3
YS
6676}
6677
6678static void bpf_iter_unregister(void)
6679{
ab2ee4fc 6680 bpf_iter_unreg_target(&ipv6_route_reg_info);
138d0be3
YS
6681}
6682#endif
6683#endif
6684
433d49c3 6685int __init ip6_route_init(void)
1da177e4 6686{
433d49c3 6687 int ret;
8d0b94af 6688 int cpu;
433d49c3 6689
9a7ec3a9
DL
6690 ret = -ENOMEM;
6691 ip6_dst_ops_template.kmem_cachep =
e5d679f3 6692 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6126891c 6693 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
9a7ec3a9 6694 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 6695 goto out;
14e50e57 6696
fc66f95c 6697 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 6698 if (ret)
bdb3289f 6699 goto out_kmem_cache;
bdb3289f 6700
c3426b47
DM
6701 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6702 if (ret)
e8803b6c 6703 goto out_dst_entries;
2a0c451a 6704
7e52b33b
DM
6705 ret = register_pernet_subsys(&ip6_route_net_ops);
6706 if (ret)
6707 goto out_register_inetpeer;
c3426b47 6708
5dc121e9
AE
6709 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6710
e8803b6c 6711 ret = fib6_init();
433d49c3 6712 if (ret)
8ed67789 6713 goto out_register_subsys;
433d49c3 6714
433d49c3
DL
6715 ret = xfrm6_init();
6716 if (ret)
e8803b6c 6717 goto out_fib6_init;
c35b7e72 6718
433d49c3
DL
6719 ret = fib6_rules_init();
6720 if (ret)
6721 goto xfrm6_init;
7e5449c2 6722
d189634e
TG
6723 ret = register_pernet_subsys(&ip6_route_net_late_ops);
6724 if (ret)
6725 goto fib6_rules_init;
6726
16feebcf
FW
6727 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6728 inet6_rtm_newroute, NULL, 0);
6729 if (ret < 0)
6730 goto out_register_late_subsys;
6731
6732 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6733 inet6_rtm_delroute, NULL, 0);
6734 if (ret < 0)
6735 goto out_register_late_subsys;
6736
6737 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6738 inet6_rtm_getroute, NULL,
6739 RTNL_FLAG_DOIT_UNLOCKED);
6740 if (ret < 0)
d189634e 6741 goto out_register_late_subsys;
c127ea2c 6742
8ed67789 6743 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 6744 if (ret)
d189634e 6745 goto out_register_late_subsys;
8ed67789 6746
138d0be3
YS
6747#if IS_BUILTIN(CONFIG_IPV6)
6748#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6749 ret = bpf_iter_register();
6750 if (ret)
6751 goto out_register_late_subsys;
6752#endif
6753#endif
6754
8d0b94af
MKL
6755 for_each_possible_cpu(cpu) {
6756 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6757
6758 INIT_LIST_HEAD(&ul->head);
ba55ef81 6759 INIT_LIST_HEAD(&ul->quarantine);
8d0b94af
MKL
6760 spin_lock_init(&ul->lock);
6761 }
6762
433d49c3
DL
6763out:
6764 return ret;
6765
d189634e 6766out_register_late_subsys:
16feebcf 6767 rtnl_unregister_all(PF_INET6);
d189634e 6768 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 6769fib6_rules_init:
433d49c3
DL
6770 fib6_rules_cleanup();
6771xfrm6_init:
433d49c3 6772 xfrm6_fini();
2a0c451a
TG
6773out_fib6_init:
6774 fib6_gc_cleanup();
8ed67789
DL
6775out_register_subsys:
6776 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
6777out_register_inetpeer:
6778 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
6779out_dst_entries:
6780 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 6781out_kmem_cache:
f2fc6a54 6782 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 6783 goto out;
1da177e4
LT
6784}
6785
6786void ip6_route_cleanup(void)
6787{
138d0be3
YS
6788#if IS_BUILTIN(CONFIG_IPV6)
6789#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6790 bpf_iter_unregister();
6791#endif
6792#endif
8ed67789 6793 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 6794 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 6795 fib6_rules_cleanup();
1da177e4 6796 xfrm6_fini();
1da177e4 6797 fib6_gc_cleanup();
c3426b47 6798 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 6799 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 6800 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 6801 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 6802}