ipv6: Remove rt6_check_dev
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/* Result of the neighbour reachability check used while scoring a route.
 * Every negative value is a failure; find_match() handles each differently.
 */
enum rt6_nud_state {
	/* route must be skipped */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour entry exists but is in NUD_FAILED state */
	RT6_NUD_FAIL_PROBE	= -2,
	/* no neighbour entry; scored lowest and triggers round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* neighbour is considered usable */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
7adf3246
SB
213
214 n = neigh_create(&nd_tbl, daddr, dev);
215 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
216}
217
218static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219 struct sk_buff *skb,
220 const void *daddr)
221{
222 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223
224 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
225}
226
63fca65d
JA
227static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
228{
229 struct net_device *dev = dst->dev;
230 struct rt6_info *rt = (struct rt6_info *)dst;
231
f8a1b43b 232 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
233 if (!daddr)
234 return;
235 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
236 return;
237 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
238 return;
239 __ipv6_confirm_neigh(dev, daddr);
240}
241
9a7ec3a9 242static struct dst_ops ip6_dst_ops_template = {
1da177e4 243 .family = AF_INET6,
1da177e4
LT
244 .gc = ip6_dst_gc,
245 .gc_thresh = 1024,
246 .check = ip6_dst_check,
0dbaee3b 247 .default_advmss = ip6_default_advmss,
ebb762f2 248 .mtu = ip6_mtu,
d4ead6b3 249 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
250 .destroy = ip6_dst_destroy,
251 .ifdown = ip6_dst_ifdown,
252 .negative_advice = ip6_negative_advice,
253 .link_failure = ip6_link_failure,
254 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 255 .redirect = rt6_do_redirect,
9f8955cc 256 .local_out = __ip6_local_out,
f8a1b43b 257 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 258 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
259};
260
ebb762f2 261static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 262{
618f9bc7
SK
263 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264
265 return mtu ? : dst->dev->mtu;
ec831ea7
RD
266}
267
6700c270
DM
268static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269 struct sk_buff *skb, u32 mtu)
14e50e57
DM
270{
271}
272
6700c270
DM
/* Redirects are ignored on blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
277
14e50e57
DM
278static struct dst_ops ip6_dst_blackhole_ops = {
279 .family = AF_INET6,
14e50e57
DM
280 .destroy = ip6_dst_destroy,
281 .check = ip6_dst_check,
ebb762f2 282 .mtu = ip6_blackhole_mtu,
214f45c9 283 .default_advmss = ip6_default_advmss,
14e50e57 284 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 285 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 286 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 287 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
288};
289
62fa8a84 290static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 291 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
292};
293
8d1c802b 294static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
295 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
296 .fib6_protocol = RTPROT_KERNEL,
297 .fib6_metric = ~(u32)0,
298 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
299 .fib6_type = RTN_UNREACHABLE,
300 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
301};
302
fb0af4c7 303static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
304 .dst = {
305 .__refcnt = ATOMIC_INIT(1),
306 .__use = 1,
2c20cbd7 307 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 308 .error = -ENETUNREACH,
d8d1f30b
CG
309 .input = ip6_pkt_discard,
310 .output = ip6_pkt_discard_out,
1da177e4
LT
311 },
312 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
313};
314
101367c2
TG
315#ifdef CONFIG_IPV6_MULTIPLE_TABLES
316
fb0af4c7 317static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
318 .dst = {
319 .__refcnt = ATOMIC_INIT(1),
320 .__use = 1,
2c20cbd7 321 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 322 .error = -EACCES,
d8d1f30b
CG
323 .input = ip6_pkt_prohibit,
324 .output = ip6_pkt_prohibit_out,
101367c2
TG
325 },
326 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
327};
328
fb0af4c7 329static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
330 .dst = {
331 .__refcnt = ATOMIC_INIT(1),
332 .__use = 1,
2c20cbd7 333 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 334 .error = -EINVAL,
d8d1f30b 335 .input = dst_discard,
ede2059d 336 .output = dst_discard_out,
101367c2
TG
337 },
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
339};
340
341#endif
342
ebfa45f0
MKL
343static void rt6_info_init(struct rt6_info *rt)
344{
345 struct dst_entry *dst = &rt->dst;
346
347 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
349}
350
1da177e4 351/* allocate dst with ip6_dst_ops */
93531c67
DA
352struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
81eb8447 358 if (rt) {
ebfa45f0 359 rt6_info_init(rt);
81eb8447
WW
360 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361 }
8104891b 362
cf911662 363 return rt;
1da177e4 364}
9ab179d8 365EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 366
1da177e4
LT
367static void ip6_dst_destroy(struct dst_entry *dst)
368{
369 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 370 struct fib6_info *from;
8d0b94af 371 struct inet6_dev *idev;
1da177e4 372
1620a336 373 ip_dst_metrics_put(dst);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
a68886a6
DA
382 rcu_read_lock();
383 from = rcu_dereference(rt->from);
384 rcu_assign_pointer(rt->from, NULL);
93531c67 385 fib6_info_release(from);
a68886a6 386 rcu_read_unlock();
b3419363
DM
387}
388
1da177e4
LT
389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390 int how)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 394 struct net_device *loopback_dev =
c346dca1 395 dev_net(dev)->loopback_dev;
1da177e4 396
e5645f51
WW
397 if (idev && idev->dev != loopback_dev) {
398 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
399 if (loopback_idev) {
400 rt->rt6i_idev = loopback_idev;
401 in6_dev_put(idev);
97cac082 402 }
1da177e4
LT
403 }
404}
405
5973fb1e
MKL
406static bool __rt6_check_expired(const struct rt6_info *rt)
407{
408 if (rt->rt6i_flags & RTF_EXPIRES)
409 return time_after(jiffies, rt->dst.expires);
410 else
411 return false;
412}
413
a50feda5 414static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 415{
a68886a6
DA
416 struct fib6_info *from;
417
418 from = rcu_dereference(rt->from);
419
1716a961
G
420 if (rt->rt6i_flags & RTF_EXPIRES) {
421 if (time_after(jiffies, rt->dst.expires))
a50feda5 422 return true;
a68886a6 423 } else if (from) {
1e2ea8ad 424 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 425 fib6_check_expired(from);
1716a961 426 }
a50feda5 427 return false;
1da177e4
LT
428}
429
3b290a31
DA
430struct fib6_info *fib6_multipath_select(const struct net *net,
431 struct fib6_info *match,
432 struct flowi6 *fl6, int oif,
433 const struct sk_buff *skb,
434 int strict)
51ebd318 435{
8d1c802b 436 struct fib6_info *sibling, *next_sibling;
51ebd318 437
b673d6cc
JS
438 /* We might have already computed the hash for ICMPv6 errors. In such
439 * case it will always be non-zero. Otherwise now is the time to do it.
440 */
441 if (!fl6->mp_hash)
b4bac172 442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 443
ad1601ae 444 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
3d709f69
IS
445 return match;
446
93c2fb25
DA
447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448 fib6_siblings) {
5e670d84
DA
449 int nh_upper_bound;
450
ad1601ae 451 nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
5e670d84 452 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
453 continue;
454 if (rt6_score_route(sibling, oif, strict) < 0)
455 break;
456 match = sibling;
457 break;
458 }
459
51ebd318
ND
460 return match;
461}
462
1da177e4 463/*
66f5d6ce 464 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
465 */
466
8d1c802b
DA
467static inline struct fib6_info *rt6_device_match(struct net *net,
468 struct fib6_info *rt,
b71d1d42 469 const struct in6_addr *saddr,
1da177e4 470 int oif,
d420895e 471 int flags)
1da177e4 472{
8d1c802b 473 struct fib6_info *sprt;
1da177e4 474
5e670d84 475 if (!oif && ipv6_addr_any(saddr) &&
ad1601ae 476 !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
8067bb8c 477 return rt;
dd3abc4e 478
8fb11a9a 479 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
ad1601ae 480 const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;
dd3abc4e 481
ad1601ae 482 if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
483 continue;
484
dd3abc4e 485 if (oif) {
1da177e4
LT
486 if (dev->ifindex == oif)
487 return sprt;
dd3abc4e
YH
488 } else {
489 if (ipv6_chk_addr(net, saddr, dev,
490 flags & RT6_LOOKUP_F_IFACE))
491 return sprt;
1da177e4 492 }
dd3abc4e 493 }
1da177e4 494
eea68cd3
DA
495 if (oif && flags & RT6_LOOKUP_F_IFACE)
496 return net->ipv6.fib6_null_entry;
8067bb8c 497
ad1601ae 498 return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
499}
500
27097255 501#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
502struct __rt6_probe_work {
503 struct work_struct work;
504 struct in6_addr target;
505 struct net_device *dev;
506};
507
508static void rt6_probe_deferred(struct work_struct *w)
509{
510 struct in6_addr mcaddr;
511 struct __rt6_probe_work *work =
512 container_of(w, struct __rt6_probe_work, work);
513
514 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 515 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 516 dev_put(work->dev);
662f5533 517 kfree(work);
c2f17e82
HFS
518}
519
8d1c802b 520static void rt6_probe(struct fib6_info *rt)
27097255 521{
f547fac6 522 struct __rt6_probe_work *work = NULL;
5e670d84 523 const struct in6_addr *nh_gw;
f2c31e32 524 struct neighbour *neigh;
5e670d84 525 struct net_device *dev;
f547fac6 526 struct inet6_dev *idev;
5e670d84 527
27097255
YH
528 /*
529 * Okay, this does not seem to be appropriate
530 * for now, however, we need to check if it
531 * is really so; aka Router Reachability Probing.
532 *
533 * Router Reachability Probe MUST be rate-limited
534 * to no more than one per minute.
535 */
bdf00467 536 if (!rt || !rt->fib6_nh.fib_nh_gw_family)
7ff74a59 537 return;
5e670d84 538
ad1601ae
DA
539 nh_gw = &rt->fib6_nh.fib_nh_gw6;
540 dev = rt->fib6_nh.fib_nh_dev;
2152caea 541 rcu_read_lock_bh();
f547fac6 542 idev = __in6_dev_get(dev);
5e670d84 543 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 544 if (neigh) {
8d6c31bf
MKL
545 if (neigh->nud_state & NUD_VALID)
546 goto out;
547
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
f547fac6
SD
557 } else if (time_after(jiffies, rt->last_probe +
558 idev->cnf.rtr_probe_interval)) {
990edb42 559 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 560 }
990edb42
MKL
561
562 if (work) {
f547fac6 563 rt->last_probe = jiffies;
990edb42 564 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
565 work->target = *nh_gw;
566 dev_hold(dev);
567 work->dev = dev;
990edb42
MKL
568 schedule_work(&work->work);
569 }
570
8d6c31bf 571out:
2152caea 572 rcu_read_unlock_bh();
27097255
YH
573}
574#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* intentionally empty */
}
578#endif
579
1da177e4 580/*
554cfb7e 581 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 582 */
1ba9a895 583static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 584{
afc154e9 585 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 586 struct neighbour *neigh;
f2c31e32 587
145a3621 588 rcu_read_lock_bh();
1ba9a895
DA
589 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
590 &fib6_nh->fib_nh_gw6);
145a3621
YH
591 if (neigh) {
592 read_lock(&neigh->lock);
554cfb7e 593 if (neigh->nud_state & NUD_VALID)
afc154e9 594 ret = RT6_NUD_SUCCEED;
398bcbeb 595#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 596 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 597 ret = RT6_NUD_SUCCEED;
7e980569
JB
598 else
599 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 600#endif
145a3621 601 read_unlock(&neigh->lock);
afc154e9
HFS
602 } else {
603 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 604 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 605 }
145a3621
YH
606 rcu_read_unlock_bh();
607
a5a81f0b 608 return ret;
1da177e4
LT
609}
610
8d1c802b 611static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 612{
1ba9a895 613 struct fib6_nh *nh = &rt->fib6_nh;
6e1809a5
DA
614 int m = 0;
615
616 if (!oif || nh->fib_nh_dev->ifindex == oif)
617 m = 2;
1ab1457c 618
77d16f45 619 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 620 return RT6_NUD_FAIL_HARD;
ebacaaa0 621#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 622 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 623#endif
1ba9a895
DA
624 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
625 !(rt->fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
626 int n = rt6_check_neigh(nh);
afc154e9
HFS
627 if (n < 0)
628 return n;
629 }
554cfb7e
YH
630 return m;
631}
632
8d1c802b
DA
633static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
634 int *mpri, struct fib6_info *match,
afc154e9 635 bool *do_rr)
554cfb7e 636{
f11e6659 637 int m;
afc154e9 638 bool match_do_rr = false;
35103d11 639
ad1601ae 640 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
641 goto out;
642
ad1601ae
DA
643 if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
644 rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 645 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 646 goto out;
f11e6659 647
14895687 648 if (fib6_check_expired(rt))
f11e6659
DM
649 goto out;
650
651 m = rt6_score_route(rt, oif, strict);
7e980569 652 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
653 match_do_rr = true;
654 m = 0; /* lowest valid score */
7e980569 655 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 656 goto out;
afc154e9
HFS
657 }
658
659 if (strict & RT6_LOOKUP_F_REACHABLE)
660 rt6_probe(rt);
f11e6659 661
7e980569 662 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 663 if (m > *mpri) {
afc154e9 664 *do_rr = match_do_rr;
f11e6659
DM
665 *mpri = m;
666 match = rt;
f11e6659 667 }
f11e6659
DM
668out:
669 return match;
670}
671
8d1c802b
DA
672static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
673 struct fib6_info *leaf,
674 struct fib6_info *rr_head,
afc154e9
HFS
675 u32 metric, int oif, int strict,
676 bool *do_rr)
f11e6659 677{
8d1c802b 678 struct fib6_info *rt, *match, *cont;
554cfb7e 679 int mpri = -1;
1da177e4 680
f11e6659 681 match = NULL;
9fbdcfaf 682 cont = NULL;
8fb11a9a 683 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 684 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
685 cont = rt;
686 break;
687 }
688
689 match = find_match(rt, oif, strict, &mpri, match, do_rr);
690 }
691
66f5d6ce 692 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 693 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 694 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
695 cont = rt;
696 break;
697 }
698
afc154e9 699 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
700 }
701
702 if (match || !cont)
703 return match;
704
8fb11a9a 705 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 706 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 707
f11e6659
DM
708 return match;
709}
1da177e4 710
8d1c802b 711static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 712 int oif, int strict)
f11e6659 713{
8d1c802b
DA
714 struct fib6_info *leaf = rcu_dereference(fn->leaf);
715 struct fib6_info *match, *rt0;
afc154e9 716 bool do_rr = false;
17ecf590 717 int key_plen;
1da177e4 718
421842ed
DA
719 if (!leaf || leaf == net->ipv6.fib6_null_entry)
720 return net->ipv6.fib6_null_entry;
8d1040e8 721
66f5d6ce 722 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 723 if (!rt0)
66f5d6ce 724 rt0 = leaf;
1da177e4 725
17ecf590
WW
726 /* Double check to make sure fn is not an intermediate node
727 * and fn->leaf does not points to its child's leaf
728 * (This might happen if all routes under fn are deleted from
729 * the tree and fib6_repair_tree() is called on the node.)
730 */
93c2fb25 731 key_plen = rt0->fib6_dst.plen;
17ecf590 732#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
733 if (rt0->fib6_src.plen)
734 key_plen = rt0->fib6_src.plen;
17ecf590
WW
735#endif
736 if (fn->fn_bit != key_plen)
421842ed 737 return net->ipv6.fib6_null_entry;
17ecf590 738
93c2fb25 739 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 740 &do_rr);
1da177e4 741
afc154e9 742 if (do_rr) {
8fb11a9a 743 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 744
554cfb7e 745 /* no entries matched; do round-robin */
93c2fb25 746 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 747 next = leaf;
f11e6659 748
66f5d6ce 749 if (next != rt0) {
93c2fb25 750 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 751 /* make sure next is not being deleted from the tree */
93c2fb25 752 if (next->fib6_node)
66f5d6ce 753 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 754 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 755 }
1da177e4 756 }
1da177e4 757
421842ed 758 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
759}
760
8d1c802b 761static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 762{
bdf00467 763 return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
8b9df265
MKL
764}
765
70ceb4f5
YH
766#ifdef CONFIG_IPV6_ROUTE_INFO
767int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 768 const struct in6_addr *gwaddr)
70ceb4f5 769{
c346dca1 770 struct net *net = dev_net(dev);
70ceb4f5
YH
771 struct route_info *rinfo = (struct route_info *) opt;
772 struct in6_addr prefix_buf, *prefix;
773 unsigned int pref;
4bed72e4 774 unsigned long lifetime;
8d1c802b 775 struct fib6_info *rt;
70ceb4f5
YH
776
777 if (len < sizeof(struct route_info)) {
778 return -EINVAL;
779 }
780
781 /* Sanity check for prefix_len and length */
782 if (rinfo->length > 3) {
783 return -EINVAL;
784 } else if (rinfo->prefix_len > 128) {
785 return -EINVAL;
786 } else if (rinfo->prefix_len > 64) {
787 if (rinfo->length < 2) {
788 return -EINVAL;
789 }
790 } else if (rinfo->prefix_len > 0) {
791 if (rinfo->length < 1) {
792 return -EINVAL;
793 }
794 }
795
796 pref = rinfo->route_pref;
797 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 798 return -EINVAL;
70ceb4f5 799
4bed72e4 800 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
801
802 if (rinfo->length == 3)
803 prefix = (struct in6_addr *)rinfo->prefix;
804 else {
805 /* this function is safe */
806 ipv6_addr_prefix(&prefix_buf,
807 (struct in6_addr *)rinfo->prefix,
808 rinfo->prefix_len);
809 prefix = &prefix_buf;
810 }
811
f104a567 812 if (rinfo->prefix_len == 0)
afb1d4b5 813 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
814 else
815 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 816 gwaddr, dev);
70ceb4f5
YH
817
818 if (rt && !lifetime) {
afb1d4b5 819 ip6_del_rt(net, rt);
70ceb4f5
YH
820 rt = NULL;
821 }
822
823 if (!rt && lifetime)
830218c1
DA
824 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
825 dev, pref);
70ceb4f5 826 else if (rt)
93c2fb25
DA
827 rt->fib6_flags = RTF_ROUTEINFO |
828 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
829
830 if (rt) {
1716a961 831 if (!addrconf_finite_timeout(lifetime))
14895687 832 fib6_clean_expires(rt);
1716a961 833 else
14895687 834 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 835
93531c67 836 fib6_info_release(rt);
70ceb4f5
YH
837 }
838 return 0;
839}
840#endif
841
ae90d867
DA
842/*
843 * Misc support functions
844 */
845
846/* called with rcu_lock held */
8d1c802b 847static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 848{
ad1601ae 849 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
ae90d867 850
93c2fb25 851 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
852 /* for copies of local routes, dst->dev needs to be the
853 * device if it is a master device, the master device if
854 * device is enslaved, and the loopback as the default
855 */
856 if (netif_is_l3_slave(dev) &&
93c2fb25 857 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
858 dev = l3mdev_master_dev_rcu(dev);
859 else if (!netif_is_l3_master(dev))
860 dev = dev_net(dev)->loopback_dev;
861 /* last case is netif_is_l3_master(dev) is true in which
862 * case we want dev returned to be dev
863 */
864 }
865
866 return dev;
867}
868
6edb3c96
DA
869static const int fib6_prop[RTN_MAX + 1] = {
870 [RTN_UNSPEC] = 0,
871 [RTN_UNICAST] = 0,
872 [RTN_LOCAL] = 0,
873 [RTN_BROADCAST] = 0,
874 [RTN_ANYCAST] = 0,
875 [RTN_MULTICAST] = 0,
876 [RTN_BLACKHOLE] = -EINVAL,
877 [RTN_UNREACHABLE] = -EHOSTUNREACH,
878 [RTN_PROHIBIT] = -EACCES,
879 [RTN_THROW] = -EAGAIN,
880 [RTN_NAT] = -EINVAL,
881 [RTN_XRESOLVE] = -EINVAL,
882};
883
884static int ip6_rt_type_to_error(u8 fib6_type)
885{
886 return fib6_prop[fib6_type];
887}
888
8d1c802b 889static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
890{
891 unsigned short flags = 0;
892
893 if (rt->dst_nocount)
894 flags |= DST_NOCOUNT;
895 if (rt->dst_nopolicy)
896 flags |= DST_NOPOLICY;
897 if (rt->dst_host)
898 flags |= DST_HOST;
899
900 return flags;
901}
902
8d1c802b 903static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
904{
905 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
906
907 switch (ort->fib6_type) {
908 case RTN_BLACKHOLE:
909 rt->dst.output = dst_discard_out;
910 rt->dst.input = dst_discard;
911 break;
912 case RTN_PROHIBIT:
913 rt->dst.output = ip6_pkt_prohibit_out;
914 rt->dst.input = ip6_pkt_prohibit;
915 break;
916 case RTN_THROW:
917 case RTN_UNREACHABLE:
918 default:
919 rt->dst.output = ip6_pkt_discard_out;
920 rt->dst.input = ip6_pkt_discard;
921 break;
922 }
923}
924
8d1c802b 925static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 926{
93c2fb25 927 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
928 ip6_rt_init_dst_reject(rt, ort);
929 return;
930 }
931
932 rt->dst.error = 0;
933 rt->dst.output = ip6_output;
934
d23c4b63 935 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 936 rt->dst.input = ip6_input;
93c2fb25 937 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
938 rt->dst.input = ip6_mc_input;
939 } else {
940 rt->dst.input = ip6_forward;
941 }
942
ad1601ae
DA
943 if (ort->fib6_nh.fib_nh_lws) {
944 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
6edb3c96
DA
945 lwtunnel_set_redirect(&rt->dst);
946 }
947
948 rt->dst.lastuse = jiffies;
949}
950
e873e4b9 951/* Caller must already hold reference to @from */
8d1c802b 952static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 953{
ae90d867 954 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 955 rcu_assign_pointer(rt->from, from);
e1255ed4 956 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
957}
958
e873e4b9 959/* Caller must already hold reference to @ort */
8d1c802b 960static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 961{
dcd1f572
DA
962 struct net_device *dev = fib6_info_nh_dev(ort);
963
6edb3c96
DA
964 ip6_rt_init_dst(rt, ort);
965
93c2fb25 966 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 967 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
93c2fb25 968 rt->rt6i_flags = ort->fib6_flags;
bdf00467 969 if (ort->fib6_nh.fib_nh_gw_family) {
ad1601ae 970 rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
2b2450ca
DA
971 rt->rt6i_flags |= RTF_GATEWAY;
972 }
ae90d867 973 rt6_set_from(rt, ort);
ae90d867 974#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 975 rt->rt6i_src = ort->fib6_src;
ae90d867 976#endif
ae90d867
DA
977}
978
a3c00e46
MKL
979static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
980 struct in6_addr *saddr)
981{
66f5d6ce 982 struct fib6_node *pn, *sn;
a3c00e46
MKL
983 while (1) {
984 if (fn->fn_flags & RTN_TL_ROOT)
985 return NULL;
66f5d6ce
WW
986 pn = rcu_dereference(fn->parent);
987 sn = FIB6_SUBTREE(pn);
988 if (sn && sn != fn)
6454743b 989 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
990 else
991 fn = pn;
992 if (fn->fn_flags & RTN_RTINFO)
993 return fn;
994 }
995}
c71099ac 996
10585b43 997static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
998{
999 struct rt6_info *rt = *prt;
1000
1001 if (dst_hold_safe(&rt->dst))
1002 return true;
10585b43 1003 if (net) {
d3843fe5
WW
1004 rt = net->ipv6.ip6_null_entry;
1005 dst_hold(&rt->dst);
1006 } else {
1007 rt = NULL;
1008 }
1009 *prt = rt;
1010 return false;
1011}
1012
dec9b0e2 1013/* called with rcu_lock held */
8d1c802b 1014static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1015{
3b6761d1 1016 unsigned short flags = fib6_info_dst_flags(rt);
ad1601ae 1017 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
dec9b0e2
DA
1018 struct rt6_info *nrt;
1019
e873e4b9 1020 if (!fib6_info_hold_safe(rt))
1c87e79a 1021 goto fallback;
e873e4b9 1022
93531c67 1023 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1024 if (!nrt) {
e873e4b9 1025 fib6_info_release(rt);
1c87e79a
XL
1026 goto fallback;
1027 }
dec9b0e2 1028
1c87e79a
XL
1029 ip6_rt_copy_init(nrt, rt);
1030 return nrt;
1031
1032fallback:
1033 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1034 dst_hold(&nrt->dst);
dec9b0e2
DA
1035 return nrt;
1036}
1037
8ed67789
DL
1038static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1039 struct fib6_table *table,
b75cc8f9
DA
1040 struct flowi6 *fl6,
1041 const struct sk_buff *skb,
1042 int flags)
1da177e4 1043{
8d1c802b 1044 struct fib6_info *f6i;
1da177e4 1045 struct fib6_node *fn;
23fb93a4 1046 struct rt6_info *rt;
1da177e4 1047
b6cdbc85
DA
1048 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1049 flags &= ~RT6_LOOKUP_F_IFACE;
1050
66f5d6ce 1051 rcu_read_lock();
6454743b 1052 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1053restart:
23fb93a4
DA
1054 f6i = rcu_dereference(fn->leaf);
1055 if (!f6i) {
1056 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1057 } else {
23fb93a4 1058 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1059 fl6->flowi6_oif, flags);
93c2fb25 1060 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1061 f6i = fib6_multipath_select(net, f6i, fl6,
1062 fl6->flowi6_oif, skb,
1063 flags);
66f5d6ce 1064 }
23fb93a4 1065 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1066 fn = fib6_backtrack(fn, &fl6->saddr);
1067 if (fn)
1068 goto restart;
1069 }
2b760fcf 1070
d4bea421 1071 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1072
2b760fcf 1073 /* Search through exception table */
23fb93a4
DA
1074 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1075 if (rt) {
10585b43 1076 if (ip6_hold_safe(net, &rt))
dec9b0e2 1077 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1078 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1079 rt = net->ipv6.ip6_null_entry;
1080 dst_hold(&rt->dst);
23fb93a4
DA
1081 } else {
1082 rt = ip6_create_rt_rcu(f6i);
dec9b0e2 1083 }
b811580d 1084
66f5d6ce 1085 rcu_read_unlock();
b811580d 1086
c71099ac 1087 return rt;
c71099ac
TG
1088}
1089
67ba4152 1090struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1091 const struct sk_buff *skb, int flags)
ea6e574e 1092{
b75cc8f9 1093 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1094}
1095EXPORT_SYMBOL_GPL(ip6_route_lookup);
1096
9acd9f3a 1097struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1098 const struct in6_addr *saddr, int oif,
1099 const struct sk_buff *skb, int strict)
c71099ac 1100{
4c9483b2
DM
1101 struct flowi6 fl6 = {
1102 .flowi6_oif = oif,
1103 .daddr = *daddr,
c71099ac
TG
1104 };
1105 struct dst_entry *dst;
77d16f45 1106 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1107
adaa70bb 1108 if (saddr) {
4c9483b2 1109 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1110 flags |= RT6_LOOKUP_F_HAS_SADDR;
1111 }
1112
b75cc8f9 1113 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1114 if (dst->error == 0)
1115 return (struct rt6_info *) dst;
1116
1117 dst_release(dst);
1118
1da177e4
LT
1119 return NULL;
1120}
7159039a
YH
1121EXPORT_SYMBOL(rt6_lookup);
1122
c71099ac 1123/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1124 * It takes new route entry, the addition fails by any reason the
1125 * route is released.
1126 * Caller must hold dst before calling it.
1da177e4
LT
1127 */
1128
8d1c802b 1129static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1130 struct netlink_ext_ack *extack)
1da177e4
LT
1131{
1132 int err;
c71099ac 1133 struct fib6_table *table;
1da177e4 1134
93c2fb25 1135 table = rt->fib6_table;
66f5d6ce 1136 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1137 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1138 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1139
1140 return err;
1141}
1142
8d1c802b 1143int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1144{
afb1d4b5 1145 struct nl_info info = { .nl_net = net, };
e715b6d3 1146
d4ead6b3 1147 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1148}
1149
8d1c802b 1150static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1151 const struct in6_addr *daddr,
1152 const struct in6_addr *saddr)
1da177e4 1153{
4832c30d 1154 struct net_device *dev;
1da177e4
LT
1155 struct rt6_info *rt;
1156
1157 /*
1158 * Clone the route.
1159 */
1160
e873e4b9
WW
1161 if (!fib6_info_hold_safe(ort))
1162 return NULL;
1163
4832c30d 1164 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1165 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1166 if (!rt) {
1167 fib6_info_release(ort);
83a09abd 1168 return NULL;
e873e4b9 1169 }
83a09abd
MKL
1170
1171 ip6_rt_copy_init(rt, ort);
1172 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1173 rt->dst.flags |= DST_HOST;
1174 rt->rt6i_dst.addr = *daddr;
1175 rt->rt6i_dst.plen = 128;
1da177e4 1176
83a09abd 1177 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1178 if (ort->fib6_dst.plen != 128 &&
1179 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1180 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1181#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1182 if (rt->rt6i_src.plen && saddr) {
1183 rt->rt6i_src.addr = *saddr;
1184 rt->rt6i_src.plen = 128;
8b9df265 1185 }
83a09abd 1186#endif
95a9a5ba 1187 }
1da177e4 1188
95a9a5ba
YH
1189 return rt;
1190}
1da177e4 1191
8d1c802b 1192static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1193{
3b6761d1 1194 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1195 struct net_device *dev;
d52d3997
MKL
1196 struct rt6_info *pcpu_rt;
1197
e873e4b9
WW
1198 if (!fib6_info_hold_safe(rt))
1199 return NULL;
1200
4832c30d
DA
1201 rcu_read_lock();
1202 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1203 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1204 rcu_read_unlock();
e873e4b9
WW
1205 if (!pcpu_rt) {
1206 fib6_info_release(rt);
d52d3997 1207 return NULL;
e873e4b9 1208 }
d52d3997 1209 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1210 pcpu_rt->rt6i_flags |= RTF_PCPU;
1211 return pcpu_rt;
1212}
1213
66f5d6ce 1214/* It should be called with rcu_read_lock() acquired */
8d1c802b 1215static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1216{
a73e4195 1217 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1218
1219 p = this_cpu_ptr(rt->rt6i_pcpu);
1220 pcpu_rt = *p;
1221
d4ead6b3 1222 if (pcpu_rt)
10585b43 1223 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1224
a73e4195
MKL
1225 return pcpu_rt;
1226}
1227
afb1d4b5 1228static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1229 struct fib6_info *rt)
a73e4195
MKL
1230{
1231 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1232
1233 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1234 if (!pcpu_rt) {
9c7370a1
MKL
1235 dst_hold(&net->ipv6.ip6_null_entry->dst);
1236 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1237 }
1238
a94b9367
WW
1239 dst_hold(&pcpu_rt->dst);
1240 p = this_cpu_ptr(rt->rt6i_pcpu);
1241 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1242 BUG_ON(prev);
a94b9367 1243
d52d3997
MKL
1244 return pcpu_rt;
1245}
1246
35732d01
WW
1247/* exception hash table implementation
1248 */
1249static DEFINE_SPINLOCK(rt6_exception_lock);
1250
1251/* Remove rt6_ex from hash table and free the memory
1252 * Caller must hold rt6_exception_lock
1253 */
1254static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1255 struct rt6_exception *rt6_ex)
1256{
f5b51fe8 1257 struct fib6_info *from;
b2427e67 1258 struct net *net;
81eb8447 1259
35732d01
WW
1260 if (!bucket || !rt6_ex)
1261 return;
b2427e67
CIK
1262
1263 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1264 net->ipv6.rt6_stats->fib_rt_cache--;
1265
1266 /* purge completely the exception to allow releasing the held resources:
1267 * some [sk] cache may keep the dst around for unlimited time
1268 */
1269 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1270 lockdep_is_held(&rt6_exception_lock));
1271 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1272 fib6_info_release(from);
1273 dst_dev_put(&rt6_ex->rt6i->dst);
1274
35732d01 1275 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1276 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1277 kfree_rcu(rt6_ex, rcu);
1278 WARN_ON_ONCE(!bucket->depth);
1279 bucket->depth--;
1280}
1281
1282/* Remove oldest rt6_ex in bucket and free the memory
1283 * Caller must hold rt6_exception_lock
1284 */
1285static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1286{
1287 struct rt6_exception *rt6_ex, *oldest = NULL;
1288
1289 if (!bucket)
1290 return;
1291
1292 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1293 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1294 oldest = rt6_ex;
1295 }
1296 rt6_remove_exception(bucket, oldest);
1297}
1298
1299static u32 rt6_exception_hash(const struct in6_addr *dst,
1300 const struct in6_addr *src)
1301{
1302 static u32 seed __read_mostly;
1303 u32 val;
1304
1305 net_get_random_once(&seed, sizeof(seed));
1306 val = jhash(dst, sizeof(*dst), seed);
1307
1308#ifdef CONFIG_IPV6_SUBTREES
1309 if (src)
1310 val = jhash(src, sizeof(*src), val);
1311#endif
1312 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1313}
1314
1315/* Helper function to find the cached rt in the hash table
1316 * and update bucket pointer to point to the bucket for this
1317 * (daddr, saddr) pair
1318 * Caller must hold rt6_exception_lock
1319 */
1320static struct rt6_exception *
1321__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1322 const struct in6_addr *daddr,
1323 const struct in6_addr *saddr)
1324{
1325 struct rt6_exception *rt6_ex;
1326 u32 hval;
1327
1328 if (!(*bucket) || !daddr)
1329 return NULL;
1330
1331 hval = rt6_exception_hash(daddr, saddr);
1332 *bucket += hval;
1333
1334 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1335 struct rt6_info *rt6 = rt6_ex->rt6i;
1336 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1337
1338#ifdef CONFIG_IPV6_SUBTREES
1339 if (matched && saddr)
1340 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1341#endif
1342 if (matched)
1343 return rt6_ex;
1344 }
1345 return NULL;
1346}
1347
1348/* Helper function to find the cached rt in the hash table
1349 * and update bucket pointer to point to the bucket for this
1350 * (daddr, saddr) pair
1351 * Caller must hold rcu_read_lock()
1352 */
1353static struct rt6_exception *
1354__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1355 const struct in6_addr *daddr,
1356 const struct in6_addr *saddr)
1357{
1358 struct rt6_exception *rt6_ex;
1359 u32 hval;
1360
1361 WARN_ON_ONCE(!rcu_read_lock_held());
1362
1363 if (!(*bucket) || !daddr)
1364 return NULL;
1365
1366 hval = rt6_exception_hash(daddr, saddr);
1367 *bucket += hval;
1368
1369 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1370 struct rt6_info *rt6 = rt6_ex->rt6i;
1371 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1372
1373#ifdef CONFIG_IPV6_SUBTREES
1374 if (matched && saddr)
1375 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1376#endif
1377 if (matched)
1378 return rt6_ex;
1379 }
1380 return NULL;
1381}
1382
8d1c802b 1383static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1384{
1385 unsigned int mtu;
1386
dcd1f572
DA
1387 if (rt->fib6_pmtu) {
1388 mtu = rt->fib6_pmtu;
1389 } else {
1390 struct net_device *dev = fib6_info_nh_dev(rt);
1391 struct inet6_dev *idev;
1392
1393 rcu_read_lock();
1394 idev = __in6_dev_get(dev);
1395 mtu = idev->cnf.mtu6;
1396 rcu_read_unlock();
1397 }
1398
d4ead6b3
DA
1399 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1400
ad1601ae 1401 return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
d4ead6b3
DA
1402}
1403
35732d01 1404static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1405 struct fib6_info *ort)
35732d01 1406{
5e670d84 1407 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1408 struct rt6_exception_bucket *bucket;
1409 struct in6_addr *src_key = NULL;
1410 struct rt6_exception *rt6_ex;
1411 int err = 0;
1412
35732d01
WW
1413 spin_lock_bh(&rt6_exception_lock);
1414
1415 if (ort->exception_bucket_flushed) {
1416 err = -EINVAL;
1417 goto out;
1418 }
1419
1420 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1421 lockdep_is_held(&rt6_exception_lock));
1422 if (!bucket) {
1423 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1424 GFP_ATOMIC);
1425 if (!bucket) {
1426 err = -ENOMEM;
1427 goto out;
1428 }
1429 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1430 }
1431
1432#ifdef CONFIG_IPV6_SUBTREES
1433 /* rt6i_src.plen != 0 indicates ort is in subtree
1434 * and exception table is indexed by a hash of
1435 * both rt6i_dst and rt6i_src.
1436 * Otherwise, the exception table is indexed by
1437 * a hash of only rt6i_dst.
1438 */
93c2fb25 1439 if (ort->fib6_src.plen)
35732d01
WW
1440 src_key = &nrt->rt6i_src.addr;
1441#endif
f5bbe7ee
WW
1442 /* rt6_mtu_change() might lower mtu on ort.
1443 * Only insert this exception route if its mtu
1444 * is less than ort's mtu value.
1445 */
d4ead6b3 1446 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1447 err = -EINVAL;
1448 goto out;
1449 }
60006a48 1450
35732d01
WW
1451 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1452 src_key);
1453 if (rt6_ex)
1454 rt6_remove_exception(bucket, rt6_ex);
1455
1456 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1457 if (!rt6_ex) {
1458 err = -ENOMEM;
1459 goto out;
1460 }
1461 rt6_ex->rt6i = nrt;
1462 rt6_ex->stamp = jiffies;
35732d01
WW
1463 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1464 bucket->depth++;
81eb8447 1465 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1466
1467 if (bucket->depth > FIB6_MAX_DEPTH)
1468 rt6_exception_remove_oldest(bucket);
1469
1470out:
1471 spin_unlock_bh(&rt6_exception_lock);
1472
1473 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1474 if (!err) {
93c2fb25 1475 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1476 fib6_update_sernum(net, ort);
93c2fb25 1477 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1478 fib6_force_start_gc(net);
1479 }
35732d01
WW
1480
1481 return err;
1482}
1483
8d1c802b 1484void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1485{
1486 struct rt6_exception_bucket *bucket;
1487 struct rt6_exception *rt6_ex;
1488 struct hlist_node *tmp;
1489 int i;
1490
1491 spin_lock_bh(&rt6_exception_lock);
1492 /* Prevent rt6_insert_exception() to recreate the bucket list */
1493 rt->exception_bucket_flushed = 1;
1494
1495 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1496 lockdep_is_held(&rt6_exception_lock));
1497 if (!bucket)
1498 goto out;
1499
1500 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1501 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1502 rt6_remove_exception(bucket, rt6_ex);
1503 WARN_ON_ONCE(bucket->depth);
1504 bucket++;
1505 }
1506
1507out:
1508 spin_unlock_bh(&rt6_exception_lock);
1509}
1510
1511/* Find cached rt in the hash table inside passed in rt
1512 * Caller has to hold rcu_read_lock()
1513 */
8d1c802b 1514static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1515 struct in6_addr *daddr,
1516 struct in6_addr *saddr)
1517{
1518 struct rt6_exception_bucket *bucket;
1519 struct in6_addr *src_key = NULL;
1520 struct rt6_exception *rt6_ex;
1521 struct rt6_info *res = NULL;
1522
1523 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1524
1525#ifdef CONFIG_IPV6_SUBTREES
1526 /* rt6i_src.plen != 0 indicates rt is in subtree
1527 * and exception table is indexed by a hash of
1528 * both rt6i_dst and rt6i_src.
1529 * Otherwise, the exception table is indexed by
1530 * a hash of only rt6i_dst.
1531 */
93c2fb25 1532 if (rt->fib6_src.plen)
35732d01
WW
1533 src_key = saddr;
1534#endif
1535 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1536
1537 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1538 res = rt6_ex->rt6i;
1539
1540 return res;
1541}
1542
1543/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1544static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1545{
35732d01
WW
1546 struct rt6_exception_bucket *bucket;
1547 struct in6_addr *src_key = NULL;
1548 struct rt6_exception *rt6_ex;
8a14e46f 1549 struct fib6_info *from;
35732d01
WW
1550 int err;
1551
091311de 1552 from = rcu_dereference(rt->from);
35732d01 1553 if (!from ||
442d713b 1554 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1555 return -EINVAL;
1556
1557 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1558 return -ENOENT;
1559
1560 spin_lock_bh(&rt6_exception_lock);
1561 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1562 lockdep_is_held(&rt6_exception_lock));
1563#ifdef CONFIG_IPV6_SUBTREES
1564 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1565 * and exception table is indexed by a hash of
1566 * both rt6i_dst and rt6i_src.
1567 * Otherwise, the exception table is indexed by
1568 * a hash of only rt6i_dst.
1569 */
93c2fb25 1570 if (from->fib6_src.plen)
35732d01
WW
1571 src_key = &rt->rt6i_src.addr;
1572#endif
1573 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1574 &rt->rt6i_dst.addr,
1575 src_key);
1576 if (rt6_ex) {
1577 rt6_remove_exception(bucket, rt6_ex);
1578 err = 0;
1579 } else {
1580 err = -ENOENT;
1581 }
1582
1583 spin_unlock_bh(&rt6_exception_lock);
1584 return err;
1585}
1586
1587/* Find rt6_ex which contains the passed in rt cache and
1588 * refresh its stamp
1589 */
1590static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1591{
35732d01
WW
1592 struct rt6_exception_bucket *bucket;
1593 struct in6_addr *src_key = NULL;
1594 struct rt6_exception *rt6_ex;
193f3685 1595 struct fib6_info *from;
35732d01
WW
1596
1597 rcu_read_lock();
193f3685
PA
1598 from = rcu_dereference(rt->from);
1599 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1600 goto unlock;
1601
35732d01
WW
1602 bucket = rcu_dereference(from->rt6i_exception_bucket);
1603
1604#ifdef CONFIG_IPV6_SUBTREES
1605 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1606 * and exception table is indexed by a hash of
1607 * both rt6i_dst and rt6i_src.
1608 * Otherwise, the exception table is indexed by
1609 * a hash of only rt6i_dst.
1610 */
93c2fb25 1611 if (from->fib6_src.plen)
35732d01
WW
1612 src_key = &rt->rt6i_src.addr;
1613#endif
1614 rt6_ex = __rt6_find_exception_rcu(&bucket,
1615 &rt->rt6i_dst.addr,
1616 src_key);
1617 if (rt6_ex)
1618 rt6_ex->stamp = jiffies;
1619
193f3685 1620unlock:
35732d01
WW
1621 rcu_read_unlock();
1622}
1623
e9fa1495
SB
1624static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1625 struct rt6_info *rt, int mtu)
1626{
1627 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1628 * lowest MTU in the path: always allow updating the route PMTU to
1629 * reflect PMTU decreases.
1630 *
1631 * If the new MTU is higher, and the route PMTU is equal to the local
1632 * MTU, this means the old MTU is the lowest in the path, so allow
1633 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1634 * handle this.
1635 */
1636
1637 if (dst_mtu(&rt->dst) >= mtu)
1638 return true;
1639
1640 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1641 return true;
1642
1643 return false;
1644}
1645
1646static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1647 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1648{
1649 struct rt6_exception_bucket *bucket;
1650 struct rt6_exception *rt6_ex;
1651 int i;
1652
1653 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1654 lockdep_is_held(&rt6_exception_lock));
1655
e9fa1495
SB
1656 if (!bucket)
1657 return;
1658
1659 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1660 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1661 struct rt6_info *entry = rt6_ex->rt6i;
1662
1663 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1664 * route), the metrics of its rt->from have already
e9fa1495
SB
1665 * been updated.
1666 */
d4ead6b3 1667 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1668 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1669 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1670 }
e9fa1495 1671 bucket++;
f5bbe7ee
WW
1672 }
1673}
1674
b16cb459
WW
1675#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1676
8d1c802b 1677static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1678 struct in6_addr *gateway)
1679{
1680 struct rt6_exception_bucket *bucket;
1681 struct rt6_exception *rt6_ex;
1682 struct hlist_node *tmp;
1683 int i;
1684
1685 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1686 return;
1687
1688 spin_lock_bh(&rt6_exception_lock);
1689 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1690 lockdep_is_held(&rt6_exception_lock));
1691
1692 if (bucket) {
1693 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1694 hlist_for_each_entry_safe(rt6_ex, tmp,
1695 &bucket->chain, hlist) {
1696 struct rt6_info *entry = rt6_ex->rt6i;
1697
1698 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1699 RTF_CACHE_GATEWAY &&
1700 ipv6_addr_equal(gateway,
1701 &entry->rt6i_gateway)) {
1702 rt6_remove_exception(bucket, rt6_ex);
1703 }
1704 }
1705 bucket++;
1706 }
1707 }
1708
1709 spin_unlock_bh(&rt6_exception_lock);
1710}
1711
c757faa8
WW
1712static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1713 struct rt6_exception *rt6_ex,
1714 struct fib6_gc_args *gc_args,
1715 unsigned long now)
1716{
1717 struct rt6_info *rt = rt6_ex->rt6i;
1718
1859bac0
PA
1719 /* we are pruning and obsoleting aged-out and non gateway exceptions
1720 * even if others have still references to them, so that on next
1721 * dst_check() such references can be dropped.
1722 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1723 * expired, independently from their aging, as per RFC 8201 section 4
1724 */
31afeb42
WW
1725 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1726 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1727 RT6_TRACE("aging clone %p\n", rt);
1728 rt6_remove_exception(bucket, rt6_ex);
1729 return;
1730 }
1731 } else if (time_after(jiffies, rt->dst.expires)) {
1732 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1733 rt6_remove_exception(bucket, rt6_ex);
1734 return;
31afeb42
WW
1735 }
1736
1737 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1738 struct neighbour *neigh;
1739 __u8 neigh_flags = 0;
1740
1bfa26ff
ED
1741 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1742 if (neigh)
c757faa8 1743 neigh_flags = neigh->flags;
1bfa26ff 1744
c757faa8
WW
1745 if (!(neigh_flags & NTF_ROUTER)) {
1746 RT6_TRACE("purging route %p via non-router but gateway\n",
1747 rt);
1748 rt6_remove_exception(bucket, rt6_ex);
1749 return;
1750 }
1751 }
31afeb42 1752
c757faa8
WW
1753 gc_args->more++;
1754}
1755
8d1c802b 1756void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1757 struct fib6_gc_args *gc_args,
1758 unsigned long now)
1759{
1760 struct rt6_exception_bucket *bucket;
1761 struct rt6_exception *rt6_ex;
1762 struct hlist_node *tmp;
1763 int i;
1764
1765 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1766 return;
1767
1bfa26ff
ED
1768 rcu_read_lock_bh();
1769 spin_lock(&rt6_exception_lock);
c757faa8
WW
1770 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1771 lockdep_is_held(&rt6_exception_lock));
1772
1773 if (bucket) {
1774 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1775 hlist_for_each_entry_safe(rt6_ex, tmp,
1776 &bucket->chain, hlist) {
1777 rt6_age_examine_exception(bucket, rt6_ex,
1778 gc_args, now);
1779 }
1780 bucket++;
1781 }
1782 }
1bfa26ff
ED
1783 spin_unlock(&rt6_exception_lock);
1784 rcu_read_unlock_bh();
c757faa8
WW
1785}
1786
1d053da9
DA
1787/* must be called with rcu lock held */
1788struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1789 int oif, struct flowi6 *fl6, int strict)
1da177e4 1790{
367efcb9 1791 struct fib6_node *fn, *saved_fn;
8d1c802b 1792 struct fib6_info *f6i;
1da177e4 1793
6454743b 1794 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1795 saved_fn = fn;
1da177e4 1796
ca254490
DA
1797 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1798 oif = 0;
1799
a3c00e46 1800redo_rt6_select:
23fb93a4 1801 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1802 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1803 fn = fib6_backtrack(fn, &fl6->saddr);
1804 if (fn)
1805 goto redo_rt6_select;
367efcb9
MKL
1806 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1807 /* also consider unreachable route */
1808 strict &= ~RT6_LOOKUP_F_REACHABLE;
1809 fn = saved_fn;
1810 goto redo_rt6_select;
367efcb9 1811 }
a3c00e46
MKL
1812 }
1813
d4bea421 1814 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1815
1d053da9
DA
1816 return f6i;
1817}
1818
1819struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1820 int oif, struct flowi6 *fl6,
1821 const struct sk_buff *skb, int flags)
1822{
1823 struct fib6_info *f6i;
1824 struct rt6_info *rt;
1825 int strict = 0;
1826
1827 strict |= flags & RT6_LOOKUP_F_IFACE;
1828 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1829 if (net->ipv6.devconf_all->forwarding == 0)
1830 strict |= RT6_LOOKUP_F_REACHABLE;
1831
1832 rcu_read_lock();
1833
1834 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1835 if (f6i->fib6_nsiblings)
1836 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1837
23fb93a4 1838 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1839 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1840 rcu_read_unlock();
d3843fe5 1841 dst_hold(&rt->dst);
d3843fe5 1842 return rt;
23fb93a4
DA
1843 }
1844
1845 /*Search through exception table */
1846 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1847 if (rt) {
10585b43 1848 if (ip6_hold_safe(net, &rt))
d3843fe5 1849 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1850
66f5d6ce 1851 rcu_read_unlock();
d52d3997 1852 return rt;
3da59bd9 1853 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
bdf00467 1854 !f6i->fib6_nh.fib_nh_gw_family)) {
3da59bd9
MKL
1855 /* Create a RTF_CACHE clone which will not be
1856 * owned by the fib6 tree. It is for the special case where
1857 * the daddr in the skb during the neighbor look-up is different
1858 * from the fl6->daddr used to look-up route here.
1859 */
3da59bd9
MKL
1860 struct rt6_info *uncached_rt;
1861
23fb93a4 1862 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1863
4d85cd0c 1864 rcu_read_unlock();
c71099ac 1865
1cfb71ee
WW
1866 if (uncached_rt) {
1867 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1868 * No need for another dst_hold()
1869 */
8d0b94af 1870 rt6_uncached_list_add(uncached_rt);
81eb8447 1871 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1872 } else {
3da59bd9 1873 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1874 dst_hold(&uncached_rt->dst);
1875 }
b811580d 1876
3da59bd9 1877 return uncached_rt;
d52d3997
MKL
1878 } else {
1879 /* Get a percpu copy */
1880
1881 struct rt6_info *pcpu_rt;
1882
951f788a 1883 local_bh_disable();
23fb93a4 1884 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1885
93531c67
DA
1886 if (!pcpu_rt)
1887 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1888
951f788a
ED
1889 local_bh_enable();
1890 rcu_read_unlock();
d4bea421 1891
d52d3997
MKL
1892 return pcpu_rt;
1893 }
1da177e4 1894}
9ff74384 1895EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1896
b75cc8f9
DA
1897static struct rt6_info *ip6_pol_route_input(struct net *net,
1898 struct fib6_table *table,
1899 struct flowi6 *fl6,
1900 const struct sk_buff *skb,
1901 int flags)
4acad72d 1902{
b75cc8f9 1903 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1904}
1905
d409b847
MB
1906struct dst_entry *ip6_route_input_lookup(struct net *net,
1907 struct net_device *dev,
b75cc8f9
DA
1908 struct flowi6 *fl6,
1909 const struct sk_buff *skb,
1910 int flags)
72331bc0
SL
1911{
1912 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1913 flags |= RT6_LOOKUP_F_IFACE;
1914
b75cc8f9 1915 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1916}
d409b847 1917EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1918
23aebdac 1919static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1920 struct flow_keys *keys,
1921 struct flow_keys *flkeys)
23aebdac
JS
1922{
1923 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1924 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1925 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1926 const struct ipv6hdr *inner_iph;
1927 const struct icmp6hdr *icmph;
1928 struct ipv6hdr _inner_iph;
cea67a2d 1929 struct icmp6hdr _icmph;
23aebdac
JS
1930
1931 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1932 goto out;
1933
cea67a2d
ED
1934 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1935 sizeof(_icmph), &_icmph);
1936 if (!icmph)
1937 goto out;
1938
23aebdac
JS
1939 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1940 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1941 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1942 icmph->icmp6_type != ICMPV6_PARAMPROB)
1943 goto out;
1944
1945 inner_iph = skb_header_pointer(skb,
1946 skb_transport_offset(skb) + sizeof(*icmph),
1947 sizeof(_inner_iph), &_inner_iph);
1948 if (!inner_iph)
1949 goto out;
1950
1951 key_iph = inner_iph;
5e5d6fed 1952 _flkeys = NULL;
23aebdac 1953out:
5e5d6fed
RP
1954 if (_flkeys) {
1955 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1956 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1957 keys->tags.flow_label = _flkeys->tags.flow_label;
1958 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1959 } else {
1960 keys->addrs.v6addrs.src = key_iph->saddr;
1961 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1962 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1963 keys->basic.ip_proto = key_iph->nexthdr;
1964 }
23aebdac
JS
1965}
1966
1967/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1968u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1969 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1970{
1971 struct flow_keys hash_keys;
9a2a537a 1972 u32 mhash;
23aebdac 1973
bbfa047a 1974 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1975 case 0:
1976 memset(&hash_keys, 0, sizeof(hash_keys));
1977 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1978 if (skb) {
1979 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1980 } else {
1981 hash_keys.addrs.v6addrs.src = fl6->saddr;
1982 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 1983 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
1984 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1985 }
1986 break;
1987 case 1:
1988 if (skb) {
1989 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1990 struct flow_keys keys;
1991
1992 /* short-circuit if we already have L4 hash present */
1993 if (skb->l4_hash)
1994 return skb_get_hash_raw(skb) >> 1;
1995
1996 memset(&hash_keys, 0, sizeof(hash_keys));
1997
1998 if (!flkeys) {
1999 skb_flow_dissect_flow_keys(skb, &keys, flag);
2000 flkeys = &keys;
2001 }
2002 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2003 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2004 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2005 hash_keys.ports.src = flkeys->ports.src;
2006 hash_keys.ports.dst = flkeys->ports.dst;
2007 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2008 } else {
2009 memset(&hash_keys, 0, sizeof(hash_keys));
2010 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2011 hash_keys.addrs.v6addrs.src = fl6->saddr;
2012 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2013 hash_keys.ports.src = fl6->fl6_sport;
2014 hash_keys.ports.dst = fl6->fl6_dport;
2015 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2016 }
2017 break;
23aebdac 2018 }
9a2a537a 2019 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2020
9a2a537a 2021 return mhash >> 1;
23aebdac
JS
2022}
2023
c71099ac
TG
2024void ip6_route_input(struct sk_buff *skb)
2025{
b71d1d42 2026 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2027 struct net *net = dev_net(skb->dev);
adaa70bb 2028 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2029 struct ip_tunnel_info *tun_info;
4c9483b2 2030 struct flowi6 fl6 = {
e0d56fdd 2031 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2032 .daddr = iph->daddr,
2033 .saddr = iph->saddr,
6502ca52 2034 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2035 .flowi6_mark = skb->mark,
2036 .flowi6_proto = iph->nexthdr,
c71099ac 2037 };
5e5d6fed 2038 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2039
904af04d 2040 tun_info = skb_tunnel_info(skb);
46fa062a 2041 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2042 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2043
2044 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2045 flkeys = &_flkeys;
2046
23aebdac 2047 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2048 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2049 skb_dst_drop(skb);
b75cc8f9
DA
2050 skb_dst_set(skb,
2051 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2052}
2053
b75cc8f9
DA
2054static struct rt6_info *ip6_pol_route_output(struct net *net,
2055 struct fib6_table *table,
2056 struct flowi6 *fl6,
2057 const struct sk_buff *skb,
2058 int flags)
1da177e4 2059{
b75cc8f9 2060 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2061}
2062
6f21c96a
PA
2063struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2064 struct flowi6 *fl6, int flags)
c71099ac 2065{
d46a9d67 2066 bool any_src;
c71099ac 2067
3ede0bbc
RS
2068 if (ipv6_addr_type(&fl6->daddr) &
2069 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2070 struct dst_entry *dst;
2071
2072 dst = l3mdev_link_scope_lookup(net, fl6);
2073 if (dst)
2074 return dst;
2075 }
ca254490 2076
1fb9489b 2077 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2078
d46a9d67 2079 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2080 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2081 (fl6->flowi6_oif && any_src))
77d16f45 2082 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2083
d46a9d67 2084 if (!any_src)
adaa70bb 2085 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2086 else if (sk)
2087 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2088
b75cc8f9 2089 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2090}
6f21c96a 2091EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2092
2774c131 2093struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2094{
5c1e6aa3 2095 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2096 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2097 struct dst_entry *new = NULL;
2098
1dbe3252 2099 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2100 DST_OBSOLETE_DEAD, 0);
14e50e57 2101 if (rt) {
0a1f5962 2102 rt6_info_init(rt);
81eb8447 2103 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2104
0a1f5962 2105 new = &rt->dst;
14e50e57 2106 new->__use = 1;
352e512c 2107 new->input = dst_discard;
ede2059d 2108 new->output = dst_discard_out;
14e50e57 2109
0a1f5962 2110 dst_copy_metrics(new, &ort->dst);
14e50e57 2111
1dbe3252 2112 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2113 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2114 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2115
2116 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2117#ifdef CONFIG_IPV6_SUBTREES
2118 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2119#endif
14e50e57
DM
2120 }
2121
69ead7af
DM
2122 dst_release(dst_orig);
2123 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2124}
14e50e57 2125
1da177e4
LT
2126/*
2127 * Destination cache support functions
2128 */
2129
8d1c802b 2130static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2131{
93531c67
DA
2132 u32 rt_cookie = 0;
2133
8ae86971 2134 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2135 return false;
2136
2137 if (fib6_check_expired(f6i))
2138 return false;
2139
2140 return true;
4b32b5ad
MKL
2141}
2142
a68886a6
DA
2143static struct dst_entry *rt6_check(struct rt6_info *rt,
2144 struct fib6_info *from,
2145 u32 cookie)
3da59bd9 2146{
36143645 2147 u32 rt_cookie = 0;
c5cff856 2148
a68886a6 2149 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2150 rt_cookie != cookie)
3da59bd9
MKL
2151 return NULL;
2152
2153 if (rt6_check_expired(rt))
2154 return NULL;
2155
2156 return &rt->dst;
2157}
2158
a68886a6
DA
2159static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2160 struct fib6_info *from,
2161 u32 cookie)
3da59bd9 2162{
5973fb1e
MKL
2163 if (!__rt6_check_expired(rt) &&
2164 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2165 fib6_check(from, cookie))
3da59bd9
MKL
2166 return &rt->dst;
2167 else
2168 return NULL;
2169}
2170
1da177e4
LT
2171static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2172{
a87b7dc9 2173 struct dst_entry *dst_ret;
a68886a6 2174 struct fib6_info *from;
1da177e4
LT
2175 struct rt6_info *rt;
2176
a87b7dc9
DA
2177 rt = container_of(dst, struct rt6_info, dst);
2178
2179 rcu_read_lock();
1da177e4 2180
6f3118b5
ND
2181 /* All IPV6 dsts are created with ->obsolete set to the value
2182 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2183 * into this function always.
2184 */
e3bc10bd 2185
a68886a6 2186 from = rcu_dereference(rt->from);
4b32b5ad 2187
a68886a6
DA
2188 if (from && (rt->rt6i_flags & RTF_PCPU ||
2189 unlikely(!list_empty(&rt->rt6i_uncached))))
2190 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2191 else
a68886a6 2192 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2193
2194 rcu_read_unlock();
2195
2196 return dst_ret;
1da177e4
LT
2197}
2198
2199static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2200{
2201 struct rt6_info *rt = (struct rt6_info *) dst;
2202
2203 if (rt) {
54c1a859 2204 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2205 rcu_read_lock();
54c1a859 2206 if (rt6_check_expired(rt)) {
93531c67 2207 rt6_remove_exception_rt(rt);
54c1a859
YH
2208 dst = NULL;
2209 }
c3c14da0 2210 rcu_read_unlock();
54c1a859 2211 } else {
1da177e4 2212 dst_release(dst);
54c1a859
YH
2213 dst = NULL;
2214 }
1da177e4 2215 }
54c1a859 2216 return dst;
1da177e4
LT
2217}
2218
2219static void ip6_link_failure(struct sk_buff *skb)
2220{
2221 struct rt6_info *rt;
2222
3ffe533c 2223 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2224
adf30907 2225 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2226 if (rt) {
8a14e46f 2227 rcu_read_lock();
1eb4f758 2228 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2229 rt6_remove_exception_rt(rt);
c5cff856 2230 } else {
a68886a6 2231 struct fib6_info *from;
c5cff856
WW
2232 struct fib6_node *fn;
2233
a68886a6
DA
2234 from = rcu_dereference(rt->from);
2235 if (from) {
2236 fn = rcu_dereference(from->fib6_node);
2237 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2238 fn->fn_sernum = -1;
2239 }
1eb4f758 2240 }
8a14e46f 2241 rcu_read_unlock();
1da177e4
LT
2242 }
2243}
2244
6a3e030f
DA
2245static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2246{
a68886a6
DA
2247 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2248 struct fib6_info *from;
2249
2250 rcu_read_lock();
2251 from = rcu_dereference(rt0->from);
2252 if (from)
2253 rt0->dst.expires = from->expires;
2254 rcu_read_unlock();
2255 }
6a3e030f
DA
2256
2257 dst_set_expires(&rt0->dst, timeout);
2258 rt0->rt6i_flags |= RTF_EXPIRES;
2259}
2260
45e4fd26
MKL
2261static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2262{
2263 struct net *net = dev_net(rt->dst.dev);
2264
d4ead6b3 2265 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2266 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2267 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2268}
2269
0d3f6d29
MKL
2270static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2271{
2272 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2273 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2274}
2275
45e4fd26
MKL
2276static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2277 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2278{
0dec879f 2279 const struct in6_addr *daddr, *saddr;
67ba4152 2280 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2281
19bda36c
XL
2282 if (dst_metric_locked(dst, RTAX_MTU))
2283 return;
2284
0dec879f
JA
2285 if (iph) {
2286 daddr = &iph->daddr;
2287 saddr = &iph->saddr;
2288 } else if (sk) {
2289 daddr = &sk->sk_v6_daddr;
2290 saddr = &inet6_sk(sk)->saddr;
2291 } else {
2292 daddr = NULL;
2293 saddr = NULL;
2294 }
2295 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2296 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2297 if (mtu >= dst_mtu(dst))
2298 return;
9d289715 2299
0d3f6d29 2300 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2301 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2302 /* update rt6_ex->stamp for cache */
2303 if (rt6->rt6i_flags & RTF_CACHE)
2304 rt6_update_exception_stamp_rt(rt6);
0dec879f 2305 } else if (daddr) {
a68886a6 2306 struct fib6_info *from;
45e4fd26
MKL
2307 struct rt6_info *nrt6;
2308
4d85cd0c 2309 rcu_read_lock();
a68886a6
DA
2310 from = rcu_dereference(rt6->from);
2311 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2312 if (nrt6) {
2313 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2314 if (rt6_insert_exception(nrt6, from))
2b760fcf 2315 dst_release_immediate(&nrt6->dst);
45e4fd26 2316 }
a68886a6 2317 rcu_read_unlock();
1da177e4
LT
2318 }
2319}
2320
45e4fd26
MKL
2321static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2322 struct sk_buff *skb, u32 mtu)
2323{
2324 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2325}
2326
42ae66c8 2327void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2328 int oif, u32 mark, kuid_t uid)
81aded24
DM
2329{
2330 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2331 struct dst_entry *dst;
dc92095d
2332 struct flowi6 fl6 = {
2333 .flowi6_oif = oif,
2334 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2335 .daddr = iph->daddr,
2336 .saddr = iph->saddr,
2337 .flowlabel = ip6_flowinfo(iph),
2338 .flowi6_uid = uid,
2339 };
81aded24
DM
2340
2341 dst = ip6_route_output(net, NULL, &fl6);
2342 if (!dst->error)
45e4fd26 2343 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2344 dst_release(dst);
2345}
2346EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2347
2348void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2349{
7ddacfa5 2350 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2351 struct dst_entry *dst;
2352
7ddacfa5
DA
2353 if (!oif && skb->dev)
2354 oif = l3mdev_master_ifindex(skb->dev);
2355
2356 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2357
2358 dst = __sk_dst_get(sk);
2359 if (!dst || !dst->obsolete ||
2360 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2361 return;
2362
2363 bh_lock_sock(sk);
2364 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2365 ip6_datagram_dst_update(sk, false);
2366 bh_unlock_sock(sk);
81aded24
DM
2367}
2368EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2369
7d6850f7
AK
2370void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2371 const struct flowi6 *fl6)
2372{
2373#ifdef CONFIG_IPV6_SUBTREES
2374 struct ipv6_pinfo *np = inet6_sk(sk);
2375#endif
2376
2377 ip6_dst_store(sk, dst,
2378 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2379 &sk->sk_v6_daddr : NULL,
2380#ifdef CONFIG_IPV6_SUBTREES
2381 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2382 &np->saddr :
2383#endif
2384 NULL);
2385}
2386
b55b76b2
DJ
2387/* Handle redirects */
2388struct ip6rd_flowi {
2389 struct flowi6 fl6;
2390 struct in6_addr gateway;
2391};
2392
2393static struct rt6_info *__ip6_route_redirect(struct net *net,
2394 struct fib6_table *table,
2395 struct flowi6 *fl6,
b75cc8f9 2396 const struct sk_buff *skb,
b55b76b2
DJ
2397 int flags)
2398{
2399 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2400 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2401 struct fib6_info *rt;
b55b76b2
DJ
2402 struct fib6_node *fn;
2403
2404 /* Get the "current" route for this destination and
67c408cf 2405 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2406 *
2407 * RFC 4861 specifies that redirects should only be
2408 * accepted if they come from the nexthop to the target.
2409 * Due to the way the routes are chosen, this notion
2410 * is a bit fuzzy and one might need to check all possible
2411 * routes.
2412 */
2413
66f5d6ce 2414 rcu_read_lock();
6454743b 2415 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2416restart:
66f5d6ce 2417 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 2418 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c 2419 continue;
14895687 2420 if (fib6_check_expired(rt))
b55b76b2 2421 continue;
93c2fb25 2422 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2423 break;
bdf00467 2424 if (!rt->fib6_nh.fib_nh_gw_family)
b55b76b2 2425 continue;
ad1601ae 2426 if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
b55b76b2 2427 continue;
2b760fcf
WW
2428 /* rt_cache's gateway might be different from its 'parent'
2429 * in the case of an ip redirect.
2430 * So we keep searching in the exception table if the gateway
2431 * is different.
2432 */
ad1601ae 2433 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
2b760fcf
WW
2434 rt_cache = rt6_find_cached_rt(rt,
2435 &fl6->daddr,
2436 &fl6->saddr);
2437 if (rt_cache &&
2438 ipv6_addr_equal(&rdfl->gateway,
2439 &rt_cache->rt6i_gateway)) {
23fb93a4 2440 ret = rt_cache;
2b760fcf
WW
2441 break;
2442 }
b55b76b2 2443 continue;
2b760fcf 2444 }
b55b76b2
DJ
2445 break;
2446 }
2447
2448 if (!rt)
421842ed 2449 rt = net->ipv6.fib6_null_entry;
93c2fb25 2450 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2451 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2452 goto out;
2453 }
2454
421842ed 2455 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2456 fn = fib6_backtrack(fn, &fl6->saddr);
2457 if (fn)
2458 goto restart;
b55b76b2 2459 }
a3c00e46 2460
b0a1ba59 2461out:
23fb93a4 2462 if (ret)
10585b43 2463 ip6_hold_safe(net, &ret);
23fb93a4
DA
2464 else
2465 ret = ip6_create_rt_rcu(rt);
b55b76b2 2466
66f5d6ce 2467 rcu_read_unlock();
b55b76b2 2468
b65f164d 2469 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2470 return ret;
b55b76b2
DJ
2471};
2472
2473static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2474 const struct flowi6 *fl6,
2475 const struct sk_buff *skb,
2476 const struct in6_addr *gateway)
b55b76b2
DJ
2477{
2478 int flags = RT6_LOOKUP_F_HAS_SADDR;
2479 struct ip6rd_flowi rdfl;
2480
2481 rdfl.fl6 = *fl6;
2482 rdfl.gateway = *gateway;
2483
b75cc8f9 2484 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2485 flags, __ip6_route_redirect);
2486}
2487
e2d118a1
LC
2488void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2489 kuid_t uid)
3a5ad2ee
DM
2490{
2491 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2492 struct dst_entry *dst;
1f7f10ac
2493 struct flowi6 fl6 = {
2494 .flowi6_iif = LOOPBACK_IFINDEX,
2495 .flowi6_oif = oif,
2496 .flowi6_mark = mark,
2497 .daddr = iph->daddr,
2498 .saddr = iph->saddr,
2499 .flowlabel = ip6_flowinfo(iph),
2500 .flowi6_uid = uid,
2501 };
3a5ad2ee 2502
b75cc8f9 2503 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2504 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2505 dst_release(dst);
2506}
2507EXPORT_SYMBOL_GPL(ip6_redirect);
2508
d456336d 2509void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2510{
2511 const struct ipv6hdr *iph = ipv6_hdr(skb);
2512 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2513 struct dst_entry *dst;
0b26fb17
2514 struct flowi6 fl6 = {
2515 .flowi6_iif = LOOPBACK_IFINDEX,
2516 .flowi6_oif = oif,
0b26fb17
2517 .daddr = msg->dest,
2518 .saddr = iph->daddr,
2519 .flowi6_uid = sock_net_uid(net, NULL),
2520 };
c92a59ec 2521
b75cc8f9 2522 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2523 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2524 dst_release(dst);
2525}
2526
3a5ad2ee
DM
2527void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2528{
e2d118a1
LC
2529 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2530 sk->sk_uid);
3a5ad2ee
DM
2531}
2532EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2533
0dbaee3b 2534static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2535{
0dbaee3b
DM
2536 struct net_device *dev = dst->dev;
2537 unsigned int mtu = dst_mtu(dst);
2538 struct net *net = dev_net(dev);
2539
1da177e4
LT
2540 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2541
5578689a
DL
2542 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2543 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2544
2545 /*
1ab1457c
YH
2546 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2547 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2548 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2549 * rely only on pmtu discovery"
2550 */
2551 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2552 mtu = IPV6_MAXPLEN;
2553 return mtu;
2554}
2555
ebb762f2 2556static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2557{
d33e4553 2558 struct inet6_dev *idev;
d4ead6b3 2559 unsigned int mtu;
4b32b5ad
MKL
2560
2561 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2562 if (mtu)
30f78d8e 2563 goto out;
618f9bc7
SK
2564
2565 mtu = IPV6_MIN_MTU;
d33e4553
DM
2566
2567 rcu_read_lock();
2568 idev = __in6_dev_get(dst->dev);
2569 if (idev)
2570 mtu = idev->cnf.mtu6;
2571 rcu_read_unlock();
2572
30f78d8e 2573out:
14972cbd
RP
2574 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2575
2576 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2577}
2578
901731b8
DA
2579/* MTU selection:
2580 * 1. mtu on route is locked - use it
2581 * 2. mtu from nexthop exception
2582 * 3. mtu from egress device
2583 *
2584 * based on ip6_dst_mtu_forward and exception logic of
2585 * rt6_find_cached_rt; called with rcu_read_lock
2586 */
2587u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2588 struct in6_addr *saddr)
2589{
2590 struct rt6_exception_bucket *bucket;
2591 struct rt6_exception *rt6_ex;
2592 struct in6_addr *src_key;
2593 struct inet6_dev *idev;
2594 u32 mtu = 0;
2595
2596 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2597 mtu = f6i->fib6_pmtu;
2598 if (mtu)
2599 goto out;
2600 }
2601
2602 src_key = NULL;
2603#ifdef CONFIG_IPV6_SUBTREES
2604 if (f6i->fib6_src.plen)
2605 src_key = saddr;
2606#endif
2607
2608 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2609 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2610 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2611 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2612
2613 if (likely(!mtu)) {
2614 struct net_device *dev = fib6_info_nh_dev(f6i);
2615
2616 mtu = IPV6_MIN_MTU;
2617 idev = __in6_dev_get(dev);
2618 if (idev && idev->cnf.mtu6 > mtu)
2619 mtu = idev->cnf.mtu6;
2620 }
2621
2622 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2623out:
2624 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2625}
2626
3b00944c 2627struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2628 struct flowi6 *fl6)
1da177e4 2629{
87a11578 2630 struct dst_entry *dst;
1da177e4
LT
2631 struct rt6_info *rt;
2632 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2633 struct net *net = dev_net(dev);
1da177e4 2634
38308473 2635 if (unlikely(!idev))
122bdf67 2636 return ERR_PTR(-ENODEV);
1da177e4 2637
ad706862 2638 rt = ip6_dst_alloc(net, dev, 0);
38308473 2639 if (unlikely(!rt)) {
1da177e4 2640 in6_dev_put(idev);
87a11578 2641 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2642 goto out;
2643 }
2644
8e2ec639 2645 rt->dst.flags |= DST_HOST;
588753f1 2646 rt->dst.input = ip6_input;
8e2ec639 2647 rt->dst.output = ip6_output;
550bab42 2648 rt->rt6i_gateway = fl6->daddr;
87a11578 2649 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2650 rt->rt6i_dst.plen = 128;
2651 rt->rt6i_idev = idev;
14edd87d 2652 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2653
4c981e28 2654 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2655 * do proper release of the net_device
2656 */
2657 rt6_uncached_list_add(rt);
81eb8447 2658 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2659
87a11578
DM
2660 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2661
1da177e4 2662out:
87a11578 2663 return dst;
1da177e4
LT
2664}
2665
569d3645 2666static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2667{
86393e52 2668 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2669 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2670 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2671 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2672 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2673 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2674 int entries;
7019b78e 2675
fc66f95c 2676 entries = dst_entries_get_fast(ops);
49a18d86 2677 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2678 entries <= rt_max_size)
1da177e4
LT
2679 goto out;
2680
6891a346 2681 net->ipv6.ip6_rt_gc_expire++;
14956643 2682 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2683 entries = dst_entries_get_slow(ops);
2684 if (entries < ops->gc_thresh)
7019b78e 2685 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2686out:
7019b78e 2687 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2688 return entries > rt_max_size;
1da177e4
LT
2689}
2690
8c14586f
DA
2691static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2692 struct fib6_config *cfg,
f4797b33
DA
2693 const struct in6_addr *gw_addr,
2694 u32 tbid, int flags)
8c14586f
DA
2695{
2696 struct flowi6 fl6 = {
2697 .flowi6_oif = cfg->fc_ifindex,
2698 .daddr = *gw_addr,
2699 .saddr = cfg->fc_prefsrc,
2700 };
2701 struct fib6_table *table;
2702 struct rt6_info *rt;
8c14586f 2703
f4797b33 2704 table = fib6_get_table(net, tbid);
8c14586f
DA
2705 if (!table)
2706 return NULL;
2707
2708 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2709 flags |= RT6_LOOKUP_F_HAS_SADDR;
2710
f4797b33 2711 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2712 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2713
2714 /* if table lookup failed, fall back to full lookup */
2715 if (rt == net->ipv6.ip6_null_entry) {
2716 ip6_rt_put(rt);
2717 rt = NULL;
2718 }
2719
2720 return rt;
2721}
2722
fc1e64e1
DA
2723static int ip6_route_check_nh_onlink(struct net *net,
2724 struct fib6_config *cfg,
9fbb704c 2725 const struct net_device *dev,
fc1e64e1
DA
2726 struct netlink_ext_ack *extack)
2727{
44750f84 2728 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2729 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2730 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2731 struct fib6_info *from;
fc1e64e1
DA
2732 struct rt6_info *grt;
2733 int err;
2734
2735 err = 0;
2736 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2737 if (grt) {
bf1dc8ba
PA
2738 rcu_read_lock();
2739 from = rcu_dereference(grt->from);
58e354c0 2740 if (!grt->dst.error &&
4ed591c8 2741 /* ignore match if it is the default route */
bf1dc8ba 2742 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2743 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2744 NL_SET_ERR_MSG(extack,
2745 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2746 err = -EINVAL;
2747 }
bf1dc8ba 2748 rcu_read_unlock();
fc1e64e1
DA
2749
2750 ip6_rt_put(grt);
2751 }
2752
2753 return err;
2754}
2755
1edce99f
DA
2756static int ip6_route_check_nh(struct net *net,
2757 struct fib6_config *cfg,
2758 struct net_device **_dev,
2759 struct inet6_dev **idev)
2760{
2761 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2762 struct net_device *dev = _dev ? *_dev : NULL;
2763 struct rt6_info *grt = NULL;
2764 int err = -EHOSTUNREACH;
2765
2766 if (cfg->fc_table) {
f4797b33
DA
2767 int flags = RT6_LOOKUP_F_IFACE;
2768
2769 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2770 cfg->fc_table, flags);
1edce99f
DA
2771 if (grt) {
2772 if (grt->rt6i_flags & RTF_GATEWAY ||
2773 (dev && dev != grt->dst.dev)) {
2774 ip6_rt_put(grt);
2775 grt = NULL;
2776 }
2777 }
2778 }
2779
2780 if (!grt)
b75cc8f9 2781 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2782
2783 if (!grt)
2784 goto out;
2785
2786 if (dev) {
2787 if (dev != grt->dst.dev) {
2788 ip6_rt_put(grt);
2789 goto out;
2790 }
2791 } else {
2792 *_dev = dev = grt->dst.dev;
2793 *idev = grt->rt6i_idev;
2794 dev_hold(dev);
2795 in6_dev_hold(grt->rt6i_idev);
2796 }
2797
2798 if (!(grt->rt6i_flags & RTF_GATEWAY))
2799 err = 0;
2800
2801 ip6_rt_put(grt);
2802
2803out:
2804 return err;
2805}
2806
9fbb704c
DA
2807static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2808 struct net_device **_dev, struct inet6_dev **idev,
2809 struct netlink_ext_ack *extack)
2810{
2811 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2812 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2813 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2814 const struct net_device *dev = *_dev;
232378e8 2815 bool need_addr_check = !dev;
9fbb704c
DA
2816 int err = -EINVAL;
2817
2818 /* if gw_addr is local we will fail to detect this in case
2819 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2820 * will return already-added prefix route via interface that
2821 * prefix route was assigned to, which might be non-loopback.
2822 */
232378e8
DA
2823 if (dev &&
2824 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2825 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2826 goto out;
2827 }
2828
2829 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2830 /* IPv6 strictly inhibits using not link-local
2831 * addresses as nexthop address.
2832 * Otherwise, router will not able to send redirects.
2833 * It is very good, but in some (rare!) circumstances
2834 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2835 * some exceptions. --ANK
2836 * We allow IPv4-mapped nexthops to support RFC4798-type
2837 * addressing
2838 */
2839 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2840 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2841 goto out;
2842 }
2843
2844 if (cfg->fc_flags & RTNH_F_ONLINK)
2845 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2846 else
2847 err = ip6_route_check_nh(net, cfg, _dev, idev);
2848
2849 if (err)
2850 goto out;
2851 }
2852
2853 /* reload in case device was changed */
2854 dev = *_dev;
2855
2856 err = -EINVAL;
2857 if (!dev) {
2858 NL_SET_ERR_MSG(extack, "Egress device not specified");
2859 goto out;
2860 } else if (dev->flags & IFF_LOOPBACK) {
2861 NL_SET_ERR_MSG(extack,
2862 "Egress device can not be loopback device for this route");
2863 goto out;
2864 }
232378e8
DA
2865
2866 /* if we did not check gw_addr above, do so now that the
2867 * egress device has been resolved.
2868 */
2869 if (need_addr_check &&
2870 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2871 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2872 goto out;
2873 }
2874
9fbb704c
DA
2875 err = 0;
2876out:
2877 return err;
2878}
2879
83c44251
DA
2880static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2881{
2882 if ((flags & RTF_REJECT) ||
2883 (dev && (dev->flags & IFF_LOOPBACK) &&
2884 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2885 !(flags & RTF_LOCAL)))
2886 return true;
2887
2888 return false;
2889}
2890
2891int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2892 struct fib6_config *cfg, gfp_t gfp_flags,
2893 struct netlink_ext_ack *extack)
2894{
2895 struct net_device *dev = NULL;
2896 struct inet6_dev *idev = NULL;
2897 int addr_type;
2898 int err;
2899
f1741730
DA
2900 fib6_nh->fib_nh_family = AF_INET6;
2901
83c44251
DA
2902 err = -ENODEV;
2903 if (cfg->fc_ifindex) {
2904 dev = dev_get_by_index(net, cfg->fc_ifindex);
2905 if (!dev)
2906 goto out;
2907 idev = in6_dev_get(dev);
2908 if (!idev)
2909 goto out;
2910 }
2911
2912 if (cfg->fc_flags & RTNH_F_ONLINK) {
2913 if (!dev) {
2914 NL_SET_ERR_MSG(extack,
2915 "Nexthop device required for onlink");
2916 goto out;
2917 }
2918
2919 if (!(dev->flags & IFF_UP)) {
2920 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2921 err = -ENETDOWN;
2922 goto out;
2923 }
2924
ad1601ae 2925 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
2926 }
2927
ad1601ae 2928 fib6_nh->fib_nh_weight = 1;
83c44251
DA
2929
2930 /* We cannot add true routes via loopback here,
2931 * they would result in kernel looping; promote them to reject routes
2932 */
2933 addr_type = ipv6_addr_type(&cfg->fc_dst);
2934 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2935 /* hold loopback dev/idev if we haven't done so. */
2936 if (dev != net->loopback_dev) {
2937 if (dev) {
2938 dev_put(dev);
2939 in6_dev_put(idev);
2940 }
2941 dev = net->loopback_dev;
2942 dev_hold(dev);
2943 idev = in6_dev_get(dev);
2944 if (!idev) {
2945 err = -ENODEV;
2946 goto out;
2947 }
2948 }
2949 goto set_dev;
2950 }
2951
2952 if (cfg->fc_flags & RTF_GATEWAY) {
2953 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2954 if (err)
2955 goto out;
2956
ad1601ae 2957 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 2958 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
2959 }
2960
2961 err = -ENODEV;
2962 if (!dev)
2963 goto out;
2964
2965 if (idev->cnf.disable_ipv6) {
2966 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2967 err = -EACCES;
2968 goto out;
2969 }
2970
2971 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
2972 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2973 err = -ENETDOWN;
2974 goto out;
2975 }
2976
2977 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2978 !netif_carrier_ok(dev))
ad1601ae 2979 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 2980
979e276e
DA
2981 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
2982 cfg->fc_encap_type, cfg, gfp_flags, extack);
2983 if (err)
2984 goto out;
83c44251 2985set_dev:
ad1601ae 2986 fib6_nh->fib_nh_dev = dev;
f1741730 2987 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
2988 err = 0;
2989out:
2990 if (idev)
2991 in6_dev_put(idev);
2992
2993 if (err) {
ad1601ae
DA
2994 lwtstate_put(fib6_nh->fib_nh_lws);
2995 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
2996 if (dev)
2997 dev_put(dev);
2998 }
2999
3000 return err;
3001}
3002
dac7d0f2
DA
3003void fib6_nh_release(struct fib6_nh *fib6_nh)
3004{
979e276e 3005 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3006}
3007
8d1c802b 3008static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3009 gfp_t gfp_flags,
333c4301 3010 struct netlink_ext_ack *extack)
1da177e4 3011{
5578689a 3012 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3013 struct fib6_info *rt = NULL;
c71099ac 3014 struct fib6_table *table;
8c5b83f0 3015 int err = -EINVAL;
83c44251 3016 int addr_type;
1da177e4 3017
557c44be 3018 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3019 if (cfg->fc_flags & RTF_PCPU) {
3020 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3021 goto out;
d5d531cb 3022 }
557c44be 3023
2ea2352e
WW
3024 /* RTF_CACHE is an internal flag; can not be set by userspace */
3025 if (cfg->fc_flags & RTF_CACHE) {
3026 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3027 goto out;
3028 }
3029
e8478e80
DA
3030 if (cfg->fc_type > RTN_MAX) {
3031 NL_SET_ERR_MSG(extack, "Invalid route type");
3032 goto out;
3033 }
3034
d5d531cb
DA
3035 if (cfg->fc_dst_len > 128) {
3036 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3037 goto out;
3038 }
3039 if (cfg->fc_src_len > 128) {
3040 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3041 goto out;
d5d531cb 3042 }
1da177e4 3043#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3044 if (cfg->fc_src_len) {
3045 NL_SET_ERR_MSG(extack,
3046 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3047 goto out;
d5d531cb 3048 }
1da177e4 3049#endif
fc1e64e1 3050
d71314b4 3051 err = -ENOBUFS;
38308473
DM
3052 if (cfg->fc_nlinfo.nlh &&
3053 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3054 table = fib6_get_table(net, cfg->fc_table);
38308473 3055 if (!table) {
f3213831 3056 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3057 table = fib6_new_table(net, cfg->fc_table);
3058 }
3059 } else {
3060 table = fib6_new_table(net, cfg->fc_table);
3061 }
38308473
DM
3062
3063 if (!table)
c71099ac 3064 goto out;
c71099ac 3065
93531c67
DA
3066 err = -ENOMEM;
3067 rt = fib6_info_alloc(gfp_flags);
3068 if (!rt)
1da177e4 3069 goto out;
93531c67 3070
d7e774f3
DA
3071 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3072 extack);
767a2217
DA
3073 if (IS_ERR(rt->fib6_metrics)) {
3074 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3075 /* Do not leave garbage there. */
3076 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3077 goto out;
3078 }
3079
93531c67
DA
3080 if (cfg->fc_flags & RTF_ADDRCONF)
3081 rt->dst_nocount = true;
1da177e4 3082
1716a961 3083 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3084 fib6_set_expires(rt, jiffies +
1716a961
G
3085 clock_t_to_jiffies(cfg->fc_expires));
3086 else
14895687 3087 fib6_clean_expires(rt);
1da177e4 3088
86872cb5
TG
3089 if (cfg->fc_protocol == RTPROT_UNSPEC)
3090 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3091 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3092
83c44251
DA
3093 rt->fib6_table = table;
3094 rt->fib6_metric = cfg->fc_metric;
3095 rt->fib6_type = cfg->fc_type;
2b2450ca 3096 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3097
93c2fb25
DA
3098 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3099 rt->fib6_dst.plen = cfg->fc_dst_len;
3100 if (rt->fib6_dst.plen == 128)
3b6761d1 3101 rt->dst_host = true;
e5fd387a 3102
1da177e4 3103#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3104 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3105 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3106#endif
83c44251
DA
3107 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3108 if (err)
3109 goto out;
1da177e4
LT
3110
3111 /* We cannot add true routes via loopback here,
83c44251 3112 * they would result in kernel looping; promote them to reject routes
1da177e4 3113 */
83c44251 3114 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3115 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3116 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3117
c3968a85 3118 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3119 struct net_device *dev = fib6_info_nh_dev(rt);
3120
c3968a85 3121 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3122 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3123 err = -EINVAL;
3124 goto out;
3125 }
93c2fb25
DA
3126 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3127 rt->fib6_prefsrc.plen = 128;
c3968a85 3128 } else
93c2fb25 3129 rt->fib6_prefsrc.plen = 0;
c3968a85 3130
8c5b83f0 3131 return rt;
6b9ea5a6 3132out:
93531c67 3133 fib6_info_release(rt);
8c5b83f0 3134 return ERR_PTR(err);
6b9ea5a6
RP
3135}
3136
acb54e3c 3137int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3138 struct netlink_ext_ack *extack)
6b9ea5a6 3139{
8d1c802b 3140 struct fib6_info *rt;
6b9ea5a6
RP
3141 int err;
3142
acb54e3c 3143 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3144 if (IS_ERR(rt))
3145 return PTR_ERR(rt);
6b9ea5a6 3146
d4ead6b3 3147 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3148 fib6_info_release(rt);
6b9ea5a6 3149
1da177e4
LT
3150 return err;
3151}
3152
8d1c802b 3153static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3154{
afb1d4b5 3155 struct net *net = info->nl_net;
c71099ac 3156 struct fib6_table *table;
afb1d4b5 3157 int err;
1da177e4 3158
421842ed 3159 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3160 err = -ENOENT;
3161 goto out;
3162 }
6c813a72 3163
93c2fb25 3164 table = rt->fib6_table;
66f5d6ce 3165 spin_lock_bh(&table->tb6_lock);
86872cb5 3166 err = fib6_del(rt, info);
66f5d6ce 3167 spin_unlock_bh(&table->tb6_lock);
1da177e4 3168
6825a26c 3169out:
93531c67 3170 fib6_info_release(rt);
1da177e4
LT
3171 return err;
3172}
3173
8d1c802b 3174int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3175{
afb1d4b5
DA
3176 struct nl_info info = { .nl_net = net };
3177
528c4ceb 3178 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3179}
3180
8d1c802b 3181static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3182{
3183 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3184 struct net *net = info->nl_net;
16a16cd3 3185 struct sk_buff *skb = NULL;
0ae81335 3186 struct fib6_table *table;
e3330039 3187 int err = -ENOENT;
0ae81335 3188
421842ed 3189 if (rt == net->ipv6.fib6_null_entry)
e3330039 3190 goto out_put;
93c2fb25 3191 table = rt->fib6_table;
66f5d6ce 3192 spin_lock_bh(&table->tb6_lock);
0ae81335 3193
93c2fb25 3194 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3195 struct fib6_info *sibling, *next_sibling;
0ae81335 3196
16a16cd3
DA
3197 /* prefer to send a single notification with all hops */
3198 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3199 if (skb) {
3200 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3201
d4ead6b3 3202 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3203 NULL, NULL, 0, RTM_DELROUTE,
3204 info->portid, seq, 0) < 0) {
3205 kfree_skb(skb);
3206 skb = NULL;
3207 } else
3208 info->skip_notify = 1;
3209 }
3210
0ae81335 3211 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3212 &rt->fib6_siblings,
3213 fib6_siblings) {
0ae81335
DA
3214 err = fib6_del(sibling, info);
3215 if (err)
e3330039 3216 goto out_unlock;
0ae81335
DA
3217 }
3218 }
3219
3220 err = fib6_del(rt, info);
e3330039 3221out_unlock:
66f5d6ce 3222 spin_unlock_bh(&table->tb6_lock);
e3330039 3223out_put:
93531c67 3224 fib6_info_release(rt);
16a16cd3
DA
3225
3226 if (skb) {
e3330039 3227 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3228 info->nlh, gfp_any());
3229 }
0ae81335
DA
3230 return err;
3231}
3232
23fb93a4
DA
3233static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3234{
3235 int rc = -ESRCH;
3236
3237 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3238 goto out;
3239
3240 if (cfg->fc_flags & RTF_GATEWAY &&
3241 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3242 goto out;
761f6026
XL
3243
3244 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3245out:
3246 return rc;
3247}
3248
333c4301
DA
3249static int ip6_route_del(struct fib6_config *cfg,
3250 struct netlink_ext_ack *extack)
1da177e4 3251{
8d1c802b 3252 struct rt6_info *rt_cache;
c71099ac 3253 struct fib6_table *table;
8d1c802b 3254 struct fib6_info *rt;
1da177e4 3255 struct fib6_node *fn;
1da177e4
LT
3256 int err = -ESRCH;
3257
5578689a 3258 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3259 if (!table) {
3260 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3261 return err;
d5d531cb 3262 }
c71099ac 3263
66f5d6ce 3264 rcu_read_lock();
1da177e4 3265
c71099ac 3266 fn = fib6_locate(&table->tb6_root,
86872cb5 3267 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3268 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3269 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3270
1da177e4 3271 if (fn) {
66f5d6ce 3272 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3273 struct fib6_nh *nh;
3274
2b760fcf 3275 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3276 int rc;
3277
2b760fcf
WW
3278 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3279 &cfg->fc_src);
23fb93a4
DA
3280 if (rt_cache) {
3281 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3282 if (rc != -ESRCH) {
3283 rcu_read_unlock();
23fb93a4 3284 return rc;
9e575010 3285 }
23fb93a4
DA
3286 }
3287 continue;
2b760fcf 3288 }
ad1601ae
DA
3289
3290 nh = &rt->fib6_nh;
86872cb5 3291 if (cfg->fc_ifindex &&
ad1601ae
DA
3292 (!nh->fib_nh_dev ||
3293 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3294 continue;
86872cb5 3295 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3296 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3297 continue;
93c2fb25 3298 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3299 continue;
93c2fb25 3300 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3301 continue;
e873e4b9
WW
3302 if (!fib6_info_hold_safe(rt))
3303 continue;
66f5d6ce 3304 rcu_read_unlock();
1da177e4 3305
0ae81335
DA
3306 /* if gateway was specified only delete the one hop */
3307 if (cfg->fc_flags & RTF_GATEWAY)
3308 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3309
3310 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3311 }
3312 }
66f5d6ce 3313 rcu_read_unlock();
1da177e4
LT
3314
3315 return err;
3316}
3317
6700c270 3318static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3319{
a6279458 3320 struct netevent_redirect netevent;
e8599ff4 3321 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3322 struct ndisc_options ndopts;
3323 struct inet6_dev *in6_dev;
3324 struct neighbour *neigh;
a68886a6 3325 struct fib6_info *from;
71bcdba0 3326 struct rd_msg *msg;
6e157b6a
DM
3327 int optlen, on_link;
3328 u8 *lladdr;
e8599ff4 3329
29a3cad5 3330 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3331 optlen -= sizeof(*msg);
e8599ff4
DM
3332
3333 if (optlen < 0) {
6e157b6a 3334 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3335 return;
3336 }
3337
71bcdba0 3338 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3339
71bcdba0 3340 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3341 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3342 return;
3343 }
3344
6e157b6a 3345 on_link = 0;
71bcdba0 3346 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3347 on_link = 1;
71bcdba0 3348 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3349 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3350 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3351 return;
3352 }
3353
3354 in6_dev = __in6_dev_get(skb->dev);
3355 if (!in6_dev)
3356 return;
3357 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3358 return;
3359
3360 /* RFC2461 8.1:
3361 * The IP source address of the Redirect MUST be the same as the current
3362 * first-hop router for the specified ICMP Destination Address.
3363 */
3364
f997c55c 3365 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3366 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3367 return;
3368 }
6e157b6a
DM
3369
3370 lladdr = NULL;
e8599ff4
DM
3371 if (ndopts.nd_opts_tgt_lladdr) {
3372 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3373 skb->dev);
3374 if (!lladdr) {
3375 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3376 return;
3377 }
3378 }
3379
6e157b6a 3380 rt = (struct rt6_info *) dst;
ec13ad1d 3381 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3382 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3383 return;
6e157b6a 3384 }
e8599ff4 3385
6e157b6a
DM
3386 /* Redirect received -> path was valid.
3387 * Look, redirects are sent only in response to data packets,
3388 * so that this nexthop apparently is reachable. --ANK
3389 */
0dec879f 3390 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3391
71bcdba0 3392 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3393 if (!neigh)
3394 return;
a6279458 3395
1da177e4
LT
3396 /*
3397 * We have finally decided to accept it.
3398 */
3399
f997c55c 3400 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3401 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3402 NEIGH_UPDATE_F_OVERRIDE|
3403 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3404 NEIGH_UPDATE_F_ISROUTER)),
3405 NDISC_REDIRECT, &ndopts);
1da177e4 3406
4d85cd0c 3407 rcu_read_lock();
a68886a6 3408 from = rcu_dereference(rt->from);
e873e4b9
WW
3409 /* This fib6_info_hold() is safe here because we hold reference to rt
3410 * and rt already holds reference to fib6_info.
3411 */
8a14e46f 3412 fib6_info_hold(from);
4d85cd0c 3413 rcu_read_unlock();
8a14e46f
DA
3414
3415 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3416 if (!nrt)
1da177e4
LT
3417 goto out;
3418
3419 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3420 if (on_link)
3421 nrt->rt6i_flags &= ~RTF_GATEWAY;
3422
4e3fd7a0 3423 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3424
2b760fcf
WW
3425 /* No need to remove rt from the exception table if rt is
3426 * a cached route because rt6_insert_exception() will
3427 * takes care of it
3428 */
8a14e46f 3429 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3430 dst_release_immediate(&nrt->dst);
3431 goto out;
3432 }
1da177e4 3433
d8d1f30b
CG
3434 netevent.old = &rt->dst;
3435 netevent.new = &nrt->dst;
71bcdba0 3436 netevent.daddr = &msg->dest;
60592833 3437 netevent.neigh = neigh;
8d71740c
TT
3438 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3439
1da177e4 3440out:
8a14e46f 3441 fib6_info_release(from);
e8599ff4 3442 neigh_release(neigh);
6e157b6a
DM
3443}
3444
70ceb4f5 3445#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3446static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3447 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3448 const struct in6_addr *gwaddr,
3449 struct net_device *dev)
70ceb4f5 3450{
830218c1
DA
3451 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3452 int ifindex = dev->ifindex;
70ceb4f5 3453 struct fib6_node *fn;
8d1c802b 3454 struct fib6_info *rt = NULL;
c71099ac
TG
3455 struct fib6_table *table;
3456
830218c1 3457 table = fib6_get_table(net, tb_id);
38308473 3458 if (!table)
c71099ac 3459 return NULL;
70ceb4f5 3460
66f5d6ce 3461 rcu_read_lock();
38fbeeee 3462 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3463 if (!fn)
3464 goto out;
3465
66f5d6ce 3466 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3467 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3468 continue;
2b2450ca 3469 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
bdf00467 3470 !rt->fib6_nh.fib_nh_gw_family)
70ceb4f5 3471 continue;
ad1601ae 3472 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3473 continue;
e873e4b9
WW
3474 if (!fib6_info_hold_safe(rt))
3475 continue;
70ceb4f5
YH
3476 break;
3477 }
3478out:
66f5d6ce 3479 rcu_read_unlock();
70ceb4f5
YH
3480 return rt;
3481}
3482
8d1c802b 3483static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3484 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3485 const struct in6_addr *gwaddr,
3486 struct net_device *dev,
95c96174 3487 unsigned int pref)
70ceb4f5 3488{
86872cb5 3489 struct fib6_config cfg = {
238fc7ea 3490 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3491 .fc_ifindex = dev->ifindex,
86872cb5
TG
3492 .fc_dst_len = prefixlen,
3493 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3494 RTF_UP | RTF_PREF(pref),
b91d5329 3495 .fc_protocol = RTPROT_RA,
e8478e80 3496 .fc_type = RTN_UNICAST,
15e47304 3497 .fc_nlinfo.portid = 0,
efa2cea0
DL
3498 .fc_nlinfo.nlh = NULL,
3499 .fc_nlinfo.nl_net = net,
86872cb5
TG
3500 };
3501
830218c1 3502 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3503 cfg.fc_dst = *prefix;
3504 cfg.fc_gateway = *gwaddr;
70ceb4f5 3505
e317da96
YH
3506 /* We should treat it as a default route if prefix length is 0. */
3507 if (!prefixlen)
86872cb5 3508 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3509
acb54e3c 3510 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3511
830218c1 3512 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3513}
3514#endif
3515
8d1c802b 3516struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3517 const struct in6_addr *addr,
3518 struct net_device *dev)
1ab1457c 3519{
830218c1 3520 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3521 struct fib6_info *rt;
c71099ac 3522 struct fib6_table *table;
1da177e4 3523
afb1d4b5 3524 table = fib6_get_table(net, tb_id);
38308473 3525 if (!table)
c71099ac 3526 return NULL;
1da177e4 3527
66f5d6ce
WW
3528 rcu_read_lock();
3529 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3530 struct fib6_nh *nh = &rt->fib6_nh;
3531
3532 if (dev == nh->fib_nh_dev &&
93c2fb25 3533 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3534 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3535 break;
3536 }
e873e4b9
WW
3537 if (rt && !fib6_info_hold_safe(rt))
3538 rt = NULL;
66f5d6ce 3539 rcu_read_unlock();
1da177e4
LT
3540 return rt;
3541}
3542
8d1c802b 3543struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3544 const struct in6_addr *gwaddr,
ebacaaa0
YH
3545 struct net_device *dev,
3546 unsigned int pref)
1da177e4 3547{
86872cb5 3548 struct fib6_config cfg = {
ca254490 3549 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3550 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3551 .fc_ifindex = dev->ifindex,
3552 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3553 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3554 .fc_protocol = RTPROT_RA,
e8478e80 3555 .fc_type = RTN_UNICAST,
15e47304 3556 .fc_nlinfo.portid = 0,
5578689a 3557 .fc_nlinfo.nlh = NULL,
afb1d4b5 3558 .fc_nlinfo.nl_net = net,
86872cb5 3559 };
1da177e4 3560
4e3fd7a0 3561 cfg.fc_gateway = *gwaddr;
1da177e4 3562
acb54e3c 3563 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3564 struct fib6_table *table;
3565
3566 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3567 if (table)
3568 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3569 }
1da177e4 3570
afb1d4b5 3571 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3572}
3573
afb1d4b5
DA
3574static void __rt6_purge_dflt_routers(struct net *net,
3575 struct fib6_table *table)
1da177e4 3576{
8d1c802b 3577 struct fib6_info *rt;
1da177e4
LT
3578
3579restart:
66f5d6ce
WW
3580 rcu_read_lock();
3581 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3582 struct net_device *dev = fib6_info_nh_dev(rt);
3583 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3584
93c2fb25 3585 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3586 (!idev || idev->cnf.accept_ra != 2) &&
3587 fib6_info_hold_safe(rt)) {
93531c67
DA
3588 rcu_read_unlock();
3589 ip6_del_rt(net, rt);
1da177e4
LT
3590 goto restart;
3591 }
3592 }
66f5d6ce 3593 rcu_read_unlock();
830218c1
DA
3594
3595 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3596}
3597
3598void rt6_purge_dflt_routers(struct net *net)
3599{
3600 struct fib6_table *table;
3601 struct hlist_head *head;
3602 unsigned int h;
3603
3604 rcu_read_lock();
3605
3606 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3607 head = &net->ipv6.fib_table_hash[h];
3608 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3609 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3610 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3611 }
3612 }
3613
3614 rcu_read_unlock();
1da177e4
LT
3615}
3616
5578689a
DL
3617static void rtmsg_to_fib6_config(struct net *net,
3618 struct in6_rtmsg *rtmsg,
86872cb5
TG
3619 struct fib6_config *cfg)
3620{
8823a3ac
3621 *cfg = (struct fib6_config){
3622 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3623 : RT6_TABLE_MAIN,
3624 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3625 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3626 .fc_expires = rtmsg->rtmsg_info,
3627 .fc_dst_len = rtmsg->rtmsg_dst_len,
3628 .fc_src_len = rtmsg->rtmsg_src_len,
3629 .fc_flags = rtmsg->rtmsg_flags,
3630 .fc_type = rtmsg->rtmsg_type,
3631
3632 .fc_nlinfo.nl_net = net,
3633
3634 .fc_dst = rtmsg->rtmsg_dst,
3635 .fc_src = rtmsg->rtmsg_src,
3636 .fc_gateway = rtmsg->rtmsg_gateway,
3637 };
86872cb5
TG
3638}
3639
5578689a 3640int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3641{
86872cb5 3642 struct fib6_config cfg;
1da177e4
LT
3643 struct in6_rtmsg rtmsg;
3644 int err;
3645
67ba4152 3646 switch (cmd) {
1da177e4
LT
3647 case SIOCADDRT: /* Add a route */
3648 case SIOCDELRT: /* Delete a route */
af31f412 3649 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3650 return -EPERM;
3651 err = copy_from_user(&rtmsg, arg,
3652 sizeof(struct in6_rtmsg));
3653 if (err)
3654 return -EFAULT;
86872cb5 3655
5578689a 3656 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3657
1da177e4
LT
3658 rtnl_lock();
3659 switch (cmd) {
3660 case SIOCADDRT:
acb54e3c 3661 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3662 break;
3663 case SIOCDELRT:
333c4301 3664 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3665 break;
3666 default:
3667 err = -EINVAL;
3668 }
3669 rtnl_unlock();
3670
3671 return err;
3ff50b79 3672 }
1da177e4
LT
3673
3674 return -EINVAL;
3675}
3676
3677/*
3678 * Drop the packet on the floor
3679 */
3680
d5fdd6ba 3681static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3682{
612f09e8 3683 int type;
adf30907 3684 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3685 switch (ipstats_mib_noroutes) {
3686 case IPSTATS_MIB_INNOROUTES:
0660e03f 3687 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3688 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3689 IP6_INC_STATS(dev_net(dst->dev),
3690 __in6_dev_get_safely(skb->dev),
3bd653c8 3691 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3692 break;
3693 }
3694 /* FALLTHROUGH */
3695 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3696 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3697 ipstats_mib_noroutes);
612f09e8
YH
3698 break;
3699 }
3ffe533c 3700 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3701 kfree_skb(skb);
3702 return 0;
3703}
3704
9ce8ade0
TG
3705static int ip6_pkt_discard(struct sk_buff *skb)
3706{
612f09e8 3707 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3708}
3709
ede2059d 3710static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3711{
adf30907 3712 skb->dev = skb_dst(skb)->dev;
612f09e8 3713 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3714}
3715
9ce8ade0
TG
3716static int ip6_pkt_prohibit(struct sk_buff *skb)
3717{
612f09e8 3718 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3719}
3720
ede2059d 3721static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3722{
adf30907 3723 skb->dev = skb_dst(skb)->dev;
612f09e8 3724 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3725}
3726
1da177e4
LT
3727/*
3728 * Allocate a dst for local (unicast / anycast) address.
3729 */
3730
360a9887
DA
3731struct fib6_info *addrconf_f6i_alloc(struct net *net,
3732 struct inet6_dev *idev,
3733 const struct in6_addr *addr,
3734 bool anycast, gfp_t gfp_flags)
1da177e4 3735{
c7a1ce39
DA
3736 struct fib6_config cfg = {
3737 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3738 .fc_ifindex = idev->dev->ifindex,
3739 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3740 .fc_dst = *addr,
3741 .fc_dst_len = 128,
3742 .fc_protocol = RTPROT_KERNEL,
3743 .fc_nlinfo.nl_net = net,
3744 .fc_ignore_dev_down = true,
3745 };
1da177e4 3746
e8478e80 3747 if (anycast) {
c7a1ce39
DA
3748 cfg.fc_type = RTN_ANYCAST;
3749 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3750 } else {
c7a1ce39
DA
3751 cfg.fc_type = RTN_LOCAL;
3752 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3753 }
1da177e4 3754
c7a1ce39 3755 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3756}
3757
c3968a85
DW
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
3764
8d1c802b 3765static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3766{
3767 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3768 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3769 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3770
ad1601ae 3771 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3772 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3773 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3774 spin_lock_bh(&rt6_exception_lock);
c3968a85 3775 /* remove prefsrc entry */
93c2fb25 3776 rt->fib6_prefsrc.plen = 0;
60006a48 3777 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3778 }
3779 return 0;
3780}
3781
3782void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3783{
3784 struct net *net = dev_net(ifp->idev->dev);
3785 struct arg_dev_net_ip adni = {
3786 .dev = ifp->idev->dev,
3787 .net = net,
3788 .addr = &ifp->addr,
3789 };
0c3584d5 3790 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3791}
3792
2b2450ca 3793#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3794
3795/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3796static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3797{
3798 struct in6_addr *gateway = (struct in6_addr *)arg;
3799
93c2fb25 3800 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
bdf00467 3801 rt->fib6_nh.fib_nh_gw_family &&
ad1601ae 3802 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3803 return -1;
3804 }
b16cb459
WW
3805
3806 /* Further clean up cached routes in exception table.
3807 * This is needed because cached route may have a different
3808 * gateway than its 'parent' in the case of an ip redirect.
3809 */
3810 rt6_exceptions_clean_tohost(rt, gateway);
3811
be7a010d
DJ
3812 return 0;
3813}
3814
3815void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3816{
3817 fib6_clean_all(net, fib6_clean_tohost, gateway);
3818}
3819
2127d95a
IS
/* Argument handed to the fib6_ifup() / fib6_ifdown() table walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* read by fib6_ifup(): flags to clear */
		unsigned long event;	/* read by fib6_ifdown(): NETDEV_* event */
	};
};
3827
8d1c802b 3828static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3829{
8d1c802b 3830 struct fib6_info *iter;
d7dedee1
IS
3831 struct fib6_node *fn;
3832
93c2fb25
DA
3833 fn = rcu_dereference_protected(rt->fib6_node,
3834 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3835 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3836 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3837 while (iter) {
93c2fb25 3838 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3839 rt6_qualify_for_ecmp(iter))
d7dedee1 3840 return iter;
8fb11a9a 3841 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3842 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3843 }
3844
3845 return NULL;
3846}
3847
8d1c802b 3848static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3849{
ad1601ae
DA
3850 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3851 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3852 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3853 return true;
3854
3855 return false;
3856}
3857
8d1c802b 3858static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3859{
8d1c802b 3860 struct fib6_info *iter;
d7dedee1
IS
3861 int total = 0;
3862
3863 if (!rt6_is_dead(rt))
ad1601ae 3864 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3865
93c2fb25 3866 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3867 if (!rt6_is_dead(iter))
ad1601ae 3868 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3869 }
3870
3871 return total;
3872}
3873
8d1c802b 3874static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3875{
3876 int upper_bound = -1;
3877
3878 if (!rt6_is_dead(rt)) {
ad1601ae 3879 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3880 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3881 total) - 1;
3882 }
ad1601ae 3883 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3884}
3885
8d1c802b 3886static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3887{
8d1c802b 3888 struct fib6_info *iter;
d7dedee1
IS
3889 int weight = 0;
3890
3891 rt6_upper_bound_set(rt, &weight, total);
3892
93c2fb25 3893 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3894 rt6_upper_bound_set(iter, &weight, total);
3895}
3896
8d1c802b 3897void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3898{
8d1c802b 3899 struct fib6_info *first;
d7dedee1
IS
3900 int total;
3901
3902 /* In case the entire multipath route was marked for flushing,
3903 * then there is no need to rebalance upon the removal of every
3904 * sibling route.
3905 */
93c2fb25 3906 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3907 return;
3908
3909 /* During lookup routes are evaluated in order, so we need to
3910 * make sure upper bounds are assigned from the first sibling
3911 * onwards.
3912 */
3913 first = rt6_multipath_first_sibling(rt);
3914 if (WARN_ON_ONCE(!first))
3915 return;
3916
3917 total = rt6_multipath_total_weight(first);
3918 rt6_multipath_upper_bound_set(first, total);
3919}
3920
8d1c802b 3921static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3922{
3923 const struct arg_netdev_event *arg = p_arg;
7aef6859 3924 struct net *net = dev_net(arg->dev);
2127d95a 3925
ad1601ae
DA
3926 if (rt != net->ipv6.fib6_null_entry &&
3927 rt->fib6_nh.fib_nh_dev == arg->dev) {
3928 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 3929 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3930 rt6_multipath_rebalance(rt);
1de178ed 3931 }
2127d95a
IS
3932
3933 return 0;
3934}
3935
3936void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3937{
3938 struct arg_netdev_event arg = {
3939 .dev = dev,
6802f3ad
IS
3940 {
3941 .nh_flags = nh_flags,
3942 },
2127d95a
IS
3943 };
3944
3945 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3946 arg.nh_flags |= RTNH_F_LINKDOWN;
3947
3948 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3949}
3950
8d1c802b 3951static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3952 const struct net_device *dev)
3953{
8d1c802b 3954 struct fib6_info *iter;
1de178ed 3955
ad1601ae 3956 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 3957 return true;
93c2fb25 3958 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 3959 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
3960 return true;
3961
3962 return false;
3963}
3964
8d1c802b 3965static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3966{
8d1c802b 3967 struct fib6_info *iter;
1de178ed
IS
3968
3969 rt->should_flush = 1;
93c2fb25 3970 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3971 iter->should_flush = 1;
3972}
3973
8d1c802b 3974static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3975 const struct net_device *down_dev)
3976{
8d1c802b 3977 struct fib6_info *iter;
1de178ed
IS
3978 unsigned int dead = 0;
3979
ad1601ae
DA
3980 if (rt->fib6_nh.fib_nh_dev == down_dev ||
3981 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 3982 dead++;
93c2fb25 3983 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
3984 if (iter->fib6_nh.fib_nh_dev == down_dev ||
3985 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
3986 dead++;
3987
3988 return dead;
3989}
3990
8d1c802b 3991static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3992 const struct net_device *dev,
3993 unsigned int nh_flags)
3994{
8d1c802b 3995 struct fib6_info *iter;
1de178ed 3996
ad1601ae
DA
3997 if (rt->fib6_nh.fib_nh_dev == dev)
3998 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 3999 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4000 if (iter->fib6_nh.fib_nh_dev == dev)
4001 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4002}
4003
a1a22c12 4004/* called with write lock held for table with rt */
8d1c802b 4005static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4006{
4c981e28
IS
4007 const struct arg_netdev_event *arg = p_arg;
4008 const struct net_device *dev = arg->dev;
7aef6859 4009 struct net *net = dev_net(dev);
8ed67789 4010
421842ed 4011 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4012 return 0;
4013
4014 switch (arg->event) {
4015 case NETDEV_UNREGISTER:
ad1601ae 4016 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4017 case NETDEV_DOWN:
1de178ed 4018 if (rt->should_flush)
27c6fa73 4019 return -1;
93c2fb25 4020 if (!rt->fib6_nsiblings)
ad1601ae 4021 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4022 if (rt6_multipath_uses_dev(rt, dev)) {
4023 unsigned int count;
4024
4025 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4026 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4027 rt6_multipath_flush(rt);
4028 return -1;
4029 }
4030 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4031 RTNH_F_LINKDOWN);
7aef6859 4032 fib6_update_sernum(net, rt);
d7dedee1 4033 rt6_multipath_rebalance(rt);
1de178ed
IS
4034 }
4035 return -2;
27c6fa73 4036 case NETDEV_CHANGE:
ad1601ae 4037 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4038 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4039 break;
ad1601ae 4040 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4041 rt6_multipath_rebalance(rt);
27c6fa73 4042 break;
2b241361 4043 }
c159d30c 4044
1da177e4
LT
4045 return 0;
4046}
4047
27c6fa73 4048void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4049{
4c981e28 4050 struct arg_netdev_event arg = {
8ed67789 4051 .dev = dev,
6802f3ad
IS
4052 {
4053 .event = event,
4054 },
8ed67789 4055 };
7c6bb7d2 4056 struct net *net = dev_net(dev);
8ed67789 4057
7c6bb7d2
DA
4058 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4059 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4060 else
4061 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4062}
4063
4064void rt6_disable_ip(struct net_device *dev, unsigned long event)
4065{
4066 rt6_sync_down_dev(dev, event);
4067 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4068 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4069}
4070
/* Argument bundle passed through fib6_clean_all() to rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* new MTU of that device */
};
4075
8d1c802b 4076static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4077{
4078 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4079 struct inet6_dev *idev;
4080
4081 /* In IPv6 pmtu discovery is not optional,
4082 so that RTAX_MTU lock cannot disable it.
4083 We still use this lock to block changes
4084 caused by addrconf/ndisc.
4085 */
4086
4087 idev = __in6_dev_get(arg->dev);
38308473 4088 if (!idev)
1da177e4
LT
4089 return 0;
4090
4091 /* For administrative MTU increase, there is no way to discover
4092 IPv6 PMTU increase, so PMTU increase should be updated here.
4093 Since RFC 1981 doesn't include administrative MTU increase
4094 update PMTU increase is a MUST. (i.e. jumbo frame)
4095 */
ad1601ae 4096 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4097 !fib6_metric_locked(rt, RTAX_MTU)) {
4098 u32 mtu = rt->fib6_pmtu;
4099
4100 if (mtu >= arg->mtu ||
4101 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4102 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4103
f5bbe7ee 4104 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4105 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4106 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4107 }
1da177e4
LT
4108 return 0;
4109}
4110
95c96174 4111void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4112{
c71099ac
TG
4113 struct rt6_mtu_change_arg arg = {
4114 .dev = dev,
4115 .mtu = mtu,
4116 };
1da177e4 4117
0c3584d5 4118 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4119}
4120
ef7c79ed 4121static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4122 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4123 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4124 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4125 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4126 [RTA_PRIORITY] = { .type = NLA_U32 },
4127 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4128 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4129 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4130 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4131 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4132 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4133 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4134 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4135 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4136 [RTA_IP_PROTO] = { .type = NLA_U8 },
4137 [RTA_SPORT] = { .type = NLA_U16 },
4138 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4139};
4140
4141static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4142 struct fib6_config *cfg,
4143 struct netlink_ext_ack *extack)
1da177e4 4144{
86872cb5
TG
4145 struct rtmsg *rtm;
4146 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4147 unsigned int pref;
86872cb5 4148 int err;
1da177e4 4149
fceb6435 4150 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4151 extack);
86872cb5
TG
4152 if (err < 0)
4153 goto errout;
1da177e4 4154
86872cb5
TG
4155 err = -EINVAL;
4156 rtm = nlmsg_data(nlh);
86872cb5 4157
84db8407
4158 *cfg = (struct fib6_config){
4159 .fc_table = rtm->rtm_table,
4160 .fc_dst_len = rtm->rtm_dst_len,
4161 .fc_src_len = rtm->rtm_src_len,
4162 .fc_flags = RTF_UP,
4163 .fc_protocol = rtm->rtm_protocol,
4164 .fc_type = rtm->rtm_type,
4165
4166 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4167 .fc_nlinfo.nlh = nlh,
4168 .fc_nlinfo.nl_net = sock_net(skb->sk),
4169 };
86872cb5 4170
ef2c7d7b
ND
4171 if (rtm->rtm_type == RTN_UNREACHABLE ||
4172 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4173 rtm->rtm_type == RTN_PROHIBIT ||
4174 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4175 cfg->fc_flags |= RTF_REJECT;
4176
ab79ad14
4177 if (rtm->rtm_type == RTN_LOCAL)
4178 cfg->fc_flags |= RTF_LOCAL;
4179
1f56a01f
MKL
4180 if (rtm->rtm_flags & RTM_F_CLONED)
4181 cfg->fc_flags |= RTF_CACHE;
4182
fc1e64e1
DA
4183 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4184
86872cb5 4185 if (tb[RTA_GATEWAY]) {
67b61f6c 4186 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4187 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4188 }
e3818541
DA
4189 if (tb[RTA_VIA]) {
4190 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4191 goto errout;
4192 }
86872cb5
TG
4193
4194 if (tb[RTA_DST]) {
4195 int plen = (rtm->rtm_dst_len + 7) >> 3;
4196
4197 if (nla_len(tb[RTA_DST]) < plen)
4198 goto errout;
4199
4200 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4201 }
86872cb5
TG
4202
4203 if (tb[RTA_SRC]) {
4204 int plen = (rtm->rtm_src_len + 7) >> 3;
4205
4206 if (nla_len(tb[RTA_SRC]) < plen)
4207 goto errout;
4208
4209 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4210 }
86872cb5 4211
c3968a85 4212 if (tb[RTA_PREFSRC])
67b61f6c 4213 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4214
86872cb5
TG
4215 if (tb[RTA_OIF])
4216 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4217
4218 if (tb[RTA_PRIORITY])
4219 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4220
4221 if (tb[RTA_METRICS]) {
4222 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4223 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4224 }
86872cb5
TG
4225
4226 if (tb[RTA_TABLE])
4227 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4228
51ebd318
ND
4229 if (tb[RTA_MULTIPATH]) {
4230 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4231 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4232
4233 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4234 cfg->fc_mp_len, extack);
9ed59592
DA
4235 if (err < 0)
4236 goto errout;
51ebd318
ND
4237 }
4238
c78ba6d6
LR
4239 if (tb[RTA_PREF]) {
4240 pref = nla_get_u8(tb[RTA_PREF]);
4241 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4242 pref != ICMPV6_ROUTER_PREF_HIGH)
4243 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4244 cfg->fc_flags |= RTF_PREF(pref);
4245 }
4246
19e42e45
RP
4247 if (tb[RTA_ENCAP])
4248 cfg->fc_encap = tb[RTA_ENCAP];
4249
9ed59592 4250 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4251 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4252
c255bd68 4253 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4254 if (err < 0)
4255 goto errout;
4256 }
4257
32bc201e
XL
4258 if (tb[RTA_EXPIRES]) {
4259 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4260
4261 if (addrconf_finite_timeout(timeout)) {
4262 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4263 cfg->fc_flags |= RTF_EXPIRES;
4264 }
4265 }
4266
86872cb5
TG
4267 err = 0;
4268errout:
4269 return err;
1da177e4
LT
4270}
4271
6b9ea5a6 4272struct rt6_nh {
8d1c802b 4273 struct fib6_info *fib6_info;
6b9ea5a6 4274 struct fib6_config r_cfg;
6b9ea5a6
RP
4275 struct list_head next;
4276};
4277
d4ead6b3
DA
4278static int ip6_route_info_append(struct net *net,
4279 struct list_head *rt6_nh_list,
8d1c802b
DA
4280 struct fib6_info *rt,
4281 struct fib6_config *r_cfg)
6b9ea5a6
RP
4282{
4283 struct rt6_nh *nh;
6b9ea5a6
RP
4284 int err = -EEXIST;
4285
4286 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4287 /* check if fib6_info already exists */
4288 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4289 return err;
4290 }
4291
4292 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4293 if (!nh)
4294 return -ENOMEM;
8d1c802b 4295 nh->fib6_info = rt;
6b9ea5a6
RP
4296 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4297 list_add_tail(&nh->next, rt6_nh_list);
4298
4299 return 0;
4300}
4301
8d1c802b
DA
4302static void ip6_route_mpath_notify(struct fib6_info *rt,
4303 struct fib6_info *rt_last,
3b1137fe
DA
4304 struct nl_info *info,
4305 __u16 nlflags)
4306{
4307 /* if this is an APPEND route, then rt points to the first route
4308 * inserted and rt_last points to last route inserted. Userspace
4309 * wants a consistent dump of the route which starts at the first
4310 * nexthop. Since sibling routes are always added at the end of
4311 * the list, find the first sibling of the last route appended
4312 */
93c2fb25
DA
4313 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4314 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4315 struct fib6_info,
93c2fb25 4316 fib6_siblings);
3b1137fe
DA
4317 }
4318
4319 if (rt)
4320 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4321}
4322
333c4301
DA
4323static int ip6_route_multipath_add(struct fib6_config *cfg,
4324 struct netlink_ext_ack *extack)
51ebd318 4325{
8d1c802b 4326 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4327 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4328 struct fib6_config r_cfg;
4329 struct rtnexthop *rtnh;
8d1c802b 4330 struct fib6_info *rt;
6b9ea5a6
RP
4331 struct rt6_nh *err_nh;
4332 struct rt6_nh *nh, *nh_safe;
3b1137fe 4333 __u16 nlflags;
51ebd318
ND
4334 int remaining;
4335 int attrlen;
6b9ea5a6
RP
4336 int err = 1;
4337 int nhn = 0;
4338 int replace = (cfg->fc_nlinfo.nlh &&
4339 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4340 LIST_HEAD(rt6_nh_list);
51ebd318 4341
3b1137fe
DA
4342 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4343 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4344 nlflags |= NLM_F_APPEND;
4345
35f1b4e9 4346 remaining = cfg->fc_mp_len;
51ebd318 4347 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4348
6b9ea5a6 4349 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4350 * fib6_info structs per nexthop
6b9ea5a6 4351 */
51ebd318
ND
4352 while (rtnh_ok(rtnh, remaining)) {
4353 memcpy(&r_cfg, cfg, sizeof(*cfg));
4354 if (rtnh->rtnh_ifindex)
4355 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4356
4357 attrlen = rtnh_attrlen(rtnh);
4358 if (attrlen > 0) {
4359 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4360
4361 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4362 if (nla) {
67b61f6c 4363 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4364 r_cfg.fc_flags |= RTF_GATEWAY;
4365 }
19e42e45
RP
4366 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4367 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4368 if (nla)
4369 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4370 }
6b9ea5a6 4371
68e2ffde 4372 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4373 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4374 if (IS_ERR(rt)) {
4375 err = PTR_ERR(rt);
4376 rt = NULL;
6b9ea5a6 4377 goto cleanup;
8c5b83f0 4378 }
b5d2d75e
DA
4379 if (!rt6_qualify_for_ecmp(rt)) {
4380 err = -EINVAL;
4381 NL_SET_ERR_MSG(extack,
4382 "Device only routes can not be added for IPv6 using the multipath API.");
4383 fib6_info_release(rt);
4384 goto cleanup;
4385 }
6b9ea5a6 4386
ad1601ae 4387 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4388
d4ead6b3
DA
4389 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4390 rt, &r_cfg);
51ebd318 4391 if (err) {
93531c67 4392 fib6_info_release(rt);
6b9ea5a6
RP
4393 goto cleanup;
4394 }
4395
4396 rtnh = rtnh_next(rtnh, &remaining);
4397 }
4398
3b1137fe
DA
4399 /* for add and replace send one notification with all nexthops.
4400 * Skip the notification in fib6_add_rt2node and send one with
4401 * the full route when done
4402 */
4403 info->skip_notify = 1;
4404
6b9ea5a6
RP
4405 err_nh = NULL;
4406 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4407 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4408 fib6_info_release(nh->fib6_info);
93531c67 4409
f7225172
DA
4410 if (!err) {
4411 /* save reference to last route successfully inserted */
4412 rt_last = nh->fib6_info;
4413
4414 /* save reference to first route for notification */
4415 if (!rt_notif)
4416 rt_notif = nh->fib6_info;
4417 }
3b1137fe 4418
8d1c802b
DA
4419 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4420 nh->fib6_info = NULL;
6b9ea5a6
RP
4421 if (err) {
4422 if (replace && nhn)
a5a82d84
JK
4423 NL_SET_ERR_MSG_MOD(extack,
4424 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4425 err_nh = nh;
4426 goto add_errout;
51ebd318 4427 }
6b9ea5a6 4428
1a72418b 4429 /* Because each route is added like a single route we remove
27596472
MK
4430 * these flags after the first nexthop: if there is a collision,
4431 * we have already failed to add the first nexthop:
4432 * fib6_add_rt2node() has rejected it; when replacing, old
4433 * nexthops have been replaced by first new, the rest should
4434 * be added to it.
1a72418b 4435 */
27596472
MK
4436 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4437 NLM_F_REPLACE);
6b9ea5a6
RP
4438 nhn++;
4439 }
4440
3b1137fe
DA
4441 /* success ... tell user about new route */
4442 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4443 goto cleanup;
4444
4445add_errout:
3b1137fe
DA
4446 /* send notification for routes that were added so that
4447 * the delete notifications sent by ip6_route_del are
4448 * coherent
4449 */
4450 if (rt_notif)
4451 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4452
6b9ea5a6
RP
4453 /* Delete routes that were already added */
4454 list_for_each_entry(nh, &rt6_nh_list, next) {
4455 if (err_nh == nh)
4456 break;
333c4301 4457 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4458 }
4459
4460cleanup:
4461 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4462 if (nh->fib6_info)
4463 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4464 list_del(&nh->next);
4465 kfree(nh);
4466 }
4467
4468 return err;
4469}
4470
333c4301
DA
4471static int ip6_route_multipath_del(struct fib6_config *cfg,
4472 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4473{
4474 struct fib6_config r_cfg;
4475 struct rtnexthop *rtnh;
4476 int remaining;
4477 int attrlen;
4478 int err = 1, last_err = 0;
4479
4480 remaining = cfg->fc_mp_len;
4481 rtnh = (struct rtnexthop *)cfg->fc_mp;
4482
4483 /* Parse a Multipath Entry */
4484 while (rtnh_ok(rtnh, remaining)) {
4485 memcpy(&r_cfg, cfg, sizeof(*cfg));
4486 if (rtnh->rtnh_ifindex)
4487 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4488
4489 attrlen = rtnh_attrlen(rtnh);
4490 if (attrlen > 0) {
4491 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4492
4493 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4494 if (nla) {
4495 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4496 r_cfg.fc_flags |= RTF_GATEWAY;
4497 }
4498 }
333c4301 4499 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4500 if (err)
4501 last_err = err;
4502
51ebd318
ND
4503 rtnh = rtnh_next(rtnh, &remaining);
4504 }
4505
4506 return last_err;
4507}
4508
c21ef3e3
DA
4509static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4510 struct netlink_ext_ack *extack)
1da177e4 4511{
86872cb5
TG
4512 struct fib6_config cfg;
4513 int err;
1da177e4 4514
333c4301 4515 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4516 if (err < 0)
4517 return err;
4518
51ebd318 4519 if (cfg.fc_mp)
333c4301 4520 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4521 else {
4522 cfg.fc_delete_all_nh = 1;
333c4301 4523 return ip6_route_del(&cfg, extack);
0ae81335 4524 }
1da177e4
LT
4525}
4526
c21ef3e3
DA
4527static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4528 struct netlink_ext_ack *extack)
1da177e4 4529{
86872cb5
TG
4530 struct fib6_config cfg;
4531 int err;
1da177e4 4532
333c4301 4533 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4534 if (err < 0)
4535 return err;
4536
67f69513
DA
4537 if (cfg.fc_metric == 0)
4538 cfg.fc_metric = IP6_RT_PRIO_USER;
4539
51ebd318 4540 if (cfg.fc_mp)
333c4301 4541 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4542 else
acb54e3c 4543 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4544}
4545
8d1c802b 4546static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4547{
beb1afac
DA
4548 int nexthop_len = 0;
4549
93c2fb25 4550 if (rt->fib6_nsiblings) {
beb1afac
DA
4551 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4552 + NLA_ALIGN(sizeof(struct rtnexthop))
4553 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4554 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4555
93c2fb25 4556 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4557 }
4558
339bf98f
TG
4559 return NLMSG_ALIGN(sizeof(struct rtmsg))
4560 + nla_total_size(16) /* RTA_SRC */
4561 + nla_total_size(16) /* RTA_DST */
4562 + nla_total_size(16) /* RTA_GATEWAY */
4563 + nla_total_size(16) /* RTA_PREFSRC */
4564 + nla_total_size(4) /* RTA_TABLE */
4565 + nla_total_size(4) /* RTA_IIF */
4566 + nla_total_size(4) /* RTA_OIF */
4567 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4568 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4569 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4570 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4571 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4572 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4573 + nexthop_len;
4574}
4575
d4ead6b3 4576static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4577 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4578 struct in6_addr *dest, struct in6_addr *src,
15e47304 4579 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4580 unsigned int flags)
1da177e4 4581{
22d0bd82
XL
4582 struct rt6_info *rt6 = (struct rt6_info *)dst;
4583 struct rt6key *rt6_dst, *rt6_src;
4584 u32 *pmetrics, table, rt6_flags;
2d7202bf 4585 struct nlmsghdr *nlh;
22d0bd82 4586 struct rtmsg *rtm;
d4ead6b3 4587 long expires = 0;
1da177e4 4588
15e47304 4589 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4590 if (!nlh)
26932566 4591 return -EMSGSIZE;
2d7202bf 4592
22d0bd82
XL
4593 if (rt6) {
4594 rt6_dst = &rt6->rt6i_dst;
4595 rt6_src = &rt6->rt6i_src;
4596 rt6_flags = rt6->rt6i_flags;
4597 } else {
4598 rt6_dst = &rt->fib6_dst;
4599 rt6_src = &rt->fib6_src;
4600 rt6_flags = rt->fib6_flags;
4601 }
4602
2d7202bf 4603 rtm = nlmsg_data(nlh);
1da177e4 4604 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4605 rtm->rtm_dst_len = rt6_dst->plen;
4606 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4607 rtm->rtm_tos = 0;
93c2fb25
DA
4608 if (rt->fib6_table)
4609 table = rt->fib6_table->tb6_id;
c71099ac 4610 else
9e762a4a 4611 table = RT6_TABLE_UNSPEC;
97f0082a 4612 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4613 if (nla_put_u32(skb, RTA_TABLE, table))
4614 goto nla_put_failure;
e8478e80
DA
4615
4616 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4617 rtm->rtm_flags = 0;
4618 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4619 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4620
22d0bd82 4621 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4622 rtm->rtm_flags |= RTM_F_CLONED;
4623
d4ead6b3
DA
4624 if (dest) {
4625 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4626 goto nla_put_failure;
1ab1457c 4627 rtm->rtm_dst_len = 128;
1da177e4 4628 } else if (rtm->rtm_dst_len)
22d0bd82 4629 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4630 goto nla_put_failure;
1da177e4
LT
4631#ifdef CONFIG_IPV6_SUBTREES
4632 if (src) {
930345ea 4633 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4634 goto nla_put_failure;
1ab1457c 4635 rtm->rtm_src_len = 128;
c78679e8 4636 } else if (rtm->rtm_src_len &&
22d0bd82 4637 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4638 goto nla_put_failure;
1da177e4 4639#endif
7bc570c8
YH
4640 if (iif) {
4641#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4642 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4643 int err = ip6mr_get_route(net, skb, rtm, portid);
4644
4645 if (err == 0)
4646 return 0;
4647 if (err < 0)
4648 goto nla_put_failure;
7bc570c8
YH
4649 } else
4650#endif
c78679e8
DM
4651 if (nla_put_u32(skb, RTA_IIF, iif))
4652 goto nla_put_failure;
d4ead6b3 4653 } else if (dest) {
1da177e4 4654 struct in6_addr saddr_buf;
d4ead6b3 4655 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4656 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4657 goto nla_put_failure;
1da177e4 4658 }
2d7202bf 4659
93c2fb25 4660 if (rt->fib6_prefsrc.plen) {
c3968a85 4661 struct in6_addr saddr_buf;
93c2fb25 4662 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4663 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4664 goto nla_put_failure;
c3968a85
DW
4665 }
4666
d4ead6b3
DA
4667 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4668 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4669 goto nla_put_failure;
4670
93c2fb25 4671 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4672 goto nla_put_failure;
8253947e 4673
beb1afac
DA
4674 /* For multipath routes, walk the siblings list and add
4675 * each as a nexthop within RTA_MULTIPATH.
4676 */
22d0bd82
XL
4677 if (rt6) {
4678 if (rt6_flags & RTF_GATEWAY &&
4679 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4680 goto nla_put_failure;
4681
4682 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4683 goto nla_put_failure;
4684 } else if (rt->fib6_nsiblings) {
8d1c802b 4685 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4686 struct nlattr *mp;
4687
4688 mp = nla_nest_start(skb, RTA_MULTIPATH);
4689 if (!mp)
4690 goto nla_put_failure;
4691
c0a72077
DA
4692 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4693 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4694 goto nla_put_failure;
4695
4696 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4697 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4698 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4699 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4700 goto nla_put_failure;
4701 }
4702
4703 nla_nest_end(skb, mp);
4704 } else {
c0a72077
DA
4705 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4706 &rtm->rtm_flags, false) < 0)
beb1afac
DA
4707 goto nla_put_failure;
4708 }
4709
22d0bd82 4710 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4711 expires = dst ? dst->expires : rt->expires;
4712 expires -= jiffies;
4713 }
69cdf8f9 4714
d4ead6b3 4715 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4716 goto nla_put_failure;
2d7202bf 4717
22d0bd82 4718 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4719 goto nla_put_failure;
4720
19e42e45 4721
053c095a
JB
4722 nlmsg_end(skb, nlh);
4723 return 0;
2d7202bf
TG
4724
4725nla_put_failure:
26932566
PM
4726 nlmsg_cancel(skb, nlh);
4727 return -EMSGSIZE;
1da177e4
LT
4728}
4729
13e38901
DA
4730static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4731 const struct net_device *dev)
4732{
ad1601ae 4733 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4734 return true;
4735
4736 if (f6i->fib6_nsiblings) {
4737 struct fib6_info *sibling, *next_sibling;
4738
4739 list_for_each_entry_safe(sibling, next_sibling,
4740 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4741 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4742 return true;
4743 }
4744 }
4745
4746 return false;
4747}
4748
8d1c802b 4749int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4750{
4751 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4752 struct fib_dump_filter *filter = &arg->filter;
4753 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4754 struct net *net = arg->net;
4755
421842ed 4756 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4757 return 0;
1da177e4 4758
13e38901
DA
4759 if ((filter->flags & RTM_F_PREFIX) &&
4760 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4761 /* success since this is not a prefix route */
4762 return 1;
4763 }
4764 if (filter->filter_set) {
4765 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4766 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4767 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4768 return 1;
4769 }
13e38901 4770 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4771 }
1da177e4 4772
d4ead6b3
DA
4773 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4774 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4775 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4776}
4777
0eff0a27
JK
4778static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4779 const struct nlmsghdr *nlh,
4780 struct nlattr **tb,
4781 struct netlink_ext_ack *extack)
4782{
4783 struct rtmsg *rtm;
4784 int i, err;
4785
4786 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4787 NL_SET_ERR_MSG_MOD(extack,
4788 "Invalid header for get route request");
4789 return -EINVAL;
4790 }
4791
4792 if (!netlink_strict_get_check(skb))
4793 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4794 rtm_ipv6_policy, extack);
4795
4796 rtm = nlmsg_data(nlh);
4797 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4798 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4799 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4800 rtm->rtm_type) {
4801 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4802 return -EINVAL;
4803 }
4804 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4805 NL_SET_ERR_MSG_MOD(extack,
4806 "Invalid flags for get route request");
4807 return -EINVAL;
4808 }
4809
4810 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4811 rtm_ipv6_policy, extack);
4812 if (err)
4813 return err;
4814
4815 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4816 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4817 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4818 return -EINVAL;
4819 }
4820
4821 for (i = 0; i <= RTA_MAX; i++) {
4822 if (!tb[i])
4823 continue;
4824
4825 switch (i) {
4826 case RTA_SRC:
4827 case RTA_DST:
4828 case RTA_IIF:
4829 case RTA_OIF:
4830 case RTA_MARK:
4831 case RTA_UID:
4832 case RTA_SPORT:
4833 case RTA_DPORT:
4834 case RTA_IP_PROTO:
4835 break;
4836 default:
4837 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4838 return -EINVAL;
4839 }
4840 }
4841
4842 return 0;
4843}
4844
c21ef3e3
DA
4845static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4846 struct netlink_ext_ack *extack)
1da177e4 4847{
3b1e0a65 4848 struct net *net = sock_net(in_skb->sk);
ab364a6f 4849 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4850 int err, iif = 0, oif = 0;
a68886a6 4851 struct fib6_info *from;
18c3a61c 4852 struct dst_entry *dst;
ab364a6f 4853 struct rt6_info *rt;
1da177e4 4854 struct sk_buff *skb;
ab364a6f 4855 struct rtmsg *rtm;
744486d4 4856 struct flowi6 fl6 = {};
18c3a61c 4857 bool fibmatch;
1da177e4 4858
0eff0a27 4859 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4860 if (err < 0)
4861 goto errout;
1da177e4 4862
ab364a6f 4863 err = -EINVAL;
38b7097b
HFS
4864 rtm = nlmsg_data(nlh);
4865 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4866 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4867
ab364a6f
TG
4868 if (tb[RTA_SRC]) {
4869 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4870 goto errout;
4871
4e3fd7a0 4872 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4873 }
4874
4875 if (tb[RTA_DST]) {
4876 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4877 goto errout;
4878
4e3fd7a0 4879 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4880 }
4881
4882 if (tb[RTA_IIF])
4883 iif = nla_get_u32(tb[RTA_IIF]);
4884
4885 if (tb[RTA_OIF])
72331bc0 4886 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4887
2e47b291
LC
4888 if (tb[RTA_MARK])
4889 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4890
622ec2c9
LC
4891 if (tb[RTA_UID])
4892 fl6.flowi6_uid = make_kuid(current_user_ns(),
4893 nla_get_u32(tb[RTA_UID]));
4894 else
4895 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4896
eacb9384
RP
4897 if (tb[RTA_SPORT])
4898 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4899
4900 if (tb[RTA_DPORT])
4901 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4902
4903 if (tb[RTA_IP_PROTO]) {
4904 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
4905 &fl6.flowi6_proto, AF_INET6,
4906 extack);
eacb9384
RP
4907 if (err)
4908 goto errout;
4909 }
4910
1da177e4
LT
4911 if (iif) {
4912 struct net_device *dev;
72331bc0
SL
4913 int flags = 0;
4914
121622db
FW
4915 rcu_read_lock();
4916
4917 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4918 if (!dev) {
121622db 4919 rcu_read_unlock();
1da177e4 4920 err = -ENODEV;
ab364a6f 4921 goto errout;
1da177e4 4922 }
72331bc0
SL
4923
4924 fl6.flowi6_iif = iif;
4925
4926 if (!ipv6_addr_any(&fl6.saddr))
4927 flags |= RT6_LOOKUP_F_HAS_SADDR;
4928
b75cc8f9 4929 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4930
4931 rcu_read_unlock();
72331bc0
SL
4932 } else {
4933 fl6.flowi6_oif = oif;
4934
58acfd71 4935 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4936 }
4937
18c3a61c
RP
4938
4939 rt = container_of(dst, struct rt6_info, dst);
4940 if (rt->dst.error) {
4941 err = rt->dst.error;
4942 ip6_rt_put(rt);
4943 goto errout;
1da177e4
LT
4944 }
4945
9d6acb3b
WC
4946 if (rt == net->ipv6.ip6_null_entry) {
4947 err = rt->dst.error;
4948 ip6_rt_put(rt);
4949 goto errout;
4950 }
4951
ab364a6f 4952 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4953 if (!skb) {
94e187c0 4954 ip6_rt_put(rt);
ab364a6f
TG
4955 err = -ENOBUFS;
4956 goto errout;
4957 }
1da177e4 4958
d8d1f30b 4959 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4960
4961 rcu_read_lock();
4962 from = rcu_dereference(rt->from);
4963
18c3a61c 4964 if (fibmatch)
a68886a6 4965 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4966 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4967 nlh->nlmsg_seq, 0);
4968 else
a68886a6
DA
4969 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4970 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4971 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4972 0);
a68886a6
DA
4973 rcu_read_unlock();
4974
1da177e4 4975 if (err < 0) {
ab364a6f
TG
4976 kfree_skb(skb);
4977 goto errout;
1da177e4
LT
4978 }
4979
15e47304 4980 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4981errout:
1da177e4 4982 return err;
1da177e4
LT
4983}
4984
8d1c802b 4985void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4986 unsigned int nlm_flags)
1da177e4
LT
4987{
4988 struct sk_buff *skb;
5578689a 4989 struct net *net = info->nl_net;
528c4ceb
DL
4990 u32 seq;
4991 int err;
4992
4993 err = -ENOBUFS;
38308473 4994 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4995
19e42e45 4996 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4997 if (!skb)
21713ebc
TG
4998 goto errout;
4999
d4ead6b3
DA
5000 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5001 event, info->portid, seq, nlm_flags);
26932566
PM
5002 if (err < 0) {
5003 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5004 WARN_ON(err == -EMSGSIZE);
5005 kfree_skb(skb);
5006 goto errout;
5007 }
15e47304 5008 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5009 info->nlh, gfp_any());
5010 return;
21713ebc
TG
5011errout:
5012 if (err < 0)
5578689a 5013 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5014}
5015
8ed67789 5016static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5017 unsigned long event, void *ptr)
8ed67789 5018{
351638e7 5019 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5020 struct net *net = dev_net(dev);
8ed67789 5021
242d3a49
WC
5022 if (!(dev->flags & IFF_LOOPBACK))
5023 return NOTIFY_OK;
5024
5025 if (event == NETDEV_REGISTER) {
ad1601ae 5026 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5027 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5028 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5029#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5030 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5031 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5032 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5033 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5034#endif
76da0704
WC
5035 } else if (event == NETDEV_UNREGISTER &&
5036 dev->reg_state != NETREG_UNREGISTERED) {
5037 /* NETDEV_UNREGISTER could be fired for multiple times by
5038 * netdev_wait_allrefs(). Make sure we only call this once.
5039 */
12d94a80 5040 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5041#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5042 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5043 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5044#endif
5045 }
5046
5047 return NOTIFY_OK;
5048}
5049
1da177e4
LT
5050/*
5051 * /proc
5052 */
5053
5054#ifdef CONFIG_PROC_FS
1da177e4
LT
5055static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5056{
69ddb805 5057 struct net *net = (struct net *)seq->private;
1da177e4 5058 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5059 net->ipv6.rt6_stats->fib_nodes,
5060 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5061 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5062 net->ipv6.rt6_stats->fib_rt_entries,
5063 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5064 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5065 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5066
5067 return 0;
5068}
1da177e4
LT
5069#endif /* CONFIG_PROC_FS */
5070
5071#ifdef CONFIG_SYSCTL
5072
1da177e4 5073static
fe2c6338 5074int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5075 void __user *buffer, size_t *lenp, loff_t *ppos)
5076{
c486da34
LAG
5077 struct net *net;
5078 int delay;
f0fb9b28 5079 int ret;
c486da34 5080 if (!write)
1da177e4 5081 return -EINVAL;
c486da34
LAG
5082
5083 net = (struct net *)ctl->extra1;
5084 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5085 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5086 if (ret)
5087 return ret;
5088
2ac3ac8f 5089 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5090 return 0;
1da177e4
LT
5091}
5092
7c6bb7d2
DA
5093static int zero;
5094static int one = 1;
5095
ed792e28 5096static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5097 {
1da177e4 5098 .procname = "flush",
4990509f 5099 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5100 .maxlen = sizeof(int),
89c8b3a1 5101 .mode = 0200,
6d9f239a 5102 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5103 },
5104 {
1da177e4 5105 .procname = "gc_thresh",
9a7ec3a9 5106 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5107 .maxlen = sizeof(int),
5108 .mode = 0644,
6d9f239a 5109 .proc_handler = proc_dointvec,
1da177e4
LT
5110 },
5111 {
1da177e4 5112 .procname = "max_size",
4990509f 5113 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5114 .maxlen = sizeof(int),
5115 .mode = 0644,
6d9f239a 5116 .proc_handler = proc_dointvec,
1da177e4
LT
5117 },
5118 {
1da177e4 5119 .procname = "gc_min_interval",
4990509f 5120 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5121 .maxlen = sizeof(int),
5122 .mode = 0644,
6d9f239a 5123 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5124 },
5125 {
1da177e4 5126 .procname = "gc_timeout",
4990509f 5127 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5128 .maxlen = sizeof(int),
5129 .mode = 0644,
6d9f239a 5130 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5131 },
5132 {
1da177e4 5133 .procname = "gc_interval",
4990509f 5134 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5135 .maxlen = sizeof(int),
5136 .mode = 0644,
6d9f239a 5137 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5138 },
5139 {
1da177e4 5140 .procname = "gc_elasticity",
4990509f 5141 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5142 .maxlen = sizeof(int),
5143 .mode = 0644,
f3d3f616 5144 .proc_handler = proc_dointvec,
1da177e4
LT
5145 },
5146 {
1da177e4 5147 .procname = "mtu_expires",
4990509f 5148 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5149 .maxlen = sizeof(int),
5150 .mode = 0644,
6d9f239a 5151 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5152 },
5153 {
1da177e4 5154 .procname = "min_adv_mss",
4990509f 5155 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5156 .maxlen = sizeof(int),
5157 .mode = 0644,
f3d3f616 5158 .proc_handler = proc_dointvec,
1da177e4
LT
5159 },
5160 {
1da177e4 5161 .procname = "gc_min_interval_ms",
4990509f 5162 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5163 .maxlen = sizeof(int),
5164 .mode = 0644,
6d9f239a 5165 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5166 },
7c6bb7d2
DA
5167 {
5168 .procname = "skip_notify_on_dev_down",
5169 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5170 .maxlen = sizeof(int),
5171 .mode = 0644,
5172 .proc_handler = proc_dointvec,
5173 .extra1 = &zero,
5174 .extra2 = &one,
5175 },
f8572d8f 5176 { }
1da177e4
LT
5177};
5178
2c8c1e72 5179struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5180{
5181 struct ctl_table *table;
5182
5183 table = kmemdup(ipv6_route_table_template,
5184 sizeof(ipv6_route_table_template),
5185 GFP_KERNEL);
5ee09105
YH
5186
5187 if (table) {
5188 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5189 table[0].extra1 = net;
86393e52 5190 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5191 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5192 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5193 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5194 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5195 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5196 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5197 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5198 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5199 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5200
5201 /* Don't export sysctls to unprivileged users */
5202 if (net->user_ns != &init_user_ns)
5203 table[0].procname = NULL;
5ee09105
YH
5204 }
5205
760f2d01
DL
5206 return table;
5207}
1da177e4
LT
5208#endif
5209
2c8c1e72 5210static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5211{
633d424b 5212 int ret = -ENOMEM;
8ed67789 5213
86393e52
AD
5214 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5215 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5216
fc66f95c
ED
5217 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5218 goto out_ip6_dst_ops;
5219
421842ed
DA
5220 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5221 sizeof(*net->ipv6.fib6_null_entry),
5222 GFP_KERNEL);
5223 if (!net->ipv6.fib6_null_entry)
5224 goto out_ip6_dst_entries;
5225
8ed67789
DL
5226 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5227 sizeof(*net->ipv6.ip6_null_entry),
5228 GFP_KERNEL);
5229 if (!net->ipv6.ip6_null_entry)
421842ed 5230 goto out_fib6_null_entry;
d8d1f30b 5231 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5232 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5233 ip6_template_metrics, true);
8ed67789
DL
5234
5235#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5236 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5237 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5238 sizeof(*net->ipv6.ip6_prohibit_entry),
5239 GFP_KERNEL);
68fffc67
PZ
5240 if (!net->ipv6.ip6_prohibit_entry)
5241 goto out_ip6_null_entry;
d8d1f30b 5242 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5243 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5244 ip6_template_metrics, true);
8ed67789
DL
5245
5246 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5247 sizeof(*net->ipv6.ip6_blk_hole_entry),
5248 GFP_KERNEL);
68fffc67
PZ
5249 if (!net->ipv6.ip6_blk_hole_entry)
5250 goto out_ip6_prohibit_entry;
d8d1f30b 5251 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5252 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5253 ip6_template_metrics, true);
8ed67789
DL
5254#endif
5255
b339a47c
PZ
5256 net->ipv6.sysctl.flush_delay = 0;
5257 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5258 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5259 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5260 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5261 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5262 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5263 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5264 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5265
6891a346
BT
5266 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5267
8ed67789
DL
5268 ret = 0;
5269out:
5270 return ret;
f2fc6a54 5271
68fffc67
PZ
5272#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5273out_ip6_prohibit_entry:
5274 kfree(net->ipv6.ip6_prohibit_entry);
5275out_ip6_null_entry:
5276 kfree(net->ipv6.ip6_null_entry);
5277#endif
421842ed
DA
5278out_fib6_null_entry:
5279 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5280out_ip6_dst_entries:
5281 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5282out_ip6_dst_ops:
f2fc6a54 5283 goto out;
cdb18761
DL
5284}
5285
2c8c1e72 5286static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5287{
421842ed 5288 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5289 kfree(net->ipv6.ip6_null_entry);
5290#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5291 kfree(net->ipv6.ip6_prohibit_entry);
5292 kfree(net->ipv6.ip6_blk_hole_entry);
5293#endif
41bb78b4 5294 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5295}
5296
d189634e
TG
5297static int __net_init ip6_route_net_init_late(struct net *net)
5298{
5299#ifdef CONFIG_PROC_FS
c3506372
CH
5300 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5301 sizeof(struct ipv6_route_iter));
3617d949
CH
5302 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5303 rt6_stats_seq_show, NULL);
d189634e
TG
5304#endif
5305 return 0;
5306}
5307
5308static void __net_exit ip6_route_net_exit_late(struct net *net)
5309{
5310#ifdef CONFIG_PROC_FS
ece31ffd
G
5311 remove_proc_entry("ipv6_route", net->proc_net);
5312 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5313#endif
5314}
5315
cdb18761
DL
5316static struct pernet_operations ip6_route_net_ops = {
5317 .init = ip6_route_net_init,
5318 .exit = ip6_route_net_exit,
5319};
5320
c3426b47
DM
5321static int __net_init ipv6_inetpeer_init(struct net *net)
5322{
5323 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5324
5325 if (!bp)
5326 return -ENOMEM;
5327 inet_peer_base_init(bp);
5328 net->ipv6.peers = bp;
5329 return 0;
5330}
5331
5332static void __net_exit ipv6_inetpeer_exit(struct net *net)
5333{
5334 struct inet_peer_base *bp = net->ipv6.peers;
5335
5336 net->ipv6.peers = NULL;
56a6b248 5337 inetpeer_invalidate_tree(bp);
c3426b47
DM
5338 kfree(bp);
5339}
5340
2b823f72 5341static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5342 .init = ipv6_inetpeer_init,
5343 .exit = ipv6_inetpeer_exit,
5344};
5345
d189634e
TG
5346static struct pernet_operations ip6_route_net_late_ops = {
5347 .init = ip6_route_net_init_late,
5348 .exit = ip6_route_net_exit_late,
5349};
5350
8ed67789
DL
5351static struct notifier_block ip6_route_dev_notifier = {
5352 .notifier_call = ip6_route_dev_notify,
242d3a49 5353 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5354};
5355
2f460933
WC
5356void __init ip6_route_init_special_entries(void)
5357{
5358 /* Registering of the loopback is done before this portion of code,
5359 * the loopback reference in rt6_info will not be taken, do it
5360 * manually for init_net */
ad1601ae 5361 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5362 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5363 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5364 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5365 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5366 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5367 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5368 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5369 #endif
5370}
5371
433d49c3 5372int __init ip6_route_init(void)
1da177e4 5373{
433d49c3 5374 int ret;
8d0b94af 5375 int cpu;
433d49c3 5376
9a7ec3a9
DL
5377 ret = -ENOMEM;
5378 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5379 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5380 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5381 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5382 goto out;
14e50e57 5383
fc66f95c 5384 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5385 if (ret)
bdb3289f 5386 goto out_kmem_cache;
bdb3289f 5387
c3426b47
DM
5388 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5389 if (ret)
e8803b6c 5390 goto out_dst_entries;
2a0c451a 5391
7e52b33b
DM
5392 ret = register_pernet_subsys(&ip6_route_net_ops);
5393 if (ret)
5394 goto out_register_inetpeer;
c3426b47 5395
5dc121e9
AE
5396 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5397
e8803b6c 5398 ret = fib6_init();
433d49c3 5399 if (ret)
8ed67789 5400 goto out_register_subsys;
433d49c3 5401
433d49c3
DL
5402 ret = xfrm6_init();
5403 if (ret)
e8803b6c 5404 goto out_fib6_init;
c35b7e72 5405
433d49c3
DL
5406 ret = fib6_rules_init();
5407 if (ret)
5408 goto xfrm6_init;
7e5449c2 5409
d189634e
TG
5410 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5411 if (ret)
5412 goto fib6_rules_init;
5413
16feebcf
FW
5414 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5415 inet6_rtm_newroute, NULL, 0);
5416 if (ret < 0)
5417 goto out_register_late_subsys;
5418
5419 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5420 inet6_rtm_delroute, NULL, 0);
5421 if (ret < 0)
5422 goto out_register_late_subsys;
5423
5424 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5425 inet6_rtm_getroute, NULL,
5426 RTNL_FLAG_DOIT_UNLOCKED);
5427 if (ret < 0)
d189634e 5428 goto out_register_late_subsys;
c127ea2c 5429
8ed67789 5430 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5431 if (ret)
d189634e 5432 goto out_register_late_subsys;
8ed67789 5433
8d0b94af
MKL
5434 for_each_possible_cpu(cpu) {
5435 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5436
5437 INIT_LIST_HEAD(&ul->head);
5438 spin_lock_init(&ul->lock);
5439 }
5440
433d49c3
DL
5441out:
5442 return ret;
5443
d189634e 5444out_register_late_subsys:
16feebcf 5445 rtnl_unregister_all(PF_INET6);
d189634e 5446 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5447fib6_rules_init:
433d49c3
DL
5448 fib6_rules_cleanup();
5449xfrm6_init:
433d49c3 5450 xfrm6_fini();
2a0c451a
TG
5451out_fib6_init:
5452 fib6_gc_cleanup();
8ed67789
DL
5453out_register_subsys:
5454 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5455out_register_inetpeer:
5456 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5457out_dst_entries:
5458 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5459out_kmem_cache:
f2fc6a54 5460 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5461 goto out;
1da177e4
LT
5462}
5463
5464void ip6_route_cleanup(void)
5465{
8ed67789 5466 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5467 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5468 fib6_rules_cleanup();
1da177e4 5469 xfrm6_fini();
1da177e4 5470 fib6_gc_cleanup();
c3426b47 5471 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5472 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5473 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5474 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5475}