ipv6: Add neighbor helpers that use the ipv6 stub
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result codes produced by the gateway neighbour check (rt6_check_neigh())
 * and propagated through rt6_score_route().  Every failure code is negative;
 * the only success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct fib6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
7adf3246
SB
213
214 n = neigh_create(&nd_tbl, daddr, dev);
215 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
216}
217
218static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219 struct sk_buff *skb,
220 const void *daddr)
221{
222 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223
224 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
225}
226
63fca65d
JA
227static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
228{
229 struct net_device *dev = dst->dev;
230 struct rt6_info *rt = (struct rt6_info *)dst;
231
f8a1b43b 232 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
233 if (!daddr)
234 return;
235 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
236 return;
237 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
238 return;
239 __ipv6_confirm_neigh(dev, daddr);
240}
241
9a7ec3a9 242static struct dst_ops ip6_dst_ops_template = {
1da177e4 243 .family = AF_INET6,
1da177e4
LT
244 .gc = ip6_dst_gc,
245 .gc_thresh = 1024,
246 .check = ip6_dst_check,
0dbaee3b 247 .default_advmss = ip6_default_advmss,
ebb762f2 248 .mtu = ip6_mtu,
d4ead6b3 249 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
250 .destroy = ip6_dst_destroy,
251 .ifdown = ip6_dst_ifdown,
252 .negative_advice = ip6_negative_advice,
253 .link_failure = ip6_link_failure,
254 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 255 .redirect = rt6_do_redirect,
9f8955cc 256 .local_out = __ip6_local_out,
f8a1b43b 257 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 258 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
259};
260
ebb762f2 261static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 262{
618f9bc7
SK
263 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264
265 return mtu ? : dst->dev->mtu;
ec831ea7
RD
266}
267
6700c270
DM
268static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269 struct sk_buff *skb, u32 mtu)
14e50e57
DM
270{
271}
272
6700c270
DM
/* Redirects are deliberately ignored on blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
277
14e50e57
DM
278static struct dst_ops ip6_dst_blackhole_ops = {
279 .family = AF_INET6,
14e50e57
DM
280 .destroy = ip6_dst_destroy,
281 .check = ip6_dst_check,
ebb762f2 282 .mtu = ip6_blackhole_mtu,
214f45c9 283 .default_advmss = ip6_default_advmss,
14e50e57 284 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 285 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 286 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 287 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
288};
289
62fa8a84 290static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 291 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
292};
293
8d1c802b 294static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
295 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
296 .fib6_protocol = RTPROT_KERNEL,
297 .fib6_metric = ~(u32)0,
298 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
299 .fib6_type = RTN_UNREACHABLE,
300 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
301};
302
fb0af4c7 303static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
304 .dst = {
305 .__refcnt = ATOMIC_INIT(1),
306 .__use = 1,
2c20cbd7 307 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 308 .error = -ENETUNREACH,
d8d1f30b
CG
309 .input = ip6_pkt_discard,
310 .output = ip6_pkt_discard_out,
1da177e4
LT
311 },
312 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
313};
314
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for "prohibit" routes: dst reports -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/* Template for "blackhole" routes: dst reports -EINVAL, packets dropped. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
342
ebfa45f0
MKL
343static void rt6_info_init(struct rt6_info *rt)
344{
345 struct dst_entry *dst = &rt->dst;
346
347 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
349}
350
1da177e4 351/* allocate dst with ip6_dst_ops */
93531c67
DA
352struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
81eb8447 358 if (rt) {
ebfa45f0 359 rt6_info_init(rt);
81eb8447
WW
360 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361 }
8104891b 362
cf911662 363 return rt;
1da177e4 364}
9ab179d8 365EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 366
1da177e4
LT
367static void ip6_dst_destroy(struct dst_entry *dst)
368{
369 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 370 struct fib6_info *from;
8d0b94af 371 struct inet6_dev *idev;
1da177e4 372
1620a336 373 ip_dst_metrics_put(dst);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
a68886a6
DA
382 rcu_read_lock();
383 from = rcu_dereference(rt->from);
384 rcu_assign_pointer(rt->from, NULL);
93531c67 385 fib6_info_release(from);
a68886a6 386 rcu_read_unlock();
b3419363
DM
387}
388
1da177e4
LT
389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390 int how)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 394 struct net_device *loopback_dev =
c346dca1 395 dev_net(dev)->loopback_dev;
1da177e4 396
e5645f51
WW
397 if (idev && idev->dev != loopback_dev) {
398 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
399 if (loopback_idev) {
400 rt->rt6i_idev = loopback_idev;
401 in6_dev_put(idev);
97cac082 402 }
1da177e4
LT
403 }
404}
405
5973fb1e
MKL
406static bool __rt6_check_expired(const struct rt6_info *rt)
407{
408 if (rt->rt6i_flags & RTF_EXPIRES)
409 return time_after(jiffies, rt->dst.expires);
410 else
411 return false;
412}
413
a50feda5 414static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 415{
a68886a6
DA
416 struct fib6_info *from;
417
418 from = rcu_dereference(rt->from);
419
1716a961
G
420 if (rt->rt6i_flags & RTF_EXPIRES) {
421 if (time_after(jiffies, rt->dst.expires))
a50feda5 422 return true;
a68886a6 423 } else if (from) {
1e2ea8ad 424 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 425 fib6_check_expired(from);
1716a961 426 }
a50feda5 427 return false;
1da177e4
LT
428}
429
3b290a31
DA
430struct fib6_info *fib6_multipath_select(const struct net *net,
431 struct fib6_info *match,
432 struct flowi6 *fl6, int oif,
433 const struct sk_buff *skb,
434 int strict)
51ebd318 435{
8d1c802b 436 struct fib6_info *sibling, *next_sibling;
51ebd318 437
b673d6cc
JS
438 /* We might have already computed the hash for ICMPv6 errors. In such
439 * case it will always be non-zero. Otherwise now is the time to do it.
440 */
441 if (!fl6->mp_hash)
b4bac172 442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 443
ad1601ae 444 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
3d709f69
IS
445 return match;
446
93c2fb25
DA
447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448 fib6_siblings) {
5e670d84
DA
449 int nh_upper_bound;
450
ad1601ae 451 nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
5e670d84 452 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
453 continue;
454 if (rt6_score_route(sibling, oif, strict) < 0)
455 break;
456 match = sibling;
457 break;
458 }
459
51ebd318
ND
460 return match;
461}
462
1da177e4 463/*
66f5d6ce 464 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
465 */
466
8d1c802b
DA
467static inline struct fib6_info *rt6_device_match(struct net *net,
468 struct fib6_info *rt,
b71d1d42 469 const struct in6_addr *saddr,
1da177e4 470 int oif,
d420895e 471 int flags)
1da177e4 472{
8d1c802b 473 struct fib6_info *sprt;
1da177e4 474
5e670d84 475 if (!oif && ipv6_addr_any(saddr) &&
ad1601ae 476 !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
8067bb8c 477 return rt;
dd3abc4e 478
8fb11a9a 479 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
ad1601ae 480 const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;
dd3abc4e 481
ad1601ae 482 if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
483 continue;
484
dd3abc4e 485 if (oif) {
1da177e4
LT
486 if (dev->ifindex == oif)
487 return sprt;
dd3abc4e
YH
488 } else {
489 if (ipv6_chk_addr(net, saddr, dev,
490 flags & RT6_LOOKUP_F_IFACE))
491 return sprt;
1da177e4 492 }
dd3abc4e 493 }
1da177e4 494
eea68cd3
DA
495 if (oif && flags & RT6_LOOKUP_F_IFACE)
496 return net->ipv6.fib6_null_entry;
8067bb8c 497
ad1601ae 498 return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
499}
500
27097255 501#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
502struct __rt6_probe_work {
503 struct work_struct work;
504 struct in6_addr target;
505 struct net_device *dev;
506};
507
508static void rt6_probe_deferred(struct work_struct *w)
509{
510 struct in6_addr mcaddr;
511 struct __rt6_probe_work *work =
512 container_of(w, struct __rt6_probe_work, work);
513
514 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 515 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 516 dev_put(work->dev);
662f5533 517 kfree(work);
c2f17e82
HFS
518}
519
8d1c802b 520static void rt6_probe(struct fib6_info *rt)
27097255 521{
f547fac6 522 struct __rt6_probe_work *work = NULL;
5e670d84 523 const struct in6_addr *nh_gw;
f2c31e32 524 struct neighbour *neigh;
5e670d84 525 struct net_device *dev;
f547fac6 526 struct inet6_dev *idev;
5e670d84 527
27097255
YH
528 /*
529 * Okay, this does not seem to be appropriate
530 * for now, however, we need to check if it
531 * is really so; aka Router Reachability Probing.
532 *
533 * Router Reachability Probe MUST be rate-limited
534 * to no more than one per minute.
535 */
2b2450ca 536 if (!rt || !rt->fib6_nh.fib_nh_has_gw)
7ff74a59 537 return;
5e670d84 538
ad1601ae
DA
539 nh_gw = &rt->fib6_nh.fib_nh_gw6;
540 dev = rt->fib6_nh.fib_nh_dev;
2152caea 541 rcu_read_lock_bh();
f547fac6 542 idev = __in6_dev_get(dev);
5e670d84 543 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 544 if (neigh) {
8d6c31bf
MKL
545 if (neigh->nud_state & NUD_VALID)
546 goto out;
547
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
f547fac6
SD
557 } else if (time_after(jiffies, rt->last_probe +
558 idev->cnf.rtr_probe_interval)) {
990edb42 559 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 560 }
990edb42
MKL
561
562 if (work) {
f547fac6 563 rt->last_probe = jiffies;
990edb42 564 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
565 work->target = *nh_gw;
566 dev_hold(dev);
567 work->dev = dev;
990edb42
MKL
568 schedule_work(&work->work);
569 }
570
8d6c31bf 571out:
2152caea 572 rcu_read_unlock_bh();
27097255
YH
573}
574#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
}
578#endif
579
1da177e4 580/*
554cfb7e 581 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 582 */
8d1c802b 583static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 584{
ad1601ae 585 const struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5e670d84 586
161980f4 587 if (!oif || dev->ifindex == oif)
554cfb7e 588 return 2;
161980f4 589 return 0;
554cfb7e 590}
1da177e4 591
8d1c802b 592static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 593{
afc154e9 594 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 595 struct neighbour *neigh;
f2c31e32 596
93c2fb25 597 if (rt->fib6_flags & RTF_NONEXTHOP ||
2b2450ca 598 !rt->fib6_nh.fib_nh_has_gw)
afc154e9 599 return RT6_NUD_SUCCEED;
145a3621
YH
600
601 rcu_read_lock_bh();
ad1601ae
DA
602 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
603 &rt->fib6_nh.fib_nh_gw6);
145a3621
YH
604 if (neigh) {
605 read_lock(&neigh->lock);
554cfb7e 606 if (neigh->nud_state & NUD_VALID)
afc154e9 607 ret = RT6_NUD_SUCCEED;
398bcbeb 608#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 609 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 610 ret = RT6_NUD_SUCCEED;
7e980569
JB
611 else
612 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 613#endif
145a3621 614 read_unlock(&neigh->lock);
afc154e9
HFS
615 } else {
616 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 617 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 618 }
145a3621
YH
619 rcu_read_unlock_bh();
620
a5a81f0b 621 return ret;
1da177e4
LT
622}
623
8d1c802b 624static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 625{
a5a81f0b 626 int m;
1ab1457c 627
4d0c5911 628 m = rt6_check_dev(rt, oif);
77d16f45 629 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 630 return RT6_NUD_FAIL_HARD;
ebacaaa0 631#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 632 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 633#endif
afc154e9
HFS
634 if (strict & RT6_LOOKUP_F_REACHABLE) {
635 int n = rt6_check_neigh(rt);
636 if (n < 0)
637 return n;
638 }
554cfb7e
YH
639 return m;
640}
641
8d1c802b
DA
642static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
643 int *mpri, struct fib6_info *match,
afc154e9 644 bool *do_rr)
554cfb7e 645{
f11e6659 646 int m;
afc154e9 647 bool match_do_rr = false;
35103d11 648
ad1601ae 649 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
650 goto out;
651
ad1601ae
DA
652 if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
653 rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 654 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 655 goto out;
f11e6659 656
14895687 657 if (fib6_check_expired(rt))
f11e6659
DM
658 goto out;
659
660 m = rt6_score_route(rt, oif, strict);
7e980569 661 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
662 match_do_rr = true;
663 m = 0; /* lowest valid score */
7e980569 664 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 665 goto out;
afc154e9
HFS
666 }
667
668 if (strict & RT6_LOOKUP_F_REACHABLE)
669 rt6_probe(rt);
f11e6659 670
7e980569 671 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 672 if (m > *mpri) {
afc154e9 673 *do_rr = match_do_rr;
f11e6659
DM
674 *mpri = m;
675 match = rt;
f11e6659 676 }
f11e6659
DM
677out:
678 return match;
679}
680
8d1c802b
DA
681static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
682 struct fib6_info *leaf,
683 struct fib6_info *rr_head,
afc154e9
HFS
684 u32 metric, int oif, int strict,
685 bool *do_rr)
f11e6659 686{
8d1c802b 687 struct fib6_info *rt, *match, *cont;
554cfb7e 688 int mpri = -1;
1da177e4 689
f11e6659 690 match = NULL;
9fbdcfaf 691 cont = NULL;
8fb11a9a 692 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 693 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
694 cont = rt;
695 break;
696 }
697
698 match = find_match(rt, oif, strict, &mpri, match, do_rr);
699 }
700
66f5d6ce 701 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 702 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 703 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
704 cont = rt;
705 break;
706 }
707
afc154e9 708 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
709 }
710
711 if (match || !cont)
712 return match;
713
8fb11a9a 714 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 716
f11e6659
DM
717 return match;
718}
1da177e4 719
8d1c802b 720static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 721 int oif, int strict)
f11e6659 722{
8d1c802b
DA
723 struct fib6_info *leaf = rcu_dereference(fn->leaf);
724 struct fib6_info *match, *rt0;
afc154e9 725 bool do_rr = false;
17ecf590 726 int key_plen;
1da177e4 727
421842ed
DA
728 if (!leaf || leaf == net->ipv6.fib6_null_entry)
729 return net->ipv6.fib6_null_entry;
8d1040e8 730
66f5d6ce 731 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 732 if (!rt0)
66f5d6ce 733 rt0 = leaf;
1da177e4 734
17ecf590
WW
735 /* Double check to make sure fn is not an intermediate node
736 * and fn->leaf does not points to its child's leaf
737 * (This might happen if all routes under fn are deleted from
738 * the tree and fib6_repair_tree() is called on the node.)
739 */
93c2fb25 740 key_plen = rt0->fib6_dst.plen;
17ecf590 741#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
742 if (rt0->fib6_src.plen)
743 key_plen = rt0->fib6_src.plen;
17ecf590
WW
744#endif
745 if (fn->fn_bit != key_plen)
421842ed 746 return net->ipv6.fib6_null_entry;
17ecf590 747
93c2fb25 748 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 749 &do_rr);
1da177e4 750
afc154e9 751 if (do_rr) {
8fb11a9a 752 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 753
554cfb7e 754 /* no entries matched; do round-robin */
93c2fb25 755 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 756 next = leaf;
f11e6659 757
66f5d6ce 758 if (next != rt0) {
93c2fb25 759 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 760 /* make sure next is not being deleted from the tree */
93c2fb25 761 if (next->fib6_node)
66f5d6ce 762 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 763 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 764 }
1da177e4 765 }
1da177e4 766
421842ed 767 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
768}
769
8d1c802b 770static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 771{
2b2450ca 772 return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
8b9df265
MKL
773}
774
70ceb4f5
YH
775#ifdef CONFIG_IPV6_ROUTE_INFO
776int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 777 const struct in6_addr *gwaddr)
70ceb4f5 778{
c346dca1 779 struct net *net = dev_net(dev);
70ceb4f5
YH
780 struct route_info *rinfo = (struct route_info *) opt;
781 struct in6_addr prefix_buf, *prefix;
782 unsigned int pref;
4bed72e4 783 unsigned long lifetime;
8d1c802b 784 struct fib6_info *rt;
70ceb4f5
YH
785
786 if (len < sizeof(struct route_info)) {
787 return -EINVAL;
788 }
789
790 /* Sanity check for prefix_len and length */
791 if (rinfo->length > 3) {
792 return -EINVAL;
793 } else if (rinfo->prefix_len > 128) {
794 return -EINVAL;
795 } else if (rinfo->prefix_len > 64) {
796 if (rinfo->length < 2) {
797 return -EINVAL;
798 }
799 } else if (rinfo->prefix_len > 0) {
800 if (rinfo->length < 1) {
801 return -EINVAL;
802 }
803 }
804
805 pref = rinfo->route_pref;
806 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 807 return -EINVAL;
70ceb4f5 808
4bed72e4 809 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
810
811 if (rinfo->length == 3)
812 prefix = (struct in6_addr *)rinfo->prefix;
813 else {
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf,
816 (struct in6_addr *)rinfo->prefix,
817 rinfo->prefix_len);
818 prefix = &prefix_buf;
819 }
820
f104a567 821 if (rinfo->prefix_len == 0)
afb1d4b5 822 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
823 else
824 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 825 gwaddr, dev);
70ceb4f5
YH
826
827 if (rt && !lifetime) {
afb1d4b5 828 ip6_del_rt(net, rt);
70ceb4f5
YH
829 rt = NULL;
830 }
831
832 if (!rt && lifetime)
830218c1
DA
833 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
834 dev, pref);
70ceb4f5 835 else if (rt)
93c2fb25
DA
836 rt->fib6_flags = RTF_ROUTEINFO |
837 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
838
839 if (rt) {
1716a961 840 if (!addrconf_finite_timeout(lifetime))
14895687 841 fib6_clean_expires(rt);
1716a961 842 else
14895687 843 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 844
93531c67 845 fib6_info_release(rt);
70ceb4f5
YH
846 }
847 return 0;
848}
849#endif
850
ae90d867
DA
851/*
852 * Misc support functions
853 */
854
855/* called with rcu_lock held */
8d1c802b 856static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 857{
ad1601ae 858 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
ae90d867 859
93c2fb25 860 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
861 /* for copies of local routes, dst->dev needs to be the
862 * device if it is a master device, the master device if
863 * device is enslaved, and the loopback as the default
864 */
865 if (netif_is_l3_slave(dev) &&
93c2fb25 866 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
867 dev = l3mdev_master_dev_rcu(dev);
868 else if (!netif_is_l3_master(dev))
869 dev = dev_net(dev)->loopback_dev;
870 /* last case is netif_is_l3_master(dev) is true in which
871 * case we want dev returned to be dev
872 */
873 }
874
875 return dev;
876}
877
6edb3c96
DA
878static const int fib6_prop[RTN_MAX + 1] = {
879 [RTN_UNSPEC] = 0,
880 [RTN_UNICAST] = 0,
881 [RTN_LOCAL] = 0,
882 [RTN_BROADCAST] = 0,
883 [RTN_ANYCAST] = 0,
884 [RTN_MULTICAST] = 0,
885 [RTN_BLACKHOLE] = -EINVAL,
886 [RTN_UNREACHABLE] = -EHOSTUNREACH,
887 [RTN_PROHIBIT] = -EACCES,
888 [RTN_THROW] = -EAGAIN,
889 [RTN_NAT] = -EINVAL,
890 [RTN_XRESOLVE] = -EINVAL,
891};
892
893static int ip6_rt_type_to_error(u8 fib6_type)
894{
895 return fib6_prop[fib6_type];
896}
897
8d1c802b 898static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
899{
900 unsigned short flags = 0;
901
902 if (rt->dst_nocount)
903 flags |= DST_NOCOUNT;
904 if (rt->dst_nopolicy)
905 flags |= DST_NOPOLICY;
906 if (rt->dst_host)
907 flags |= DST_HOST;
908
909 return flags;
910}
911
8d1c802b 912static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
913{
914 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
915
916 switch (ort->fib6_type) {
917 case RTN_BLACKHOLE:
918 rt->dst.output = dst_discard_out;
919 rt->dst.input = dst_discard;
920 break;
921 case RTN_PROHIBIT:
922 rt->dst.output = ip6_pkt_prohibit_out;
923 rt->dst.input = ip6_pkt_prohibit;
924 break;
925 case RTN_THROW:
926 case RTN_UNREACHABLE:
927 default:
928 rt->dst.output = ip6_pkt_discard_out;
929 rt->dst.input = ip6_pkt_discard;
930 break;
931 }
932}
933
8d1c802b 934static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 935{
93c2fb25 936 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
937 ip6_rt_init_dst_reject(rt, ort);
938 return;
939 }
940
941 rt->dst.error = 0;
942 rt->dst.output = ip6_output;
943
d23c4b63 944 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 945 rt->dst.input = ip6_input;
93c2fb25 946 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
947 rt->dst.input = ip6_mc_input;
948 } else {
949 rt->dst.input = ip6_forward;
950 }
951
ad1601ae
DA
952 if (ort->fib6_nh.fib_nh_lws) {
953 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
6edb3c96
DA
954 lwtunnel_set_redirect(&rt->dst);
955 }
956
957 rt->dst.lastuse = jiffies;
958}
959
e873e4b9 960/* Caller must already hold reference to @from */
8d1c802b 961static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 962{
ae90d867 963 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 964 rcu_assign_pointer(rt->from, from);
e1255ed4 965 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
966}
967
e873e4b9 968/* Caller must already hold reference to @ort */
8d1c802b 969static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 970{
dcd1f572
DA
971 struct net_device *dev = fib6_info_nh_dev(ort);
972
6edb3c96
DA
973 ip6_rt_init_dst(rt, ort);
974
93c2fb25 975 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 976 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
93c2fb25 977 rt->rt6i_flags = ort->fib6_flags;
2b2450ca 978 if (ort->fib6_nh.fib_nh_has_gw) {
ad1601ae 979 rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
2b2450ca
DA
980 rt->rt6i_flags |= RTF_GATEWAY;
981 }
ae90d867 982 rt6_set_from(rt, ort);
ae90d867 983#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 984 rt->rt6i_src = ort->fib6_src;
ae90d867 985#endif
ae90d867
DA
986}
987
a3c00e46
MKL
988static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
989 struct in6_addr *saddr)
990{
66f5d6ce 991 struct fib6_node *pn, *sn;
a3c00e46
MKL
992 while (1) {
993 if (fn->fn_flags & RTN_TL_ROOT)
994 return NULL;
66f5d6ce
WW
995 pn = rcu_dereference(fn->parent);
996 sn = FIB6_SUBTREE(pn);
997 if (sn && sn != fn)
6454743b 998 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
999 else
1000 fn = pn;
1001 if (fn->fn_flags & RTN_RTINFO)
1002 return fn;
1003 }
1004}
c71099ac 1005
10585b43 1006static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1007{
1008 struct rt6_info *rt = *prt;
1009
1010 if (dst_hold_safe(&rt->dst))
1011 return true;
10585b43 1012 if (net) {
d3843fe5
WW
1013 rt = net->ipv6.ip6_null_entry;
1014 dst_hold(&rt->dst);
1015 } else {
1016 rt = NULL;
1017 }
1018 *prt = rt;
1019 return false;
1020}
1021
dec9b0e2 1022/* called with rcu_lock held */
8d1c802b 1023static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1024{
3b6761d1 1025 unsigned short flags = fib6_info_dst_flags(rt);
ad1601ae 1026 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
dec9b0e2
DA
1027 struct rt6_info *nrt;
1028
e873e4b9 1029 if (!fib6_info_hold_safe(rt))
1c87e79a 1030 goto fallback;
e873e4b9 1031
93531c67 1032 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1033 if (!nrt) {
e873e4b9 1034 fib6_info_release(rt);
1c87e79a
XL
1035 goto fallback;
1036 }
dec9b0e2 1037
1c87e79a
XL
1038 ip6_rt_copy_init(nrt, rt);
1039 return nrt;
1040
1041fallback:
1042 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1043 dst_hold(&nrt->dst);
dec9b0e2
DA
1044 return nrt;
1045}
1046
8ed67789
DL
1047static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1048 struct fib6_table *table,
b75cc8f9
DA
1049 struct flowi6 *fl6,
1050 const struct sk_buff *skb,
1051 int flags)
1da177e4 1052{
8d1c802b 1053 struct fib6_info *f6i;
1da177e4 1054 struct fib6_node *fn;
23fb93a4 1055 struct rt6_info *rt;
1da177e4 1056
b6cdbc85
DA
1057 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1058 flags &= ~RT6_LOOKUP_F_IFACE;
1059
66f5d6ce 1060 rcu_read_lock();
6454743b 1061 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1062restart:
23fb93a4
DA
1063 f6i = rcu_dereference(fn->leaf);
1064 if (!f6i) {
1065 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1066 } else {
23fb93a4 1067 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1068 fl6->flowi6_oif, flags);
93c2fb25 1069 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1070 f6i = fib6_multipath_select(net, f6i, fl6,
1071 fl6->flowi6_oif, skb,
1072 flags);
66f5d6ce 1073 }
23fb93a4 1074 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1075 fn = fib6_backtrack(fn, &fl6->saddr);
1076 if (fn)
1077 goto restart;
1078 }
2b760fcf 1079
d4bea421 1080 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1081
2b760fcf 1082 /* Search through exception table */
23fb93a4
DA
1083 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1084 if (rt) {
10585b43 1085 if (ip6_hold_safe(net, &rt))
dec9b0e2 1086 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1087 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1088 rt = net->ipv6.ip6_null_entry;
1089 dst_hold(&rt->dst);
23fb93a4
DA
1090 } else {
1091 rt = ip6_create_rt_rcu(f6i);
dec9b0e2 1092 }
b811580d 1093
66f5d6ce 1094 rcu_read_unlock();
b811580d 1095
c71099ac 1096 return rt;
c71099ac
TG
1097}
1098
67ba4152 1099struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1100 const struct sk_buff *skb, int flags)
ea6e574e 1101{
b75cc8f9 1102 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1103}
1104EXPORT_SYMBOL_GPL(ip6_route_lookup);
1105
9acd9f3a 1106struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1107 const struct in6_addr *saddr, int oif,
1108 const struct sk_buff *skb, int strict)
c71099ac 1109{
4c9483b2
DM
1110 struct flowi6 fl6 = {
1111 .flowi6_oif = oif,
1112 .daddr = *daddr,
c71099ac
TG
1113 };
1114 struct dst_entry *dst;
77d16f45 1115 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1116
adaa70bb 1117 if (saddr) {
4c9483b2 1118 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1119 flags |= RT6_LOOKUP_F_HAS_SADDR;
1120 }
1121
b75cc8f9 1122 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1123 if (dst->error == 0)
1124 return (struct rt6_info *) dst;
1125
1126 dst_release(dst);
1127
1da177e4
LT
1128 return NULL;
1129}
7159039a
YH
1130EXPORT_SYMBOL(rt6_lookup);
1131
c71099ac 1132/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1133 * It takes new route entry, the addition fails by any reason the
1134 * route is released.
1135 * Caller must hold dst before calling it.
1da177e4
LT
1136 */
1137
8d1c802b 1138static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1139 struct netlink_ext_ack *extack)
1da177e4
LT
1140{
1141 int err;
c71099ac 1142 struct fib6_table *table;
1da177e4 1143
93c2fb25 1144 table = rt->fib6_table;
66f5d6ce 1145 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1146 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1147 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1148
1149 return err;
1150}
1151
8d1c802b 1152int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1153{
afb1d4b5 1154 struct nl_info info = { .nl_net = net, };
e715b6d3 1155
d4ead6b3 1156 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1157}
1158
8d1c802b 1159static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1160 const struct in6_addr *daddr,
1161 const struct in6_addr *saddr)
1da177e4 1162{
4832c30d 1163 struct net_device *dev;
1da177e4
LT
1164 struct rt6_info *rt;
1165
1166 /*
1167 * Clone the route.
1168 */
1169
e873e4b9
WW
1170 if (!fib6_info_hold_safe(ort))
1171 return NULL;
1172
4832c30d 1173 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1174 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1175 if (!rt) {
1176 fib6_info_release(ort);
83a09abd 1177 return NULL;
e873e4b9 1178 }
83a09abd
MKL
1179
1180 ip6_rt_copy_init(rt, ort);
1181 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1182 rt->dst.flags |= DST_HOST;
1183 rt->rt6i_dst.addr = *daddr;
1184 rt->rt6i_dst.plen = 128;
1da177e4 1185
83a09abd 1186 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1187 if (ort->fib6_dst.plen != 128 &&
1188 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1189 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1190#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1191 if (rt->rt6i_src.plen && saddr) {
1192 rt->rt6i_src.addr = *saddr;
1193 rt->rt6i_src.plen = 128;
8b9df265 1194 }
83a09abd 1195#endif
95a9a5ba 1196 }
1da177e4 1197
95a9a5ba
YH
1198 return rt;
1199}
1da177e4 1200
8d1c802b 1201static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1202{
3b6761d1 1203 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1204 struct net_device *dev;
d52d3997
MKL
1205 struct rt6_info *pcpu_rt;
1206
e873e4b9
WW
1207 if (!fib6_info_hold_safe(rt))
1208 return NULL;
1209
4832c30d
DA
1210 rcu_read_lock();
1211 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1212 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1213 rcu_read_unlock();
e873e4b9
WW
1214 if (!pcpu_rt) {
1215 fib6_info_release(rt);
d52d3997 1216 return NULL;
e873e4b9 1217 }
d52d3997 1218 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1219 pcpu_rt->rt6i_flags |= RTF_PCPU;
1220 return pcpu_rt;
1221}
1222
66f5d6ce 1223/* It should be called with rcu_read_lock() acquired */
8d1c802b 1224static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1225{
a73e4195 1226 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1227
1228 p = this_cpu_ptr(rt->rt6i_pcpu);
1229 pcpu_rt = *p;
1230
d4ead6b3 1231 if (pcpu_rt)
10585b43 1232 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1233
a73e4195
MKL
1234 return pcpu_rt;
1235}
1236
afb1d4b5 1237static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1238 struct fib6_info *rt)
a73e4195
MKL
1239{
1240 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1241
1242 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1243 if (!pcpu_rt) {
9c7370a1
MKL
1244 dst_hold(&net->ipv6.ip6_null_entry->dst);
1245 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1246 }
1247
a94b9367
WW
1248 dst_hold(&pcpu_rt->dst);
1249 p = this_cpu_ptr(rt->rt6i_pcpu);
1250 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1251 BUG_ON(prev);
a94b9367 1252
d52d3997
MKL
1253 return pcpu_rt;
1254}
1255
35732d01
WW
1256/* exception hash table implementation
1257 */
/* rt6_exception_lock is taken (or asserted through lockdep_is_held()) by
 * the functions in this file that insert, remove, flush or age entries of
 * a fib6_info exception table, e.g. rt6_insert_exception() and
 * rt6_flush_exceptions().
 */
1258static DEFINE_SPINLOCK(rt6_exception_lock);
1259
1260/* Remove rt6_ex from hash table and free the memory
1261 * Caller must hold rt6_exception_lock
1262 */
1263static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1264 struct rt6_exception *rt6_ex)
1265{
f5b51fe8 1266 struct fib6_info *from;
b2427e67 1267 struct net *net;
81eb8447 1268
35732d01
WW
1269 if (!bucket || !rt6_ex)
1270 return;
b2427e67
CIK
1271
1272 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1273 net->ipv6.rt6_stats->fib_rt_cache--;
1274
1275 /* purge completely the exception to allow releasing the held resources:
1276 * some [sk] cache may keep the dst around for unlimited time
1277 */
1278 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1279 lockdep_is_held(&rt6_exception_lock));
1280 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1281 fib6_info_release(from);
1282 dst_dev_put(&rt6_ex->rt6i->dst);
1283
35732d01 1284 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1285 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1286 kfree_rcu(rt6_ex, rcu);
1287 WARN_ON_ONCE(!bucket->depth);
1288 bucket->depth--;
1289}
1290
1291/* Remove oldest rt6_ex in bucket and free the memory
1292 * Caller must hold rt6_exception_lock
1293 */
1294static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1295{
1296 struct rt6_exception *rt6_ex, *oldest = NULL;
1297
1298 if (!bucket)
1299 return;
1300
1301 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1302 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1303 oldest = rt6_ex;
1304 }
1305 rt6_remove_exception(bucket, oldest);
1306}
1307
1308static u32 rt6_exception_hash(const struct in6_addr *dst,
1309 const struct in6_addr *src)
1310{
1311 static u32 seed __read_mostly;
1312 u32 val;
1313
1314 net_get_random_once(&seed, sizeof(seed));
1315 val = jhash(dst, sizeof(*dst), seed);
1316
1317#ifdef CONFIG_IPV6_SUBTREES
1318 if (src)
1319 val = jhash(src, sizeof(*src), val);
1320#endif
1321 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1322}
1323
1324/* Helper function to find the cached rt in the hash table
1325 * and update bucket pointer to point to the bucket for this
1326 * (daddr, saddr) pair
1327 * Caller must hold rt6_exception_lock
1328 */
1329static struct rt6_exception *
1330__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1331 const struct in6_addr *daddr,
1332 const struct in6_addr *saddr)
1333{
1334 struct rt6_exception *rt6_ex;
1335 u32 hval;
1336
1337 if (!(*bucket) || !daddr)
1338 return NULL;
1339
1340 hval = rt6_exception_hash(daddr, saddr);
1341 *bucket += hval;
1342
1343 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1344 struct rt6_info *rt6 = rt6_ex->rt6i;
1345 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1346
1347#ifdef CONFIG_IPV6_SUBTREES
1348 if (matched && saddr)
1349 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1350#endif
1351 if (matched)
1352 return rt6_ex;
1353 }
1354 return NULL;
1355}
1356
1357/* Helper function to find the cached rt in the hash table
1358 * and update bucket pointer to point to the bucket for this
1359 * (daddr, saddr) pair
1360 * Caller must hold rcu_read_lock()
1361 */
1362static struct rt6_exception *
1363__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1364 const struct in6_addr *daddr,
1365 const struct in6_addr *saddr)
1366{
1367 struct rt6_exception *rt6_ex;
1368 u32 hval;
1369
1370 WARN_ON_ONCE(!rcu_read_lock_held());
1371
1372 if (!(*bucket) || !daddr)
1373 return NULL;
1374
1375 hval = rt6_exception_hash(daddr, saddr);
1376 *bucket += hval;
1377
1378 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1379 struct rt6_info *rt6 = rt6_ex->rt6i;
1380 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1381
1382#ifdef CONFIG_IPV6_SUBTREES
1383 if (matched && saddr)
1384 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1385#endif
1386 if (matched)
1387 return rt6_ex;
1388 }
1389 return NULL;
1390}
1391
8d1c802b 1392static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1393{
1394 unsigned int mtu;
1395
dcd1f572
DA
1396 if (rt->fib6_pmtu) {
1397 mtu = rt->fib6_pmtu;
1398 } else {
1399 struct net_device *dev = fib6_info_nh_dev(rt);
1400 struct inet6_dev *idev;
1401
1402 rcu_read_lock();
1403 idev = __in6_dev_get(dev);
1404 mtu = idev->cnf.mtu6;
1405 rcu_read_unlock();
1406 }
1407
d4ead6b3
DA
1408 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1409
ad1601ae 1410 return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
d4ead6b3
DA
1411}
1412
35732d01 1413static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1414 struct fib6_info *ort)
35732d01 1415{
5e670d84 1416 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1417 struct rt6_exception_bucket *bucket;
1418 struct in6_addr *src_key = NULL;
1419 struct rt6_exception *rt6_ex;
1420 int err = 0;
1421
35732d01
WW
1422 spin_lock_bh(&rt6_exception_lock);
1423
1424 if (ort->exception_bucket_flushed) {
1425 err = -EINVAL;
1426 goto out;
1427 }
1428
1429 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1430 lockdep_is_held(&rt6_exception_lock));
1431 if (!bucket) {
1432 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1433 GFP_ATOMIC);
1434 if (!bucket) {
1435 err = -ENOMEM;
1436 goto out;
1437 }
1438 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1439 }
1440
1441#ifdef CONFIG_IPV6_SUBTREES
1442 /* rt6i_src.plen != 0 indicates ort is in subtree
1443 * and exception table is indexed by a hash of
1444 * both rt6i_dst and rt6i_src.
1445 * Otherwise, the exception table is indexed by
1446 * a hash of only rt6i_dst.
1447 */
93c2fb25 1448 if (ort->fib6_src.plen)
35732d01
WW
1449 src_key = &nrt->rt6i_src.addr;
1450#endif
f5bbe7ee
WW
1451 /* rt6_mtu_change() might lower mtu on ort.
1452 * Only insert this exception route if its mtu
1453 * is less than ort's mtu value.
1454 */
d4ead6b3 1455 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1456 err = -EINVAL;
1457 goto out;
1458 }
60006a48 1459
35732d01
WW
1460 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1461 src_key);
1462 if (rt6_ex)
1463 rt6_remove_exception(bucket, rt6_ex);
1464
1465 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1466 if (!rt6_ex) {
1467 err = -ENOMEM;
1468 goto out;
1469 }
1470 rt6_ex->rt6i = nrt;
1471 rt6_ex->stamp = jiffies;
35732d01
WW
1472 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1473 bucket->depth++;
81eb8447 1474 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1475
1476 if (bucket->depth > FIB6_MAX_DEPTH)
1477 rt6_exception_remove_oldest(bucket);
1478
1479out:
1480 spin_unlock_bh(&rt6_exception_lock);
1481
1482 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1483 if (!err) {
93c2fb25 1484 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1485 fib6_update_sernum(net, ort);
93c2fb25 1486 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1487 fib6_force_start_gc(net);
1488 }
35732d01
WW
1489
1490 return err;
1491}
1492
8d1c802b 1493void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1494{
1495 struct rt6_exception_bucket *bucket;
1496 struct rt6_exception *rt6_ex;
1497 struct hlist_node *tmp;
1498 int i;
1499
1500 spin_lock_bh(&rt6_exception_lock);
1501 /* Prevent rt6_insert_exception() to recreate the bucket list */
1502 rt->exception_bucket_flushed = 1;
1503
1504 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1505 lockdep_is_held(&rt6_exception_lock));
1506 if (!bucket)
1507 goto out;
1508
1509 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1510 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1511 rt6_remove_exception(bucket, rt6_ex);
1512 WARN_ON_ONCE(bucket->depth);
1513 bucket++;
1514 }
1515
1516out:
1517 spin_unlock_bh(&rt6_exception_lock);
1518}
1519
1520/* Find cached rt in the hash table inside passed in rt
1521 * Caller has to hold rcu_read_lock()
1522 */
8d1c802b 1523static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1524 struct in6_addr *daddr,
1525 struct in6_addr *saddr)
1526{
1527 struct rt6_exception_bucket *bucket;
1528 struct in6_addr *src_key = NULL;
1529 struct rt6_exception *rt6_ex;
1530 struct rt6_info *res = NULL;
1531
1532 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1533
1534#ifdef CONFIG_IPV6_SUBTREES
1535 /* rt6i_src.plen != 0 indicates rt is in subtree
1536 * and exception table is indexed by a hash of
1537 * both rt6i_dst and rt6i_src.
1538 * Otherwise, the exception table is indexed by
1539 * a hash of only rt6i_dst.
1540 */
93c2fb25 1541 if (rt->fib6_src.plen)
35732d01
WW
1542 src_key = saddr;
1543#endif
1544 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1545
1546 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1547 res = rt6_ex->rt6i;
1548
1549 return res;
1550}
1551
1552/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1553static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1554{
35732d01
WW
1555 struct rt6_exception_bucket *bucket;
1556 struct in6_addr *src_key = NULL;
1557 struct rt6_exception *rt6_ex;
8a14e46f 1558 struct fib6_info *from;
35732d01
WW
1559 int err;
1560
091311de 1561 from = rcu_dereference(rt->from);
35732d01 1562 if (!from ||
442d713b 1563 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1564 return -EINVAL;
1565
1566 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1567 return -ENOENT;
1568
1569 spin_lock_bh(&rt6_exception_lock);
1570 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1571 lockdep_is_held(&rt6_exception_lock));
1572#ifdef CONFIG_IPV6_SUBTREES
1573 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1574 * and exception table is indexed by a hash of
1575 * both rt6i_dst and rt6i_src.
1576 * Otherwise, the exception table is indexed by
1577 * a hash of only rt6i_dst.
1578 */
93c2fb25 1579 if (from->fib6_src.plen)
35732d01
WW
1580 src_key = &rt->rt6i_src.addr;
1581#endif
1582 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1583 &rt->rt6i_dst.addr,
1584 src_key);
1585 if (rt6_ex) {
1586 rt6_remove_exception(bucket, rt6_ex);
1587 err = 0;
1588 } else {
1589 err = -ENOENT;
1590 }
1591
1592 spin_unlock_bh(&rt6_exception_lock);
1593 return err;
1594}
1595
1596/* Find rt6_ex which contains the passed in rt cache and
1597 * refresh its stamp
1598 */
1599static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1600{
35732d01
WW
1601 struct rt6_exception_bucket *bucket;
1602 struct in6_addr *src_key = NULL;
1603 struct rt6_exception *rt6_ex;
193f3685 1604 struct fib6_info *from;
35732d01
WW
1605
1606 rcu_read_lock();
193f3685
PA
1607 from = rcu_dereference(rt->from);
1608 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1609 goto unlock;
1610
35732d01
WW
1611 bucket = rcu_dereference(from->rt6i_exception_bucket);
1612
1613#ifdef CONFIG_IPV6_SUBTREES
1614 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1615 * and exception table is indexed by a hash of
1616 * both rt6i_dst and rt6i_src.
1617 * Otherwise, the exception table is indexed by
1618 * a hash of only rt6i_dst.
1619 */
93c2fb25 1620 if (from->fib6_src.plen)
35732d01
WW
1621 src_key = &rt->rt6i_src.addr;
1622#endif
1623 rt6_ex = __rt6_find_exception_rcu(&bucket,
1624 &rt->rt6i_dst.addr,
1625 src_key);
1626 if (rt6_ex)
1627 rt6_ex->stamp = jiffies;
1628
193f3685 1629unlock:
35732d01
WW
1630 rcu_read_unlock();
1631}
1632
e9fa1495
SB
1633static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1634 struct rt6_info *rt, int mtu)
1635{
1636 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1637 * lowest MTU in the path: always allow updating the route PMTU to
1638 * reflect PMTU decreases.
1639 *
1640 * If the new MTU is higher, and the route PMTU is equal to the local
1641 * MTU, this means the old MTU is the lowest in the path, so allow
1642 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1643 * handle this.
1644 */
1645
1646 if (dst_mtu(&rt->dst) >= mtu)
1647 return true;
1648
1649 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1650 return true;
1651
1652 return false;
1653}
1654
1655static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1656 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1657{
1658 struct rt6_exception_bucket *bucket;
1659 struct rt6_exception *rt6_ex;
1660 int i;
1661
1662 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1663 lockdep_is_held(&rt6_exception_lock));
1664
e9fa1495
SB
1665 if (!bucket)
1666 return;
1667
1668 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1669 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1670 struct rt6_info *entry = rt6_ex->rt6i;
1671
1672 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1673 * route), the metrics of its rt->from have already
e9fa1495
SB
1674 * been updated.
1675 */
d4ead6b3 1676 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1677 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1678 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1679 }
e9fa1495 1680 bucket++;
f5bbe7ee
WW
1681 }
1682}
1683
b16cb459
WW
1684#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1685
8d1c802b 1686static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1687 struct in6_addr *gateway)
1688{
1689 struct rt6_exception_bucket *bucket;
1690 struct rt6_exception *rt6_ex;
1691 struct hlist_node *tmp;
1692 int i;
1693
1694 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1695 return;
1696
1697 spin_lock_bh(&rt6_exception_lock);
1698 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1699 lockdep_is_held(&rt6_exception_lock));
1700
1701 if (bucket) {
1702 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1703 hlist_for_each_entry_safe(rt6_ex, tmp,
1704 &bucket->chain, hlist) {
1705 struct rt6_info *entry = rt6_ex->rt6i;
1706
1707 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1708 RTF_CACHE_GATEWAY &&
1709 ipv6_addr_equal(gateway,
1710 &entry->rt6i_gateway)) {
1711 rt6_remove_exception(bucket, rt6_ex);
1712 }
1713 }
1714 bucket++;
1715 }
1716 }
1717
1718 spin_unlock_bh(&rt6_exception_lock);
1719}
1720
c757faa8
WW
1721static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1722 struct rt6_exception *rt6_ex,
1723 struct fib6_gc_args *gc_args,
1724 unsigned long now)
1725{
1726 struct rt6_info *rt = rt6_ex->rt6i;
1727
1859bac0
PA
1728 /* we are pruning and obsoleting aged-out and non gateway exceptions
1729 * even if others have still references to them, so that on next
1730 * dst_check() such references can be dropped.
1731 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1732 * expired, independently from their aging, as per RFC 8201 section 4
1733 */
31afeb42
WW
1734 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1735 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1736 RT6_TRACE("aging clone %p\n", rt);
1737 rt6_remove_exception(bucket, rt6_ex);
1738 return;
1739 }
1740 } else if (time_after(jiffies, rt->dst.expires)) {
1741 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1742 rt6_remove_exception(bucket, rt6_ex);
1743 return;
31afeb42
WW
1744 }
1745
1746 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1747 struct neighbour *neigh;
1748 __u8 neigh_flags = 0;
1749
1bfa26ff
ED
1750 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1751 if (neigh)
c757faa8 1752 neigh_flags = neigh->flags;
1bfa26ff 1753
c757faa8
WW
1754 if (!(neigh_flags & NTF_ROUTER)) {
1755 RT6_TRACE("purging route %p via non-router but gateway\n",
1756 rt);
1757 rt6_remove_exception(bucket, rt6_ex);
1758 return;
1759 }
1760 }
31afeb42 1761
c757faa8
WW
1762 gc_args->more++;
1763}
1764
8d1c802b 1765void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1766 struct fib6_gc_args *gc_args,
1767 unsigned long now)
1768{
1769 struct rt6_exception_bucket *bucket;
1770 struct rt6_exception *rt6_ex;
1771 struct hlist_node *tmp;
1772 int i;
1773
1774 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1775 return;
1776
1bfa26ff
ED
1777 rcu_read_lock_bh();
1778 spin_lock(&rt6_exception_lock);
c757faa8
WW
1779 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1780 lockdep_is_held(&rt6_exception_lock));
1781
1782 if (bucket) {
1783 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1784 hlist_for_each_entry_safe(rt6_ex, tmp,
1785 &bucket->chain, hlist) {
1786 rt6_age_examine_exception(bucket, rt6_ex,
1787 gc_args, now);
1788 }
1789 bucket++;
1790 }
1791 }
1bfa26ff
ED
1792 spin_unlock(&rt6_exception_lock);
1793 rcu_read_unlock_bh();
c757faa8
WW
1794}
1795
1d053da9
DA
1796/* must be called with rcu lock held */
1797struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1798 int oif, struct flowi6 *fl6, int strict)
1da177e4 1799{
367efcb9 1800 struct fib6_node *fn, *saved_fn;
8d1c802b 1801 struct fib6_info *f6i;
1da177e4 1802
6454743b 1803 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1804 saved_fn = fn;
1da177e4 1805
ca254490
DA
1806 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1807 oif = 0;
1808
a3c00e46 1809redo_rt6_select:
23fb93a4 1810 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1811 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1812 fn = fib6_backtrack(fn, &fl6->saddr);
1813 if (fn)
1814 goto redo_rt6_select;
367efcb9
MKL
1815 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1816 /* also consider unreachable route */
1817 strict &= ~RT6_LOOKUP_F_REACHABLE;
1818 fn = saved_fn;
1819 goto redo_rt6_select;
367efcb9 1820 }
a3c00e46
MKL
1821 }
1822
d4bea421 1823 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1824
1d053da9
DA
1825 return f6i;
1826}
1827
1828struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1829 int oif, struct flowi6 *fl6,
1830 const struct sk_buff *skb, int flags)
1831{
1832 struct fib6_info *f6i;
1833 struct rt6_info *rt;
1834 int strict = 0;
1835
1836 strict |= flags & RT6_LOOKUP_F_IFACE;
1837 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1838 if (net->ipv6.devconf_all->forwarding == 0)
1839 strict |= RT6_LOOKUP_F_REACHABLE;
1840
1841 rcu_read_lock();
1842
1843 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1844 if (f6i->fib6_nsiblings)
1845 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1846
23fb93a4 1847 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1848 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1849 rcu_read_unlock();
d3843fe5 1850 dst_hold(&rt->dst);
d3843fe5 1851 return rt;
23fb93a4
DA
1852 }
1853
1854 /*Search through exception table */
1855 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1856 if (rt) {
10585b43 1857 if (ip6_hold_safe(net, &rt))
d3843fe5 1858 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1859
66f5d6ce 1860 rcu_read_unlock();
d52d3997 1861 return rt;
3da59bd9 1862 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2b2450ca 1863 !f6i->fib6_nh.fib_nh_has_gw)) {
3da59bd9
MKL
1864 /* Create a RTF_CACHE clone which will not be
1865 * owned by the fib6 tree. It is for the special case where
1866 * the daddr in the skb during the neighbor look-up is different
1867 * from the fl6->daddr used to look-up route here.
1868 */
3da59bd9
MKL
1869 struct rt6_info *uncached_rt;
1870
23fb93a4 1871 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1872
4d85cd0c 1873 rcu_read_unlock();
c71099ac 1874
1cfb71ee
WW
1875 if (uncached_rt) {
1876 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1877 * No need for another dst_hold()
1878 */
8d0b94af 1879 rt6_uncached_list_add(uncached_rt);
81eb8447 1880 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1881 } else {
3da59bd9 1882 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1883 dst_hold(&uncached_rt->dst);
1884 }
b811580d 1885
3da59bd9 1886 return uncached_rt;
d52d3997
MKL
1887 } else {
1888 /* Get a percpu copy */
1889
1890 struct rt6_info *pcpu_rt;
1891
951f788a 1892 local_bh_disable();
23fb93a4 1893 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1894
93531c67
DA
1895 if (!pcpu_rt)
1896 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1897
951f788a
ED
1898 local_bh_enable();
1899 rcu_read_unlock();
d4bea421 1900
d52d3997
MKL
1901 return pcpu_rt;
1902 }
1da177e4 1903}
9ff74384 1904EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1905
b75cc8f9
DA
1906static struct rt6_info *ip6_pol_route_input(struct net *net,
1907 struct fib6_table *table,
1908 struct flowi6 *fl6,
1909 const struct sk_buff *skb,
1910 int flags)
4acad72d 1911{
b75cc8f9 1912 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1913}
1914
d409b847
MB
1915struct dst_entry *ip6_route_input_lookup(struct net *net,
1916 struct net_device *dev,
b75cc8f9
DA
1917 struct flowi6 *fl6,
1918 const struct sk_buff *skb,
1919 int flags)
72331bc0
SL
1920{
1921 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1922 flags |= RT6_LOOKUP_F_IFACE;
1923
b75cc8f9 1924 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1925}
d409b847 1926EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1927
23aebdac 1928static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1929 struct flow_keys *keys,
1930 struct flow_keys *flkeys)
23aebdac
JS
1931{
1932 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1933 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1934 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1935 const struct ipv6hdr *inner_iph;
1936 const struct icmp6hdr *icmph;
1937 struct ipv6hdr _inner_iph;
cea67a2d 1938 struct icmp6hdr _icmph;
23aebdac
JS
1939
1940 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1941 goto out;
1942
cea67a2d
ED
1943 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1944 sizeof(_icmph), &_icmph);
1945 if (!icmph)
1946 goto out;
1947
23aebdac
JS
1948 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1949 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1950 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1951 icmph->icmp6_type != ICMPV6_PARAMPROB)
1952 goto out;
1953
1954 inner_iph = skb_header_pointer(skb,
1955 skb_transport_offset(skb) + sizeof(*icmph),
1956 sizeof(_inner_iph), &_inner_iph);
1957 if (!inner_iph)
1958 goto out;
1959
1960 key_iph = inner_iph;
5e5d6fed 1961 _flkeys = NULL;
23aebdac 1962out:
5e5d6fed
RP
1963 if (_flkeys) {
1964 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1965 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1966 keys->tags.flow_label = _flkeys->tags.flow_label;
1967 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1968 } else {
1969 keys->addrs.v6addrs.src = key_iph->saddr;
1970 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1971 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1972 keys->basic.ip_proto = key_iph->nexthdr;
1973 }
23aebdac
JS
1974}
1975
1976/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1977u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1978 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1979{
1980 struct flow_keys hash_keys;
9a2a537a 1981 u32 mhash;
23aebdac 1982
bbfa047a 1983 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1984 case 0:
1985 memset(&hash_keys, 0, sizeof(hash_keys));
1986 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1987 if (skb) {
1988 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1989 } else {
1990 hash_keys.addrs.v6addrs.src = fl6->saddr;
1991 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 1992 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
1993 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1994 }
1995 break;
1996 case 1:
1997 if (skb) {
1998 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1999 struct flow_keys keys;
2000
2001 /* short-circuit if we already have L4 hash present */
2002 if (skb->l4_hash)
2003 return skb_get_hash_raw(skb) >> 1;
2004
2005 memset(&hash_keys, 0, sizeof(hash_keys));
2006
2007 if (!flkeys) {
2008 skb_flow_dissect_flow_keys(skb, &keys, flag);
2009 flkeys = &keys;
2010 }
2011 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2012 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2013 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2014 hash_keys.ports.src = flkeys->ports.src;
2015 hash_keys.ports.dst = flkeys->ports.dst;
2016 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2017 } else {
2018 memset(&hash_keys, 0, sizeof(hash_keys));
2019 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2020 hash_keys.addrs.v6addrs.src = fl6->saddr;
2021 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2022 hash_keys.ports.src = fl6->fl6_sport;
2023 hash_keys.ports.dst = fl6->fl6_dport;
2024 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2025 }
2026 break;
23aebdac 2027 }
9a2a537a 2028 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2029
9a2a537a 2030 return mhash >> 1;
23aebdac
JS
2031}
2032
c71099ac
TG
2033void ip6_route_input(struct sk_buff *skb)
2034{
b71d1d42 2035 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2036 struct net *net = dev_net(skb->dev);
adaa70bb 2037 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2038 struct ip_tunnel_info *tun_info;
4c9483b2 2039 struct flowi6 fl6 = {
e0d56fdd 2040 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2041 .daddr = iph->daddr,
2042 .saddr = iph->saddr,
6502ca52 2043 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2044 .flowi6_mark = skb->mark,
2045 .flowi6_proto = iph->nexthdr,
c71099ac 2046 };
5e5d6fed 2047 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2048
904af04d 2049 tun_info = skb_tunnel_info(skb);
46fa062a 2050 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2051 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2052
2053 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2054 flkeys = &_flkeys;
2055
23aebdac 2056 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2057 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2058 skb_dst_drop(skb);
b75cc8f9
DA
2059 skb_dst_set(skb,
2060 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2061}
2062
b75cc8f9
DA
2063static struct rt6_info *ip6_pol_route_output(struct net *net,
2064 struct fib6_table *table,
2065 struct flowi6 *fl6,
2066 const struct sk_buff *skb,
2067 int flags)
1da177e4 2068{
b75cc8f9 2069 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2070}
2071
6f21c96a
PA
2072struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2073 struct flowi6 *fl6, int flags)
c71099ac 2074{
d46a9d67 2075 bool any_src;
c71099ac 2076
3ede0bbc
RS
2077 if (ipv6_addr_type(&fl6->daddr) &
2078 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2079 struct dst_entry *dst;
2080
2081 dst = l3mdev_link_scope_lookup(net, fl6);
2082 if (dst)
2083 return dst;
2084 }
ca254490 2085
1fb9489b 2086 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2087
d46a9d67 2088 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2089 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2090 (fl6->flowi6_oif && any_src))
77d16f45 2091 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2092
d46a9d67 2093 if (!any_src)
adaa70bb 2094 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2095 else if (sk)
2096 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2097
b75cc8f9 2098 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2099}
6f21c96a 2100EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2101
2774c131 2102struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2103{
5c1e6aa3 2104 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2105 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2106 struct dst_entry *new = NULL;
2107
1dbe3252 2108 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2109 DST_OBSOLETE_DEAD, 0);
14e50e57 2110 if (rt) {
0a1f5962 2111 rt6_info_init(rt);
81eb8447 2112 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2113
0a1f5962 2114 new = &rt->dst;
14e50e57 2115 new->__use = 1;
352e512c 2116 new->input = dst_discard;
ede2059d 2117 new->output = dst_discard_out;
14e50e57 2118
0a1f5962 2119 dst_copy_metrics(new, &ort->dst);
14e50e57 2120
1dbe3252 2121 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2122 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2123 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2124
2125 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2126#ifdef CONFIG_IPV6_SUBTREES
2127 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2128#endif
14e50e57
DM
2129 }
2130
69ead7af
DM
2131 dst_release(dst_orig);
2132 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2133}
14e50e57 2134
1da177e4
LT
2135/*
2136 * Destination cache support functions
2137 */
2138
8d1c802b 2139static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2140{
93531c67
DA
2141 u32 rt_cookie = 0;
2142
8ae86971 2143 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2144 return false;
2145
2146 if (fib6_check_expired(f6i))
2147 return false;
2148
2149 return true;
4b32b5ad
MKL
2150}
2151
a68886a6
DA
2152static struct dst_entry *rt6_check(struct rt6_info *rt,
2153 struct fib6_info *from,
2154 u32 cookie)
3da59bd9 2155{
36143645 2156 u32 rt_cookie = 0;
c5cff856 2157
a68886a6 2158 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2159 rt_cookie != cookie)
3da59bd9
MKL
2160 return NULL;
2161
2162 if (rt6_check_expired(rt))
2163 return NULL;
2164
2165 return &rt->dst;
2166}
2167
a68886a6
DA
2168static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2169 struct fib6_info *from,
2170 u32 cookie)
3da59bd9 2171{
5973fb1e
MKL
2172 if (!__rt6_check_expired(rt) &&
2173 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2174 fib6_check(from, cookie))
3da59bd9
MKL
2175 return &rt->dst;
2176 else
2177 return NULL;
2178}
2179
1da177e4
LT
2180static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2181{
a87b7dc9 2182 struct dst_entry *dst_ret;
a68886a6 2183 struct fib6_info *from;
1da177e4
LT
2184 struct rt6_info *rt;
2185
a87b7dc9
DA
2186 rt = container_of(dst, struct rt6_info, dst);
2187
2188 rcu_read_lock();
1da177e4 2189
6f3118b5
ND
2190 /* All IPV6 dsts are created with ->obsolete set to the value
2191 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2192 * into this function always.
2193 */
e3bc10bd 2194
a68886a6 2195 from = rcu_dereference(rt->from);
4b32b5ad 2196
a68886a6
DA
2197 if (from && (rt->rt6i_flags & RTF_PCPU ||
2198 unlikely(!list_empty(&rt->rt6i_uncached))))
2199 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2200 else
a68886a6 2201 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2202
2203 rcu_read_unlock();
2204
2205 return dst_ret;
1da177e4
LT
2206}
2207
2208static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2209{
2210 struct rt6_info *rt = (struct rt6_info *) dst;
2211
2212 if (rt) {
54c1a859 2213 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2214 rcu_read_lock();
54c1a859 2215 if (rt6_check_expired(rt)) {
93531c67 2216 rt6_remove_exception_rt(rt);
54c1a859
YH
2217 dst = NULL;
2218 }
c3c14da0 2219 rcu_read_unlock();
54c1a859 2220 } else {
1da177e4 2221 dst_release(dst);
54c1a859
YH
2222 dst = NULL;
2223 }
1da177e4 2224 }
54c1a859 2225 return dst;
1da177e4
LT
2226}
2227
2228static void ip6_link_failure(struct sk_buff *skb)
2229{
2230 struct rt6_info *rt;
2231
3ffe533c 2232 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2233
adf30907 2234 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2235 if (rt) {
8a14e46f 2236 rcu_read_lock();
1eb4f758 2237 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2238 rt6_remove_exception_rt(rt);
c5cff856 2239 } else {
a68886a6 2240 struct fib6_info *from;
c5cff856
WW
2241 struct fib6_node *fn;
2242
a68886a6
DA
2243 from = rcu_dereference(rt->from);
2244 if (from) {
2245 fn = rcu_dereference(from->fib6_node);
2246 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2247 fn->fn_sernum = -1;
2248 }
1eb4f758 2249 }
8a14e46f 2250 rcu_read_unlock();
1da177e4
LT
2251 }
2252}
2253
6a3e030f
DA
2254static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2255{
a68886a6
DA
2256 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2257 struct fib6_info *from;
2258
2259 rcu_read_lock();
2260 from = rcu_dereference(rt0->from);
2261 if (from)
2262 rt0->dst.expires = from->expires;
2263 rcu_read_unlock();
2264 }
6a3e030f
DA
2265
2266 dst_set_expires(&rt0->dst, timeout);
2267 rt0->rt6i_flags |= RTF_EXPIRES;
2268}
2269
45e4fd26
MKL
2270static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2271{
2272 struct net *net = dev_net(rt->dst.dev);
2273
d4ead6b3 2274 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2275 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2276 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2277}
2278
0d3f6d29
MKL
2279static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2280{
2281 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2282 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2283}
2284
45e4fd26
MKL
2285static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2286 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2287{
0dec879f 2288 const struct in6_addr *daddr, *saddr;
67ba4152 2289 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2290
19bda36c
XL
2291 if (dst_metric_locked(dst, RTAX_MTU))
2292 return;
2293
0dec879f
JA
2294 if (iph) {
2295 daddr = &iph->daddr;
2296 saddr = &iph->saddr;
2297 } else if (sk) {
2298 daddr = &sk->sk_v6_daddr;
2299 saddr = &inet6_sk(sk)->saddr;
2300 } else {
2301 daddr = NULL;
2302 saddr = NULL;
2303 }
2304 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2305 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2306 if (mtu >= dst_mtu(dst))
2307 return;
9d289715 2308
0d3f6d29 2309 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2310 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2311 /* update rt6_ex->stamp for cache */
2312 if (rt6->rt6i_flags & RTF_CACHE)
2313 rt6_update_exception_stamp_rt(rt6);
0dec879f 2314 } else if (daddr) {
a68886a6 2315 struct fib6_info *from;
45e4fd26
MKL
2316 struct rt6_info *nrt6;
2317
4d85cd0c 2318 rcu_read_lock();
a68886a6
DA
2319 from = rcu_dereference(rt6->from);
2320 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2321 if (nrt6) {
2322 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2323 if (rt6_insert_exception(nrt6, from))
2b760fcf 2324 dst_release_immediate(&nrt6->dst);
45e4fd26 2325 }
a68886a6 2326 rcu_read_unlock();
1da177e4
LT
2327 }
2328}
2329
45e4fd26
MKL
2330static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2331 struct sk_buff *skb, u32 mtu)
2332{
2333 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2334}
2335
42ae66c8 2336void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2337 int oif, u32 mark, kuid_t uid)
81aded24
DM
2338{
2339 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2340 struct dst_entry *dst;
dc92095d
2341 struct flowi6 fl6 = {
2342 .flowi6_oif = oif,
2343 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2344 .daddr = iph->daddr,
2345 .saddr = iph->saddr,
2346 .flowlabel = ip6_flowinfo(iph),
2347 .flowi6_uid = uid,
2348 };
81aded24
DM
2349
2350 dst = ip6_route_output(net, NULL, &fl6);
2351 if (!dst->error)
45e4fd26 2352 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2353 dst_release(dst);
2354}
2355EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2356
2357void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2358{
7ddacfa5 2359 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2360 struct dst_entry *dst;
2361
7ddacfa5
DA
2362 if (!oif && skb->dev)
2363 oif = l3mdev_master_ifindex(skb->dev);
2364
2365 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2366
2367 dst = __sk_dst_get(sk);
2368 if (!dst || !dst->obsolete ||
2369 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2370 return;
2371
2372 bh_lock_sock(sk);
2373 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2374 ip6_datagram_dst_update(sk, false);
2375 bh_unlock_sock(sk);
81aded24
DM
2376}
2377EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2378
7d6850f7
AK
2379void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2380 const struct flowi6 *fl6)
2381{
2382#ifdef CONFIG_IPV6_SUBTREES
2383 struct ipv6_pinfo *np = inet6_sk(sk);
2384#endif
2385
2386 ip6_dst_store(sk, dst,
2387 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2388 &sk->sk_v6_daddr : NULL,
2389#ifdef CONFIG_IPV6_SUBTREES
2390 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2391 &np->saddr :
2392#endif
2393 NULL);
2394}
2395
b55b76b2
DJ
2396/* Handle redirects */
/*
 * Flow key for redirect lookups: the ordinary flow plus the address that
 * route gateways are compared against (callers pass the source address of
 * the packet being processed).
 *
 * fl6 must stay the first member: __ip6_route_redirect() is handed a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
2397struct ip6rd_flowi {
2398 struct flowi6 fl6;
2399 struct in6_addr gateway;
2400};
2401
/*
 * Lookup callback passed to fib6_rule_lookup() by ip6_route_redirect().
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi, which also holds
 * the gateway address to match.  Starting at the fib6 node found for the
 * flow, the routes of the node are scanned for one that is alive, not
 * expired, has a gateway and uses the flow's output interface, and whose
 * gateway equals the wanted one.  If the route's own gateway differs, a
 * cached exception route with the wanted gateway is accepted instead.
 * Meeting a reject route ends the search with ip6_null_entry.  When no
 * route matches, the search backtracks through fib6_backtrack().
 *
 * Returns a held cached route, or a route created from the selected fib6
 * entry.
 */
2402static struct rt6_info *__ip6_route_redirect(struct net *net,
2403 struct fib6_table *table,
2404 struct flowi6 *fl6,
b75cc8f9 2405 const struct sk_buff *skb,
b55b76b2
DJ
2406 int flags)
2407{
2408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2409 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2410 struct fib6_info *rt;
b55b76b2
DJ
2411 struct fib6_node *fn;
2412
2413 /* Get the "current" route for this destination and
67c408cf 2414 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2415 *
2416 * RFC 4861 specifies that redirects should only be
2417 * accepted if they come from the nexthop to the target.
2418 * Due to the way the routes are chosen, this notion
2419 * is a bit fuzzy and one might need to check all possible
2420 * routes.
2421 */
2422
66f5d6ce 2423 rcu_read_lock();
6454743b 2424 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2425restart:
/* NOTE(review): the loop macro presumably walks the routes of fn through
 * the local variable rt, leaving rt NULL when the list is exhausted --
 * confirm against the macro definition.
 */
66f5d6ce 2426 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 2427 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
8067bb8c 2428 continue;
14895687 2429 if (fib6_check_expired(rt))
b55b76b2 2430 continue;
93c2fb25 2431 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2432 break;
2b2450ca 2433 if (!rt->fib6_nh.fib_nh_has_gw)
b55b76b2 2434 continue;
ad1601ae 2435 if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
b55b76b2 2436 continue;
2b760fcf
WW
2437 /* rt_cache's gateway might be different from its 'parent'
2438 * in the case of an ip redirect.
2439 * So we keep searching in the exception table if the gateway
2440 * is different.
2441 */
ad1601ae 2442 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
2b760fcf
WW
2443 rt_cache = rt6_find_cached_rt(rt,
2444 &fl6->daddr,
2445 &fl6->saddr);
2446 if (rt_cache &&
2447 ipv6_addr_equal(&rdfl->gateway,
2448 &rt_cache->rt6i_gateway)) {
23fb93a4 2449 ret = rt_cache;
2b760fcf
WW
2450 break;
2451 }
b55b76b2 2452 continue;
2b760fcf 2453 }
b55b76b2
DJ
2454 break;
2455 }
2456
/* No candidate left: use the null entry so that backtracking is tried.
 * A reject route ends the lookup with the null route.
 */
2457 if (!rt)
421842ed 2458 rt = net->ipv6.fib6_null_entry;
93c2fb25 2459 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2460 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2461 goto out;
2462 }
2463
421842ed 2464 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2465 fn = fib6_backtrack(fn, &fl6->saddr);
2466 if (fn)
2467 goto restart;
b55b76b2 2468 }
a3c00e46 2469
b0a1ba59 2470out:
/* A result chosen above only needs a reference; otherwise a route is
 * created from the selected fib6 entry.
 */
23fb93a4 2471 if (ret)
10585b43 2472 ip6_hold_safe(net, &ret);
23fb93a4
DA
2473 else
2474 ret = ip6_create_rt_rcu(rt);
b55b76b2 2475
66f5d6ce 2476 rcu_read_unlock();
b55b76b2 2477
/* NOTE(review): rt is handed to the tracepoint after rcu_read_unlock() --
 * confirm that the entry cannot be freed at this point.
 */
b65f164d 2478 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2479 return ret;
b55b76b2
DJ
2480};
2481
2482static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2483 const struct flowi6 *fl6,
2484 const struct sk_buff *skb,
2485 const struct in6_addr *gateway)
b55b76b2
DJ
2486{
2487 int flags = RT6_LOOKUP_F_HAS_SADDR;
2488 struct ip6rd_flowi rdfl;
2489
2490 rdfl.fl6 = *fl6;
2491 rdfl.gateway = *gateway;
2492
b75cc8f9 2493 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2494 flags, __ip6_route_redirect);
2495}
2496
e2d118a1
LC
2497void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2498 kuid_t uid)
3a5ad2ee
DM
2499{
2500 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2501 struct dst_entry *dst;
1f7f10ac
2502 struct flowi6 fl6 = {
2503 .flowi6_iif = LOOPBACK_IFINDEX,
2504 .flowi6_oif = oif,
2505 .flowi6_mark = mark,
2506 .daddr = iph->daddr,
2507 .saddr = iph->saddr,
2508 .flowlabel = ip6_flowinfo(iph),
2509 .flowi6_uid = uid,
2510 };
3a5ad2ee 2511
b75cc8f9 2512 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2513 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2514 dst_release(dst);
2515}
2516EXPORT_SYMBOL_GPL(ip6_redirect);
2517
d456336d 2518void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2519{
2520 const struct ipv6hdr *iph = ipv6_hdr(skb);
2521 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2522 struct dst_entry *dst;
0b26fb17
2523 struct flowi6 fl6 = {
2524 .flowi6_iif = LOOPBACK_IFINDEX,
2525 .flowi6_oif = oif,
0b26fb17
2526 .daddr = msg->dest,
2527 .saddr = iph->daddr,
2528 .flowi6_uid = sock_net_uid(net, NULL),
2529 };
c92a59ec 2530
b75cc8f9 2531 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2532 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2533 dst_release(dst);
2534}
2535
3a5ad2ee
DM
2536void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2537{
e2d118a1
LC
2538 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2539 sk->sk_uid);
3a5ad2ee
DM
2540}
2541EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2542
0dbaee3b 2543static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2544{
0dbaee3b
DM
2545 struct net_device *dev = dst->dev;
2546 unsigned int mtu = dst_mtu(dst);
2547 struct net *net = dev_net(dev);
2548
1da177e4
LT
2549 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2550
5578689a
DL
2551 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2552 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2553
2554 /*
1ab1457c
YH
2555 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2556 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2557 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2558 * rely only on pmtu discovery"
2559 */
2560 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2561 mtu = IPV6_MAXPLEN;
2562 return mtu;
2563}
2564
ebb762f2 2565static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2566{
d33e4553 2567 struct inet6_dev *idev;
d4ead6b3 2568 unsigned int mtu;
4b32b5ad
MKL
2569
2570 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2571 if (mtu)
30f78d8e 2572 goto out;
618f9bc7
SK
2573
2574 mtu = IPV6_MIN_MTU;
d33e4553
DM
2575
2576 rcu_read_lock();
2577 idev = __in6_dev_get(dst->dev);
2578 if (idev)
2579 mtu = idev->cnf.mtu6;
2580 rcu_read_unlock();
2581
30f78d8e 2582out:
14972cbd
RP
2583 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2584
2585 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2586}
2587
901731b8
DA
2588/* MTU selection:
2589 * 1. mtu on route is locked - use it
2590 * 2. mtu from nexthop exception
2591 * 3. mtu from egress device
2592 *
2593 * based on ip6_dst_mtu_forward and exception logic of
2594 * rt6_find_cached_rt; called with rcu_read_lock
2595 */
2596u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2597 struct in6_addr *saddr)
2598{
2599 struct rt6_exception_bucket *bucket;
2600 struct rt6_exception *rt6_ex;
2601 struct in6_addr *src_key;
2602 struct inet6_dev *idev;
2603 u32 mtu = 0;
2604
2605 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2606 mtu = f6i->fib6_pmtu;
2607 if (mtu)
2608 goto out;
2609 }
2610
2611 src_key = NULL;
2612#ifdef CONFIG_IPV6_SUBTREES
2613 if (f6i->fib6_src.plen)
2614 src_key = saddr;
2615#endif
2616
2617 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2618 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2619 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2620 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2621
2622 if (likely(!mtu)) {
2623 struct net_device *dev = fib6_info_nh_dev(f6i);
2624
2625 mtu = IPV6_MIN_MTU;
2626 idev = __in6_dev_get(dev);
2627 if (idev && idev->cnf.mtu6 > mtu)
2628 mtu = idev->cnf.mtu6;
2629 }
2630
2631 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2632out:
2633 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2634}
2635
3b00944c 2636struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2637 struct flowi6 *fl6)
1da177e4 2638{
87a11578 2639 struct dst_entry *dst;
1da177e4
LT
2640 struct rt6_info *rt;
2641 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2642 struct net *net = dev_net(dev);
1da177e4 2643
38308473 2644 if (unlikely(!idev))
122bdf67 2645 return ERR_PTR(-ENODEV);
1da177e4 2646
ad706862 2647 rt = ip6_dst_alloc(net, dev, 0);
38308473 2648 if (unlikely(!rt)) {
1da177e4 2649 in6_dev_put(idev);
87a11578 2650 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2651 goto out;
2652 }
2653
8e2ec639 2654 rt->dst.flags |= DST_HOST;
588753f1 2655 rt->dst.input = ip6_input;
8e2ec639 2656 rt->dst.output = ip6_output;
550bab42 2657 rt->rt6i_gateway = fl6->daddr;
87a11578 2658 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2659 rt->rt6i_dst.plen = 128;
2660 rt->rt6i_idev = idev;
14edd87d 2661 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2662
4c981e28 2663 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2664 * do proper release of the net_device
2665 */
2666 rt6_uncached_list_add(rt);
81eb8447 2667 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2668
87a11578
DM
2669 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2670
1da177e4 2671out:
87a11578 2672 return dst;
1da177e4
LT
2673}
2674
569d3645 2675static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2676{
86393e52 2677 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2678 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2679 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2680 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2681 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2682 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2683 int entries;
7019b78e 2684
fc66f95c 2685 entries = dst_entries_get_fast(ops);
49a18d86 2686 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2687 entries <= rt_max_size)
1da177e4
LT
2688 goto out;
2689
6891a346 2690 net->ipv6.ip6_rt_gc_expire++;
14956643 2691 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2692 entries = dst_entries_get_slow(ops);
2693 if (entries < ops->gc_thresh)
7019b78e 2694 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2695out:
7019b78e 2696 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2697 return entries > rt_max_size;
1da177e4
LT
2698}
2699
8c14586f
DA
2700static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2701 struct fib6_config *cfg,
f4797b33
DA
2702 const struct in6_addr *gw_addr,
2703 u32 tbid, int flags)
8c14586f
DA
2704{
2705 struct flowi6 fl6 = {
2706 .flowi6_oif = cfg->fc_ifindex,
2707 .daddr = *gw_addr,
2708 .saddr = cfg->fc_prefsrc,
2709 };
2710 struct fib6_table *table;
2711 struct rt6_info *rt;
8c14586f 2712
f4797b33 2713 table = fib6_get_table(net, tbid);
8c14586f
DA
2714 if (!table)
2715 return NULL;
2716
2717 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2718 flags |= RT6_LOOKUP_F_HAS_SADDR;
2719
f4797b33 2720 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2721 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2722
2723 /* if table lookup failed, fall back to full lookup */
2724 if (rt == net->ipv6.ip6_null_entry) {
2725 ip6_rt_put(rt);
2726 rt = NULL;
2727 }
2728
2729 return rt;
2730}
2731
fc1e64e1
DA
2732static int ip6_route_check_nh_onlink(struct net *net,
2733 struct fib6_config *cfg,
9fbb704c 2734 const struct net_device *dev,
fc1e64e1
DA
2735 struct netlink_ext_ack *extack)
2736{
44750f84 2737 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2738 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2739 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2740 struct fib6_info *from;
fc1e64e1
DA
2741 struct rt6_info *grt;
2742 int err;
2743
2744 err = 0;
2745 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2746 if (grt) {
bf1dc8ba
PA
2747 rcu_read_lock();
2748 from = rcu_dereference(grt->from);
58e354c0 2749 if (!grt->dst.error &&
4ed591c8 2750 /* ignore match if it is the default route */
bf1dc8ba 2751 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2752 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2753 NL_SET_ERR_MSG(extack,
2754 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2755 err = -EINVAL;
2756 }
bf1dc8ba 2757 rcu_read_unlock();
fc1e64e1
DA
2758
2759 ip6_rt_put(grt);
2760 }
2761
2762 return err;
2763}
2764
1edce99f
DA
2765static int ip6_route_check_nh(struct net *net,
2766 struct fib6_config *cfg,
2767 struct net_device **_dev,
2768 struct inet6_dev **idev)
2769{
2770 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2771 struct net_device *dev = _dev ? *_dev : NULL;
2772 struct rt6_info *grt = NULL;
2773 int err = -EHOSTUNREACH;
2774
2775 if (cfg->fc_table) {
f4797b33
DA
2776 int flags = RT6_LOOKUP_F_IFACE;
2777
2778 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2779 cfg->fc_table, flags);
1edce99f
DA
2780 if (grt) {
2781 if (grt->rt6i_flags & RTF_GATEWAY ||
2782 (dev && dev != grt->dst.dev)) {
2783 ip6_rt_put(grt);
2784 grt = NULL;
2785 }
2786 }
2787 }
2788
2789 if (!grt)
b75cc8f9 2790 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2791
2792 if (!grt)
2793 goto out;
2794
2795 if (dev) {
2796 if (dev != grt->dst.dev) {
2797 ip6_rt_put(grt);
2798 goto out;
2799 }
2800 } else {
2801 *_dev = dev = grt->dst.dev;
2802 *idev = grt->rt6i_idev;
2803 dev_hold(dev);
2804 in6_dev_hold(grt->rt6i_idev);
2805 }
2806
2807 if (!(grt->rt6i_flags & RTF_GATEWAY))
2808 err = 0;
2809
2810 ip6_rt_put(grt);
2811
2812out:
2813 return err;
2814}
2815
9fbb704c
DA
2816static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2817 struct net_device **_dev, struct inet6_dev **idev,
2818 struct netlink_ext_ack *extack)
2819{
2820 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2821 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2822 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2823 const struct net_device *dev = *_dev;
232378e8 2824 bool need_addr_check = !dev;
9fbb704c
DA
2825 int err = -EINVAL;
2826
2827 /* if gw_addr is local we will fail to detect this in case
2828 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2829 * will return already-added prefix route via interface that
2830 * prefix route was assigned to, which might be non-loopback.
2831 */
232378e8
DA
2832 if (dev &&
2833 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2834 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2835 goto out;
2836 }
2837
2838 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2839 /* IPv6 strictly inhibits using not link-local
2840 * addresses as nexthop address.
2841 * Otherwise, router will not able to send redirects.
2842 * It is very good, but in some (rare!) circumstances
2843 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2844 * some exceptions. --ANK
2845 * We allow IPv4-mapped nexthops to support RFC4798-type
2846 * addressing
2847 */
2848 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2849 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2850 goto out;
2851 }
2852
2853 if (cfg->fc_flags & RTNH_F_ONLINK)
2854 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2855 else
2856 err = ip6_route_check_nh(net, cfg, _dev, idev);
2857
2858 if (err)
2859 goto out;
2860 }
2861
2862 /* reload in case device was changed */
2863 dev = *_dev;
2864
2865 err = -EINVAL;
2866 if (!dev) {
2867 NL_SET_ERR_MSG(extack, "Egress device not specified");
2868 goto out;
2869 } else if (dev->flags & IFF_LOOPBACK) {
2870 NL_SET_ERR_MSG(extack,
2871 "Egress device can not be loopback device for this route");
2872 goto out;
2873 }
232378e8
DA
2874
2875 /* if we did not check gw_addr above, do so now that the
2876 * egress device has been resolved.
2877 */
2878 if (need_addr_check &&
2879 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2880 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2881 goto out;
2882 }
2883
9fbb704c
DA
2884 err = 0;
2885out:
2886 return err;
2887}
2888
83c44251
DA
2889static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2890{
2891 if ((flags & RTF_REJECT) ||
2892 (dev && (dev->flags & IFF_LOOPBACK) &&
2893 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2894 !(flags & RTF_LOCAL)))
2895 return true;
2896
2897 return false;
2898}
2899
/*
 * fib6_nh_init - fill in an IPv6 nexthop from a route configuration
 * @net:       network namespace of the route
 * @fib6_nh:   nexthop to initialise
 * @cfg:       route configuration (device index, gateway, flags, encap)
 * @gfp_flags: allocation flags passed on to fib_nh_common_init()
 * @extack:    netlink extended ack that receives error messages
 *
 * Resolves the nexthop device, validates the gateway when RTF_GATEWAY is
 * set, and records device, gateway and link state in @fib6_nh.  Routes
 * that qualify as reject routes are bound to the loopback device and skip
 * the gateway and encap setup.
 *
 * On success the device reference taken here stays with @fib6_nh (stored
 * in fib_nh_dev); on failure it is dropped and any lwtunnel state is
 * released.  The inet6_dev reference is dropped in both cases.
 *
 * Returns 0 on success or a negative errno: -ENODEV when no usable device
 * or inet6_dev is found, -ENETDOWN when the device is down, -EACCES when
 * IPv6 is disabled on it, or the error from ip6_validate_gw() or
 * fib_nh_common_init().
 */
2900int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2901 struct fib6_config *cfg, gfp_t gfp_flags,
2902 struct netlink_ext_ack *extack)
2903{
2904 struct net_device *dev = NULL;
2905 struct inet6_dev *idev = NULL;
2906 int addr_type;
2907 int err;
2908
f1741730
DA
2909 fib6_nh->fib_nh_family = AF_INET6;
2910
83c44251
DA
2911 err = -ENODEV;
2912 if (cfg->fc_ifindex) {
2913 dev = dev_get_by_index(net, cfg->fc_ifindex);
2914 if (!dev)
2915 goto out;
2916 idev = in6_dev_get(dev);
2917 if (!idev)
2918 goto out;
2919 }
2920
2921 if (cfg->fc_flags & RTNH_F_ONLINK) {
2922 if (!dev) {
2923 NL_SET_ERR_MSG(extack,
2924 "Nexthop device required for onlink");
2925 goto out;
2926 }
2927
2928 if (!(dev->flags & IFF_UP)) {
2929 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2930 err = -ENETDOWN;
2931 goto out;
2932 }
2933
ad1601ae 2934 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
2935 }
2936
ad1601ae 2937 fib6_nh->fib_nh_weight = 1;
83c44251
DA
2938
2939 /* We cannot add true routes via loopback here,
2940 * they would result in kernel looping; promote them to reject routes
2941 */
2942 addr_type = ipv6_addr_type(&cfg->fc_dst);
2943 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2944 /* hold loopback dev/idev if we haven't done so. */
2945 if (dev != net->loopback_dev) {
2946 if (dev) {
2947 dev_put(dev);
2948 in6_dev_put(idev);
2949 }
2950 dev = net->loopback_dev;
2951 dev_hold(dev);
2952 idev = in6_dev_get(dev);
2953 if (!idev) {
2954 err = -ENODEV;
2955 goto out;
2956 }
2957 }
2958 goto set_dev;
2959 }
2960
/* ip6_validate_gw() may fill in dev and idev when no device was given */
2961 if (cfg->fc_flags & RTF_GATEWAY) {
2962 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2963 if (err)
2964 goto out;
2965
ad1601ae 2966 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
2b2450ca 2967 fib6_nh->fib_nh_has_gw = 1;
83c44251
DA
2968 }
2969
2970 err = -ENODEV;
2971 if (!dev)
2972 goto out;
2973
2974 if (idev->cnf.disable_ipv6) {
2975 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2976 err = -EACCES;
2977 goto out;
2978 }
2979
2980 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
2981 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2982 err = -ENETDOWN;
2983 goto out;
2984 }
2985
2986 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2987 !netif_carrier_ok(dev))
ad1601ae 2988 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 2989
979e276e
DA
2990 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
2991 cfg->fc_encap_type, cfg, gfp_flags, extack);
2992 if (err)
2993 goto out;
83c44251 2994set_dev:
ad1601ae 2995 fib6_nh->fib_nh_dev = dev;
f1741730 2996 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
2997 err = 0;
2998out:
/* the idev reference was only needed for the checks above; on failure
 * the lwtunnel state and the device reference are undone as well
 */
2999 if (idev)
3000 in6_dev_put(idev);
3001
3002 if (err) {
ad1601ae
DA
3003 lwtstate_put(fib6_nh->fib_nh_lws);
3004 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3005 if (dev)
3006 dev_put(dev);
3007 }
3008
3009 return err;
3010}
3011
dac7d0f2
DA
3012void fib6_nh_release(struct fib6_nh *fib6_nh)
3013{
979e276e 3014 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3015}
3016
8d1c802b 3017static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3018 gfp_t gfp_flags,
333c4301 3019 struct netlink_ext_ack *extack)
1da177e4 3020{
5578689a 3021 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3022 struct fib6_info *rt = NULL;
c71099ac 3023 struct fib6_table *table;
8c5b83f0 3024 int err = -EINVAL;
83c44251 3025 int addr_type;
1da177e4 3026
557c44be 3027 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3028 if (cfg->fc_flags & RTF_PCPU) {
3029 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3030 goto out;
d5d531cb 3031 }
557c44be 3032
2ea2352e
WW
3033 /* RTF_CACHE is an internal flag; can not be set by userspace */
3034 if (cfg->fc_flags & RTF_CACHE) {
3035 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3036 goto out;
3037 }
3038
e8478e80
DA
3039 if (cfg->fc_type > RTN_MAX) {
3040 NL_SET_ERR_MSG(extack, "Invalid route type");
3041 goto out;
3042 }
3043
d5d531cb
DA
3044 if (cfg->fc_dst_len > 128) {
3045 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3046 goto out;
3047 }
3048 if (cfg->fc_src_len > 128) {
3049 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3050 goto out;
d5d531cb 3051 }
1da177e4 3052#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3053 if (cfg->fc_src_len) {
3054 NL_SET_ERR_MSG(extack,
3055 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3056 goto out;
d5d531cb 3057 }
1da177e4 3058#endif
fc1e64e1 3059
d71314b4 3060 err = -ENOBUFS;
38308473
DM
3061 if (cfg->fc_nlinfo.nlh &&
3062 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3063 table = fib6_get_table(net, cfg->fc_table);
38308473 3064 if (!table) {
f3213831 3065 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3066 table = fib6_new_table(net, cfg->fc_table);
3067 }
3068 } else {
3069 table = fib6_new_table(net, cfg->fc_table);
3070 }
38308473
DM
3071
3072 if (!table)
c71099ac 3073 goto out;
c71099ac 3074
93531c67
DA
3075 err = -ENOMEM;
3076 rt = fib6_info_alloc(gfp_flags);
3077 if (!rt)
1da177e4 3078 goto out;
93531c67 3079
d7e774f3
DA
3080 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3081 extack);
767a2217
DA
3082 if (IS_ERR(rt->fib6_metrics)) {
3083 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3084 /* Do not leave garbage there. */
3085 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3086 goto out;
3087 }
3088
93531c67
DA
3089 if (cfg->fc_flags & RTF_ADDRCONF)
3090 rt->dst_nocount = true;
1da177e4 3091
1716a961 3092 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3093 fib6_set_expires(rt, jiffies +
1716a961
G
3094 clock_t_to_jiffies(cfg->fc_expires));
3095 else
14895687 3096 fib6_clean_expires(rt);
1da177e4 3097
86872cb5
TG
3098 if (cfg->fc_protocol == RTPROT_UNSPEC)
3099 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3100 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3101
83c44251
DA
3102 rt->fib6_table = table;
3103 rt->fib6_metric = cfg->fc_metric;
3104 rt->fib6_type = cfg->fc_type;
2b2450ca 3105 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3106
93c2fb25
DA
3107 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3108 rt->fib6_dst.plen = cfg->fc_dst_len;
3109 if (rt->fib6_dst.plen == 128)
3b6761d1 3110 rt->dst_host = true;
e5fd387a 3111
1da177e4 3112#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3113 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3114 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3115#endif
83c44251
DA
3116 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3117 if (err)
3118 goto out;
1da177e4
LT
3119
3120 /* We cannot add true routes via loopback here,
83c44251 3121 * they would result in kernel looping; promote them to reject routes
1da177e4 3122 */
83c44251 3123 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3124 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3125 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3126
c3968a85 3127 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3128 struct net_device *dev = fib6_info_nh_dev(rt);
3129
c3968a85 3130 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3131 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3132 err = -EINVAL;
3133 goto out;
3134 }
93c2fb25
DA
3135 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3136 rt->fib6_prefsrc.plen = 128;
c3968a85 3137 } else
93c2fb25 3138 rt->fib6_prefsrc.plen = 0;
c3968a85 3139
8c5b83f0 3140 return rt;
6b9ea5a6 3141out:
93531c67 3142 fib6_info_release(rt);
8c5b83f0 3143 return ERR_PTR(err);
6b9ea5a6
RP
3144}
3145
acb54e3c 3146int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3147 struct netlink_ext_ack *extack)
6b9ea5a6 3148{
8d1c802b 3149 struct fib6_info *rt;
6b9ea5a6
RP
3150 int err;
3151
acb54e3c 3152 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3153 if (IS_ERR(rt))
3154 return PTR_ERR(rt);
6b9ea5a6 3155
d4ead6b3 3156 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3157 fib6_info_release(rt);
6b9ea5a6 3158
1da177e4
LT
3159 return err;
3160}
3161
8d1c802b 3162static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3163{
afb1d4b5 3164 struct net *net = info->nl_net;
c71099ac 3165 struct fib6_table *table;
afb1d4b5 3166 int err;
1da177e4 3167
421842ed 3168 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3169 err = -ENOENT;
3170 goto out;
3171 }
6c813a72 3172
93c2fb25 3173 table = rt->fib6_table;
66f5d6ce 3174 spin_lock_bh(&table->tb6_lock);
86872cb5 3175 err = fib6_del(rt, info);
66f5d6ce 3176 spin_unlock_bh(&table->tb6_lock);
1da177e4 3177
6825a26c 3178out:
93531c67 3179 fib6_info_release(rt);
1da177e4
LT
3180 return err;
3181}
3182
8d1c802b 3183int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3184{
afb1d4b5
DA
3185 struct nl_info info = { .nl_net = net };
3186
528c4ceb 3187 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3188}
3189
8d1c802b 3190static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3191{
3192 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3193 struct net *net = info->nl_net;
16a16cd3 3194 struct sk_buff *skb = NULL;
0ae81335 3195 struct fib6_table *table;
e3330039 3196 int err = -ENOENT;
0ae81335 3197
421842ed 3198 if (rt == net->ipv6.fib6_null_entry)
e3330039 3199 goto out_put;
93c2fb25 3200 table = rt->fib6_table;
66f5d6ce 3201 spin_lock_bh(&table->tb6_lock);
0ae81335 3202
93c2fb25 3203 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3204 struct fib6_info *sibling, *next_sibling;
0ae81335 3205
16a16cd3
DA
3206 /* prefer to send a single notification with all hops */
3207 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3208 if (skb) {
3209 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3210
d4ead6b3 3211 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3212 NULL, NULL, 0, RTM_DELROUTE,
3213 info->portid, seq, 0) < 0) {
3214 kfree_skb(skb);
3215 skb = NULL;
3216 } else
3217 info->skip_notify = 1;
3218 }
3219
0ae81335 3220 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3221 &rt->fib6_siblings,
3222 fib6_siblings) {
0ae81335
DA
3223 err = fib6_del(sibling, info);
3224 if (err)
e3330039 3225 goto out_unlock;
0ae81335
DA
3226 }
3227 }
3228
3229 err = fib6_del(rt, info);
e3330039 3230out_unlock:
66f5d6ce 3231 spin_unlock_bh(&table->tb6_lock);
e3330039 3232out_put:
93531c67 3233 fib6_info_release(rt);
16a16cd3
DA
3234
3235 if (skb) {
e3330039 3236 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3237 info->nlh, gfp_any());
3238 }
0ae81335
DA
3239 return err;
3240}
3241
23fb93a4
DA
3242static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3243{
3244 int rc = -ESRCH;
3245
3246 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3247 goto out;
3248
3249 if (cfg->fc_flags & RTF_GATEWAY &&
3250 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3251 goto out;
761f6026
XL
3252
3253 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3254out:
3255 return rc;
3256}
3257
333c4301
DA
3258static int ip6_route_del(struct fib6_config *cfg,
3259 struct netlink_ext_ack *extack)
1da177e4 3260{
8d1c802b 3261 struct rt6_info *rt_cache;
c71099ac 3262 struct fib6_table *table;
8d1c802b 3263 struct fib6_info *rt;
1da177e4 3264 struct fib6_node *fn;
1da177e4
LT
3265 int err = -ESRCH;
3266
5578689a 3267 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3268 if (!table) {
3269 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3270 return err;
d5d531cb 3271 }
c71099ac 3272
66f5d6ce 3273 rcu_read_lock();
1da177e4 3274
c71099ac 3275 fn = fib6_locate(&table->tb6_root,
86872cb5 3276 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3277 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3278 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3279
1da177e4 3280 if (fn) {
66f5d6ce 3281 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3282 struct fib6_nh *nh;
3283
2b760fcf 3284 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3285 int rc;
3286
2b760fcf
WW
3287 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3288 &cfg->fc_src);
23fb93a4
DA
3289 if (rt_cache) {
3290 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3291 if (rc != -ESRCH) {
3292 rcu_read_unlock();
23fb93a4 3293 return rc;
9e575010 3294 }
23fb93a4
DA
3295 }
3296 continue;
2b760fcf 3297 }
ad1601ae
DA
3298
3299 nh = &rt->fib6_nh;
86872cb5 3300 if (cfg->fc_ifindex &&
ad1601ae
DA
3301 (!nh->fib_nh_dev ||
3302 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3303 continue;
86872cb5 3304 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3305 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3306 continue;
93c2fb25 3307 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3308 continue;
93c2fb25 3309 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3310 continue;
e873e4b9
WW
3311 if (!fib6_info_hold_safe(rt))
3312 continue;
66f5d6ce 3313 rcu_read_unlock();
1da177e4 3314
0ae81335
DA
3315 /* if gateway was specified only delete the one hop */
3316 if (cfg->fc_flags & RTF_GATEWAY)
3317 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3318
3319 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3320 }
3321 }
66f5d6ce 3322 rcu_read_unlock();
1da177e4
LT
3323
3324 return err;
3325}
3326
6700c270 3327static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3328{
a6279458 3329 struct netevent_redirect netevent;
e8599ff4 3330 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3331 struct ndisc_options ndopts;
3332 struct inet6_dev *in6_dev;
3333 struct neighbour *neigh;
a68886a6 3334 struct fib6_info *from;
71bcdba0 3335 struct rd_msg *msg;
6e157b6a
DM
3336 int optlen, on_link;
3337 u8 *lladdr;
e8599ff4 3338
29a3cad5 3339 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3340 optlen -= sizeof(*msg);
e8599ff4
DM
3341
3342 if (optlen < 0) {
6e157b6a 3343 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3344 return;
3345 }
3346
71bcdba0 3347 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3348
71bcdba0 3349 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3350 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3351 return;
3352 }
3353
6e157b6a 3354 on_link = 0;
71bcdba0 3355 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3356 on_link = 1;
71bcdba0 3357 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3358 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3359 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3360 return;
3361 }
3362
3363 in6_dev = __in6_dev_get(skb->dev);
3364 if (!in6_dev)
3365 return;
3366 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3367 return;
3368
3369 /* RFC2461 8.1:
3370 * The IP source address of the Redirect MUST be the same as the current
3371 * first-hop router for the specified ICMP Destination Address.
3372 */
3373
f997c55c 3374 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3375 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3376 return;
3377 }
6e157b6a
DM
3378
3379 lladdr = NULL;
e8599ff4
DM
3380 if (ndopts.nd_opts_tgt_lladdr) {
3381 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3382 skb->dev);
3383 if (!lladdr) {
3384 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3385 return;
3386 }
3387 }
3388
6e157b6a 3389 rt = (struct rt6_info *) dst;
ec13ad1d 3390 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3391 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3392 return;
6e157b6a 3393 }
e8599ff4 3394
6e157b6a
DM
3395 /* Redirect received -> path was valid.
3396 * Look, redirects are sent only in response to data packets,
3397 * so that this nexthop apparently is reachable. --ANK
3398 */
0dec879f 3399 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3400
71bcdba0 3401 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3402 if (!neigh)
3403 return;
a6279458 3404
1da177e4
LT
3405 /*
3406 * We have finally decided to accept it.
3407 */
3408
f997c55c 3409 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3410 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3411 NEIGH_UPDATE_F_OVERRIDE|
3412 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3413 NEIGH_UPDATE_F_ISROUTER)),
3414 NDISC_REDIRECT, &ndopts);
1da177e4 3415
4d85cd0c 3416 rcu_read_lock();
a68886a6 3417 from = rcu_dereference(rt->from);
e873e4b9
WW
3418 /* This fib6_info_hold() is safe here because we hold reference to rt
3419 * and rt already holds reference to fib6_info.
3420 */
8a14e46f 3421 fib6_info_hold(from);
4d85cd0c 3422 rcu_read_unlock();
8a14e46f
DA
3423
3424 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3425 if (!nrt)
1da177e4
LT
3426 goto out;
3427
3428 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3429 if (on_link)
3430 nrt->rt6i_flags &= ~RTF_GATEWAY;
3431
4e3fd7a0 3432 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3433
2b760fcf
WW
3434 /* No need to remove rt from the exception table if rt is
3435 * a cached route because rt6_insert_exception() will
3436 * takes care of it
3437 */
8a14e46f 3438 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3439 dst_release_immediate(&nrt->dst);
3440 goto out;
3441 }
1da177e4 3442
d8d1f30b
CG
3443 netevent.old = &rt->dst;
3444 netevent.new = &nrt->dst;
71bcdba0 3445 netevent.daddr = &msg->dest;
60592833 3446 netevent.neigh = neigh;
8d71740c
TT
3447 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3448
1da177e4 3449out:
8a14e46f 3450 fib6_info_release(from);
e8599ff4 3451 neigh_release(neigh);
6e157b6a
DM
3452}
3453
70ceb4f5 3454#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3455static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3456 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3457 const struct in6_addr *gwaddr,
3458 struct net_device *dev)
70ceb4f5 3459{
830218c1
DA
3460 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3461 int ifindex = dev->ifindex;
70ceb4f5 3462 struct fib6_node *fn;
8d1c802b 3463 struct fib6_info *rt = NULL;
c71099ac
TG
3464 struct fib6_table *table;
3465
830218c1 3466 table = fib6_get_table(net, tb_id);
38308473 3467 if (!table)
c71099ac 3468 return NULL;
70ceb4f5 3469
66f5d6ce 3470 rcu_read_lock();
38fbeeee 3471 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3472 if (!fn)
3473 goto out;
3474
66f5d6ce 3475 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3476 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3477 continue;
2b2450ca
DA
3478 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3479 !rt->fib6_nh.fib_nh_has_gw)
70ceb4f5 3480 continue;
ad1601ae 3481 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3482 continue;
e873e4b9
WW
3483 if (!fib6_info_hold_safe(rt))
3484 continue;
70ceb4f5
YH
3485 break;
3486 }
3487out:
66f5d6ce 3488 rcu_read_unlock();
70ceb4f5
YH
3489 return rt;
3490}
3491
8d1c802b 3492static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3493 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3494 const struct in6_addr *gwaddr,
3495 struct net_device *dev,
95c96174 3496 unsigned int pref)
70ceb4f5 3497{
86872cb5 3498 struct fib6_config cfg = {
238fc7ea 3499 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3500 .fc_ifindex = dev->ifindex,
86872cb5
TG
3501 .fc_dst_len = prefixlen,
3502 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3503 RTF_UP | RTF_PREF(pref),
b91d5329 3504 .fc_protocol = RTPROT_RA,
e8478e80 3505 .fc_type = RTN_UNICAST,
15e47304 3506 .fc_nlinfo.portid = 0,
efa2cea0
DL
3507 .fc_nlinfo.nlh = NULL,
3508 .fc_nlinfo.nl_net = net,
86872cb5
TG
3509 };
3510
830218c1 3511 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3512 cfg.fc_dst = *prefix;
3513 cfg.fc_gateway = *gwaddr;
70ceb4f5 3514
e317da96
YH
3515 /* We should treat it as a default route if prefix length is 0. */
3516 if (!prefixlen)
86872cb5 3517 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3518
acb54e3c 3519 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3520
830218c1 3521 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3522}
3523#endif
3524
8d1c802b 3525struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3526 const struct in6_addr *addr,
3527 struct net_device *dev)
1ab1457c 3528{
830218c1 3529 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3530 struct fib6_info *rt;
c71099ac 3531 struct fib6_table *table;
1da177e4 3532
afb1d4b5 3533 table = fib6_get_table(net, tb_id);
38308473 3534 if (!table)
c71099ac 3535 return NULL;
1da177e4 3536
66f5d6ce
WW
3537 rcu_read_lock();
3538 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3539 struct fib6_nh *nh = &rt->fib6_nh;
3540
3541 if (dev == nh->fib_nh_dev &&
93c2fb25 3542 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3543 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3544 break;
3545 }
e873e4b9
WW
3546 if (rt && !fib6_info_hold_safe(rt))
3547 rt = NULL;
66f5d6ce 3548 rcu_read_unlock();
1da177e4
LT
3549 return rt;
3550}
3551
8d1c802b 3552struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3553 const struct in6_addr *gwaddr,
ebacaaa0
YH
3554 struct net_device *dev,
3555 unsigned int pref)
1da177e4 3556{
86872cb5 3557 struct fib6_config cfg = {
ca254490 3558 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3559 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3560 .fc_ifindex = dev->ifindex,
3561 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3562 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3563 .fc_protocol = RTPROT_RA,
e8478e80 3564 .fc_type = RTN_UNICAST,
15e47304 3565 .fc_nlinfo.portid = 0,
5578689a 3566 .fc_nlinfo.nlh = NULL,
afb1d4b5 3567 .fc_nlinfo.nl_net = net,
86872cb5 3568 };
1da177e4 3569
4e3fd7a0 3570 cfg.fc_gateway = *gwaddr;
1da177e4 3571
acb54e3c 3572 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3573 struct fib6_table *table;
3574
3575 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3576 if (table)
3577 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3578 }
1da177e4 3579
afb1d4b5 3580 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3581}
3582
afb1d4b5
DA
3583static void __rt6_purge_dflt_routers(struct net *net,
3584 struct fib6_table *table)
1da177e4 3585{
8d1c802b 3586 struct fib6_info *rt;
1da177e4
LT
3587
3588restart:
66f5d6ce
WW
3589 rcu_read_lock();
3590 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3591 struct net_device *dev = fib6_info_nh_dev(rt);
3592 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3593
93c2fb25 3594 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3595 (!idev || idev->cnf.accept_ra != 2) &&
3596 fib6_info_hold_safe(rt)) {
93531c67
DA
3597 rcu_read_unlock();
3598 ip6_del_rt(net, rt);
1da177e4
LT
3599 goto restart;
3600 }
3601 }
66f5d6ce 3602 rcu_read_unlock();
830218c1
DA
3603
3604 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3605}
3606
3607void rt6_purge_dflt_routers(struct net *net)
3608{
3609 struct fib6_table *table;
3610 struct hlist_head *head;
3611 unsigned int h;
3612
3613 rcu_read_lock();
3614
3615 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3616 head = &net->ipv6.fib_table_hash[h];
3617 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3618 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3619 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3620 }
3621 }
3622
3623 rcu_read_unlock();
1da177e4
LT
3624}
3625
5578689a
DL
3626static void rtmsg_to_fib6_config(struct net *net,
3627 struct in6_rtmsg *rtmsg,
86872cb5
TG
3628 struct fib6_config *cfg)
3629{
8823a3ac
3630 *cfg = (struct fib6_config){
3631 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3632 : RT6_TABLE_MAIN,
3633 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3634 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3635 .fc_expires = rtmsg->rtmsg_info,
3636 .fc_dst_len = rtmsg->rtmsg_dst_len,
3637 .fc_src_len = rtmsg->rtmsg_src_len,
3638 .fc_flags = rtmsg->rtmsg_flags,
3639 .fc_type = rtmsg->rtmsg_type,
3640
3641 .fc_nlinfo.nl_net = net,
3642
3643 .fc_dst = rtmsg->rtmsg_dst,
3644 .fc_src = rtmsg->rtmsg_src,
3645 .fc_gateway = rtmsg->rtmsg_gateway,
3646 };
86872cb5
TG
3647}
3648
5578689a 3649int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3650{
86872cb5 3651 struct fib6_config cfg;
1da177e4
LT
3652 struct in6_rtmsg rtmsg;
3653 int err;
3654
67ba4152 3655 switch (cmd) {
1da177e4
LT
3656 case SIOCADDRT: /* Add a route */
3657 case SIOCDELRT: /* Delete a route */
af31f412 3658 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3659 return -EPERM;
3660 err = copy_from_user(&rtmsg, arg,
3661 sizeof(struct in6_rtmsg));
3662 if (err)
3663 return -EFAULT;
86872cb5 3664
5578689a 3665 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3666
1da177e4
LT
3667 rtnl_lock();
3668 switch (cmd) {
3669 case SIOCADDRT:
acb54e3c 3670 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3671 break;
3672 case SIOCDELRT:
333c4301 3673 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3674 break;
3675 default:
3676 err = -EINVAL;
3677 }
3678 rtnl_unlock();
3679
3680 return err;
3ff50b79 3681 }
1da177e4
LT
3682
3683 return -EINVAL;
3684}
3685
3686/*
3687 * Drop the packet on the floor
3688 */
3689
d5fdd6ba 3690static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3691{
612f09e8 3692 int type;
adf30907 3693 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3694 switch (ipstats_mib_noroutes) {
3695 case IPSTATS_MIB_INNOROUTES:
0660e03f 3696 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3697 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3698 IP6_INC_STATS(dev_net(dst->dev),
3699 __in6_dev_get_safely(skb->dev),
3bd653c8 3700 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3701 break;
3702 }
3703 /* FALLTHROUGH */
3704 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3705 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3706 ipstats_mib_noroutes);
612f09e8
YH
3707 break;
3708 }
3ffe533c 3709 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3710 kfree_skb(skb);
3711 return 0;
3712}
3713
9ce8ade0
TG
3714static int ip6_pkt_discard(struct sk_buff *skb)
3715{
612f09e8 3716 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3717}
3718
ede2059d 3719static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3720{
adf30907 3721 skb->dev = skb_dst(skb)->dev;
612f09e8 3722 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3723}
3724
9ce8ade0
TG
3725static int ip6_pkt_prohibit(struct sk_buff *skb)
3726{
612f09e8 3727 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3728}
3729
ede2059d 3730static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3731{
adf30907 3732 skb->dev = skb_dst(skb)->dev;
612f09e8 3733 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3734}
3735
1da177e4
LT
3736/*
3737 * Allocate a dst for local (unicast / anycast) address.
3738 */
3739
360a9887
DA
3740struct fib6_info *addrconf_f6i_alloc(struct net *net,
3741 struct inet6_dev *idev,
3742 const struct in6_addr *addr,
3743 bool anycast, gfp_t gfp_flags)
1da177e4 3744{
c7a1ce39
DA
3745 struct fib6_config cfg = {
3746 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3747 .fc_ifindex = idev->dev->ifindex,
3748 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3749 .fc_dst = *addr,
3750 .fc_dst_len = 128,
3751 .fc_protocol = RTPROT_KERNEL,
3752 .fc_nlinfo.nl_net = net,
3753 .fc_ignore_dev_down = true,
3754 };
1da177e4 3755
e8478e80 3756 if (anycast) {
c7a1ce39
DA
3757 cfg.fc_type = RTN_ANYCAST;
3758 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3759 } else {
c7a1ce39
DA
3760 cfg.fc_type = RTN_LOCAL;
3761 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3762 }
1da177e4 3763
c7a1ce39 3764 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3765}
3766
/*
 * Argument bundle for the FIB walk that removes a deleted address from
 * the prefsrc of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace, used to spot the null entry */
	struct in6_addr *addr;		/* address being removed */
};
3773
8d1c802b 3774static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3775{
3776 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3777 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3778 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3779
ad1601ae 3780 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3781 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3782 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3783 spin_lock_bh(&rt6_exception_lock);
c3968a85 3784 /* remove prefsrc entry */
93c2fb25 3785 rt->fib6_prefsrc.plen = 0;
60006a48 3786 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3787 }
3788 return 0;
3789}
3790
3791void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3792{
3793 struct net *net = dev_net(ifp->idev->dev);
3794 struct arg_dev_net_ip adni = {
3795 .dev = ifp->idev->dev,
3796 .net = net,
3797 .addr = &ifp->addr,
3798 };
0c3584d5 3799 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3800}
3801
2b2450ca 3802#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3803
3804/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3805static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3806{
3807 struct in6_addr *gateway = (struct in6_addr *)arg;
3808
93c2fb25 3809 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
2b2450ca 3810 rt->fib6_nh.fib_nh_has_gw &&
ad1601ae 3811 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3812 return -1;
3813 }
b16cb459
WW
3814
3815 /* Further clean up cached routes in exception table.
3816 * This is needed because cached route may have a different
3817 * gateway than its 'parent' in the case of an ip redirect.
3818 */
3819 rt6_exceptions_clean_tohost(rt, gateway);
3820
be7a010d
DJ
3821 return 0;
3822}
3823
3824void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3825{
3826 fib6_clean_all(net, fib6_clean_tohost, gateway);
3827}
3828
/*
 * Argument bundle for the FIB walks triggered by netdevice state changes.
 * The union member in use depends on the walker: fib6_ifup() reads
 * nh_flags, fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear */
		unsigned long event;	/* NETDEV_* event being handled */
	};
};
3836
8d1c802b 3837static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3838{
8d1c802b 3839 struct fib6_info *iter;
d7dedee1
IS
3840 struct fib6_node *fn;
3841
93c2fb25
DA
3842 fn = rcu_dereference_protected(rt->fib6_node,
3843 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3844 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3845 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3846 while (iter) {
93c2fb25 3847 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3848 rt6_qualify_for_ecmp(iter))
d7dedee1 3849 return iter;
8fb11a9a 3850 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3851 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3852 }
3853
3854 return NULL;
3855}
3856
8d1c802b 3857static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3858{
ad1601ae
DA
3859 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3860 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3861 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3862 return true;
3863
3864 return false;
3865}
3866
8d1c802b 3867static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3868{
8d1c802b 3869 struct fib6_info *iter;
d7dedee1
IS
3870 int total = 0;
3871
3872 if (!rt6_is_dead(rt))
ad1601ae 3873 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3874
93c2fb25 3875 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3876 if (!rt6_is_dead(iter))
ad1601ae 3877 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3878 }
3879
3880 return total;
3881}
3882
8d1c802b 3883static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3884{
3885 int upper_bound = -1;
3886
3887 if (!rt6_is_dead(rt)) {
ad1601ae 3888 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3889 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3890 total) - 1;
3891 }
ad1601ae 3892 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3893}
3894
8d1c802b 3895static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3896{
8d1c802b 3897 struct fib6_info *iter;
d7dedee1
IS
3898 int weight = 0;
3899
3900 rt6_upper_bound_set(rt, &weight, total);
3901
93c2fb25 3902 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3903 rt6_upper_bound_set(iter, &weight, total);
3904}
3905
8d1c802b 3906void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3907{
8d1c802b 3908 struct fib6_info *first;
d7dedee1
IS
3909 int total;
3910
3911 /* In case the entire multipath route was marked for flushing,
3912 * then there is no need to rebalance upon the removal of every
3913 * sibling route.
3914 */
93c2fb25 3915 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3916 return;
3917
3918 /* During lookup routes are evaluated in order, so we need to
3919 * make sure upper bounds are assigned from the first sibling
3920 * onwards.
3921 */
3922 first = rt6_multipath_first_sibling(rt);
3923 if (WARN_ON_ONCE(!first))
3924 return;
3925
3926 total = rt6_multipath_total_weight(first);
3927 rt6_multipath_upper_bound_set(first, total);
3928}
3929
8d1c802b 3930static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3931{
3932 const struct arg_netdev_event *arg = p_arg;
7aef6859 3933 struct net *net = dev_net(arg->dev);
2127d95a 3934
ad1601ae
DA
3935 if (rt != net->ipv6.fib6_null_entry &&
3936 rt->fib6_nh.fib_nh_dev == arg->dev) {
3937 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 3938 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3939 rt6_multipath_rebalance(rt);
1de178ed 3940 }
2127d95a
IS
3941
3942 return 0;
3943}
3944
3945void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3946{
3947 struct arg_netdev_event arg = {
3948 .dev = dev,
6802f3ad
IS
3949 {
3950 .nh_flags = nh_flags,
3951 },
2127d95a
IS
3952 };
3953
3954 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3955 arg.nh_flags |= RTNH_F_LINKDOWN;
3956
3957 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3958}
3959
8d1c802b 3960static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3961 const struct net_device *dev)
3962{
8d1c802b 3963 struct fib6_info *iter;
1de178ed 3964
ad1601ae 3965 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 3966 return true;
93c2fb25 3967 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 3968 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
3969 return true;
3970
3971 return false;
3972}
3973
8d1c802b 3974static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3975{
8d1c802b 3976 struct fib6_info *iter;
1de178ed
IS
3977
3978 rt->should_flush = 1;
93c2fb25 3979 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3980 iter->should_flush = 1;
3981}
3982
8d1c802b 3983static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3984 const struct net_device *down_dev)
3985{
8d1c802b 3986 struct fib6_info *iter;
1de178ed
IS
3987 unsigned int dead = 0;
3988
ad1601ae
DA
3989 if (rt->fib6_nh.fib_nh_dev == down_dev ||
3990 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 3991 dead++;
93c2fb25 3992 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
3993 if (iter->fib6_nh.fib_nh_dev == down_dev ||
3994 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
3995 dead++;
3996
3997 return dead;
3998}
3999
8d1c802b 4000static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4001 const struct net_device *dev,
4002 unsigned int nh_flags)
4003{
8d1c802b 4004 struct fib6_info *iter;
1de178ed 4005
ad1601ae
DA
4006 if (rt->fib6_nh.fib_nh_dev == dev)
4007 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 4008 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4009 if (iter->fib6_nh.fib_nh_dev == dev)
4010 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4011}
4012
a1a22c12 4013/* called with write lock held for table with rt */
8d1c802b 4014static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4015{
4c981e28
IS
4016 const struct arg_netdev_event *arg = p_arg;
4017 const struct net_device *dev = arg->dev;
7aef6859 4018 struct net *net = dev_net(dev);
8ed67789 4019
421842ed 4020 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4021 return 0;
4022
4023 switch (arg->event) {
4024 case NETDEV_UNREGISTER:
ad1601ae 4025 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4026 case NETDEV_DOWN:
1de178ed 4027 if (rt->should_flush)
27c6fa73 4028 return -1;
93c2fb25 4029 if (!rt->fib6_nsiblings)
ad1601ae 4030 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4031 if (rt6_multipath_uses_dev(rt, dev)) {
4032 unsigned int count;
4033
4034 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4035 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4036 rt6_multipath_flush(rt);
4037 return -1;
4038 }
4039 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4040 RTNH_F_LINKDOWN);
7aef6859 4041 fib6_update_sernum(net, rt);
d7dedee1 4042 rt6_multipath_rebalance(rt);
1de178ed
IS
4043 }
4044 return -2;
27c6fa73 4045 case NETDEV_CHANGE:
ad1601ae 4046 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4047 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4048 break;
ad1601ae 4049 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4050 rt6_multipath_rebalance(rt);
27c6fa73 4051 break;
2b241361 4052 }
c159d30c 4053
1da177e4
LT
4054 return 0;
4055}
4056
27c6fa73 4057void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4058{
4c981e28 4059 struct arg_netdev_event arg = {
8ed67789 4060 .dev = dev,
6802f3ad
IS
4061 {
4062 .event = event,
4063 },
8ed67789 4064 };
7c6bb7d2 4065 struct net *net = dev_net(dev);
8ed67789 4066
7c6bb7d2
DA
4067 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4068 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4069 else
4070 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4071}
4072
4073void rt6_disable_ip(struct net_device *dev, unsigned long event)
4074{
4075 rt6_sync_down_dev(dev, event);
4076 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4077 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4078}
4079
/* Argument bundle passed through fib6_clean_all() to rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU value for that device */
};
4084
8d1c802b 4085static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4086{
4087 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4088 struct inet6_dev *idev;
4089
4090 /* In IPv6 pmtu discovery is not optional,
4091 so that RTAX_MTU lock cannot disable it.
4092 We still use this lock to block changes
4093 caused by addrconf/ndisc.
4094 */
4095
4096 idev = __in6_dev_get(arg->dev);
38308473 4097 if (!idev)
1da177e4
LT
4098 return 0;
4099
4100 /* For administrative MTU increase, there is no way to discover
4101 IPv6 PMTU increase, so PMTU increase should be updated here.
4102 Since RFC 1981 doesn't include administrative MTU increase
4103 update PMTU increase is a MUST. (i.e. jumbo frame)
4104 */
ad1601ae 4105 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4106 !fib6_metric_locked(rt, RTAX_MTU)) {
4107 u32 mtu = rt->fib6_pmtu;
4108
4109 if (mtu >= arg->mtu ||
4110 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4111 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4112
f5bbe7ee 4113 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4114 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4115 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4116 }
1da177e4
LT
4117 return 0;
4118}
4119
95c96174 4120void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4121{
c71099ac
TG
4122 struct rt6_mtu_change_arg arg = {
4123 .dev = dev,
4124 .mtu = mtu,
4125 };
1da177e4 4126
0c3584d5 4127 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4128}
4129
ef7c79ed 4130static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4131 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4132 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4133 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4134 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4135 [RTA_PRIORITY] = { .type = NLA_U32 },
4136 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4137 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4138 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4139 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4140 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4141 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4142 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4143 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4144 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4145 [RTA_IP_PROTO] = { .type = NLA_U8 },
4146 [RTA_SPORT] = { .type = NLA_U16 },
4147 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4148};
4149
4150static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4151 struct fib6_config *cfg,
4152 struct netlink_ext_ack *extack)
1da177e4 4153{
86872cb5
TG
4154 struct rtmsg *rtm;
4155 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4156 unsigned int pref;
86872cb5 4157 int err;
1da177e4 4158
fceb6435 4159 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4160 extack);
86872cb5
TG
4161 if (err < 0)
4162 goto errout;
1da177e4 4163
86872cb5
TG
4164 err = -EINVAL;
4165 rtm = nlmsg_data(nlh);
86872cb5 4166
84db8407
4167 *cfg = (struct fib6_config){
4168 .fc_table = rtm->rtm_table,
4169 .fc_dst_len = rtm->rtm_dst_len,
4170 .fc_src_len = rtm->rtm_src_len,
4171 .fc_flags = RTF_UP,
4172 .fc_protocol = rtm->rtm_protocol,
4173 .fc_type = rtm->rtm_type,
4174
4175 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4176 .fc_nlinfo.nlh = nlh,
4177 .fc_nlinfo.nl_net = sock_net(skb->sk),
4178 };
86872cb5 4179
ef2c7d7b
ND
4180 if (rtm->rtm_type == RTN_UNREACHABLE ||
4181 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4182 rtm->rtm_type == RTN_PROHIBIT ||
4183 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4184 cfg->fc_flags |= RTF_REJECT;
4185
ab79ad14
4186 if (rtm->rtm_type == RTN_LOCAL)
4187 cfg->fc_flags |= RTF_LOCAL;
4188
1f56a01f
MKL
4189 if (rtm->rtm_flags & RTM_F_CLONED)
4190 cfg->fc_flags |= RTF_CACHE;
4191
fc1e64e1
DA
4192 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4193
86872cb5 4194 if (tb[RTA_GATEWAY]) {
67b61f6c 4195 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4196 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4197 }
e3818541
DA
4198 if (tb[RTA_VIA]) {
4199 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4200 goto errout;
4201 }
86872cb5
TG
4202
4203 if (tb[RTA_DST]) {
4204 int plen = (rtm->rtm_dst_len + 7) >> 3;
4205
4206 if (nla_len(tb[RTA_DST]) < plen)
4207 goto errout;
4208
4209 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4210 }
86872cb5
TG
4211
4212 if (tb[RTA_SRC]) {
4213 int plen = (rtm->rtm_src_len + 7) >> 3;
4214
4215 if (nla_len(tb[RTA_SRC]) < plen)
4216 goto errout;
4217
4218 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4219 }
86872cb5 4220
c3968a85 4221 if (tb[RTA_PREFSRC])
67b61f6c 4222 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4223
86872cb5
TG
4224 if (tb[RTA_OIF])
4225 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4226
4227 if (tb[RTA_PRIORITY])
4228 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4229
4230 if (tb[RTA_METRICS]) {
4231 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4232 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4233 }
86872cb5
TG
4234
4235 if (tb[RTA_TABLE])
4236 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4237
51ebd318
ND
4238 if (tb[RTA_MULTIPATH]) {
4239 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4240 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4241
4242 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4243 cfg->fc_mp_len, extack);
9ed59592
DA
4244 if (err < 0)
4245 goto errout;
51ebd318
ND
4246 }
4247
c78ba6d6
LR
4248 if (tb[RTA_PREF]) {
4249 pref = nla_get_u8(tb[RTA_PREF]);
4250 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4251 pref != ICMPV6_ROUTER_PREF_HIGH)
4252 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4253 cfg->fc_flags |= RTF_PREF(pref);
4254 }
4255
19e42e45
RP
4256 if (tb[RTA_ENCAP])
4257 cfg->fc_encap = tb[RTA_ENCAP];
4258
9ed59592 4259 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4260 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4261
c255bd68 4262 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4263 if (err < 0)
4264 goto errout;
4265 }
4266
32bc201e
XL
4267 if (tb[RTA_EXPIRES]) {
4268 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4269
4270 if (addrconf_finite_timeout(timeout)) {
4271 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4272 cfg->fc_flags |= RTF_EXPIRES;
4273 }
4274 }
4275
86872cb5
TG
4276 err = 0;
4277errout:
4278 return err;
1da177e4
LT
4279}
4280
6b9ea5a6 4281struct rt6_nh {
8d1c802b 4282 struct fib6_info *fib6_info;
6b9ea5a6 4283 struct fib6_config r_cfg;
6b9ea5a6
RP
4284 struct list_head next;
4285};
4286
d4ead6b3
DA
4287static int ip6_route_info_append(struct net *net,
4288 struct list_head *rt6_nh_list,
8d1c802b
DA
4289 struct fib6_info *rt,
4290 struct fib6_config *r_cfg)
6b9ea5a6
RP
4291{
4292 struct rt6_nh *nh;
6b9ea5a6
RP
4293 int err = -EEXIST;
4294
4295 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4296 /* check if fib6_info already exists */
4297 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4298 return err;
4299 }
4300
4301 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4302 if (!nh)
4303 return -ENOMEM;
8d1c802b 4304 nh->fib6_info = rt;
6b9ea5a6
RP
4305 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4306 list_add_tail(&nh->next, rt6_nh_list);
4307
4308 return 0;
4309}
4310
8d1c802b
DA
4311static void ip6_route_mpath_notify(struct fib6_info *rt,
4312 struct fib6_info *rt_last,
3b1137fe
DA
4313 struct nl_info *info,
4314 __u16 nlflags)
4315{
4316 /* if this is an APPEND route, then rt points to the first route
4317 * inserted and rt_last points to last route inserted. Userspace
4318 * wants a consistent dump of the route which starts at the first
4319 * nexthop. Since sibling routes are always added at the end of
4320 * the list, find the first sibling of the last route appended
4321 */
93c2fb25
DA
4322 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4323 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4324 struct fib6_info,
93c2fb25 4325 fib6_siblings);
3b1137fe
DA
4326 }
4327
4328 if (rt)
4329 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4330}
4331
333c4301
DA
4332static int ip6_route_multipath_add(struct fib6_config *cfg,
4333 struct netlink_ext_ack *extack)
51ebd318 4334{
8d1c802b 4335 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4336 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4337 struct fib6_config r_cfg;
4338 struct rtnexthop *rtnh;
8d1c802b 4339 struct fib6_info *rt;
6b9ea5a6
RP
4340 struct rt6_nh *err_nh;
4341 struct rt6_nh *nh, *nh_safe;
3b1137fe 4342 __u16 nlflags;
51ebd318
ND
4343 int remaining;
4344 int attrlen;
6b9ea5a6
RP
4345 int err = 1;
4346 int nhn = 0;
4347 int replace = (cfg->fc_nlinfo.nlh &&
4348 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4349 LIST_HEAD(rt6_nh_list);
51ebd318 4350
3b1137fe
DA
4351 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4352 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4353 nlflags |= NLM_F_APPEND;
4354
35f1b4e9 4355 remaining = cfg->fc_mp_len;
51ebd318 4356 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4357
6b9ea5a6 4358 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4359 * fib6_info structs per nexthop
6b9ea5a6 4360 */
51ebd318
ND
4361 while (rtnh_ok(rtnh, remaining)) {
4362 memcpy(&r_cfg, cfg, sizeof(*cfg));
4363 if (rtnh->rtnh_ifindex)
4364 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4365
4366 attrlen = rtnh_attrlen(rtnh);
4367 if (attrlen > 0) {
4368 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4369
4370 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4371 if (nla) {
67b61f6c 4372 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4373 r_cfg.fc_flags |= RTF_GATEWAY;
4374 }
19e42e45
RP
4375 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4376 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4377 if (nla)
4378 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4379 }
6b9ea5a6 4380
68e2ffde 4381 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4382 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4383 if (IS_ERR(rt)) {
4384 err = PTR_ERR(rt);
4385 rt = NULL;
6b9ea5a6 4386 goto cleanup;
8c5b83f0 4387 }
b5d2d75e
DA
4388 if (!rt6_qualify_for_ecmp(rt)) {
4389 err = -EINVAL;
4390 NL_SET_ERR_MSG(extack,
4391 "Device only routes can not be added for IPv6 using the multipath API.");
4392 fib6_info_release(rt);
4393 goto cleanup;
4394 }
6b9ea5a6 4395
ad1601ae 4396 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4397
d4ead6b3
DA
4398 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4399 rt, &r_cfg);
51ebd318 4400 if (err) {
93531c67 4401 fib6_info_release(rt);
6b9ea5a6
RP
4402 goto cleanup;
4403 }
4404
4405 rtnh = rtnh_next(rtnh, &remaining);
4406 }
4407
3b1137fe
DA
4408 /* for add and replace send one notification with all nexthops.
4409 * Skip the notification in fib6_add_rt2node and send one with
4410 * the full route when done
4411 */
4412 info->skip_notify = 1;
4413
6b9ea5a6
RP
4414 err_nh = NULL;
4415 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4416 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4417 fib6_info_release(nh->fib6_info);
93531c67 4418
f7225172
DA
4419 if (!err) {
4420 /* save reference to last route successfully inserted */
4421 rt_last = nh->fib6_info;
4422
4423 /* save reference to first route for notification */
4424 if (!rt_notif)
4425 rt_notif = nh->fib6_info;
4426 }
3b1137fe 4427
8d1c802b
DA
4428 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4429 nh->fib6_info = NULL;
6b9ea5a6
RP
4430 if (err) {
4431 if (replace && nhn)
a5a82d84
JK
4432 NL_SET_ERR_MSG_MOD(extack,
4433 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4434 err_nh = nh;
4435 goto add_errout;
51ebd318 4436 }
6b9ea5a6 4437
1a72418b 4438 /* Because each route is added like a single route we remove
27596472
MK
4439 * these flags after the first nexthop: if there is a collision,
4440 * we have already failed to add the first nexthop:
4441 * fib6_add_rt2node() has rejected it; when replacing, old
4442 * nexthops have been replaced by first new, the rest should
4443 * be added to it.
1a72418b 4444 */
27596472
MK
4445 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4446 NLM_F_REPLACE);
6b9ea5a6
RP
4447 nhn++;
4448 }
4449
3b1137fe
DA
4450 /* success ... tell user about new route */
4451 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4452 goto cleanup;
4453
4454add_errout:
3b1137fe
DA
4455 /* send notification for routes that were added so that
4456 * the delete notifications sent by ip6_route_del are
4457 * coherent
4458 */
4459 if (rt_notif)
4460 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4461
6b9ea5a6
RP
4462 /* Delete routes that were already added */
4463 list_for_each_entry(nh, &rt6_nh_list, next) {
4464 if (err_nh == nh)
4465 break;
333c4301 4466 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4467 }
4468
4469cleanup:
4470 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4471 if (nh->fib6_info)
4472 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4473 list_del(&nh->next);
4474 kfree(nh);
4475 }
4476
4477 return err;
4478}
4479
333c4301
DA
4480static int ip6_route_multipath_del(struct fib6_config *cfg,
4481 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4482{
4483 struct fib6_config r_cfg;
4484 struct rtnexthop *rtnh;
4485 int remaining;
4486 int attrlen;
4487 int err = 1, last_err = 0;
4488
4489 remaining = cfg->fc_mp_len;
4490 rtnh = (struct rtnexthop *)cfg->fc_mp;
4491
4492 /* Parse a Multipath Entry */
4493 while (rtnh_ok(rtnh, remaining)) {
4494 memcpy(&r_cfg, cfg, sizeof(*cfg));
4495 if (rtnh->rtnh_ifindex)
4496 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4497
4498 attrlen = rtnh_attrlen(rtnh);
4499 if (attrlen > 0) {
4500 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4501
4502 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4503 if (nla) {
4504 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4505 r_cfg.fc_flags |= RTF_GATEWAY;
4506 }
4507 }
333c4301 4508 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4509 if (err)
4510 last_err = err;
4511
51ebd318
ND
4512 rtnh = rtnh_next(rtnh, &remaining);
4513 }
4514
4515 return last_err;
4516}
4517
c21ef3e3
DA
4518static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4519 struct netlink_ext_ack *extack)
1da177e4 4520{
86872cb5
TG
4521 struct fib6_config cfg;
4522 int err;
1da177e4 4523
333c4301 4524 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4525 if (err < 0)
4526 return err;
4527
51ebd318 4528 if (cfg.fc_mp)
333c4301 4529 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4530 else {
4531 cfg.fc_delete_all_nh = 1;
333c4301 4532 return ip6_route_del(&cfg, extack);
0ae81335 4533 }
1da177e4
LT
4534}
4535
c21ef3e3
DA
4536static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4537 struct netlink_ext_ack *extack)
1da177e4 4538{
86872cb5
TG
4539 struct fib6_config cfg;
4540 int err;
1da177e4 4541
333c4301 4542 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4543 if (err < 0)
4544 return err;
4545
67f69513
DA
4546 if (cfg.fc_metric == 0)
4547 cfg.fc_metric = IP6_RT_PRIO_USER;
4548
51ebd318 4549 if (cfg.fc_mp)
333c4301 4550 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4551 else
acb54e3c 4552 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4553}
4554
8d1c802b 4555static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4556{
beb1afac
DA
4557 int nexthop_len = 0;
4558
93c2fb25 4559 if (rt->fib6_nsiblings) {
beb1afac
DA
4560 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4561 + NLA_ALIGN(sizeof(struct rtnexthop))
4562 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4563 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4564
93c2fb25 4565 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4566 }
4567
339bf98f
TG
4568 return NLMSG_ALIGN(sizeof(struct rtmsg))
4569 + nla_total_size(16) /* RTA_SRC */
4570 + nla_total_size(16) /* RTA_DST */
4571 + nla_total_size(16) /* RTA_GATEWAY */
4572 + nla_total_size(16) /* RTA_PREFSRC */
4573 + nla_total_size(4) /* RTA_TABLE */
4574 + nla_total_size(4) /* RTA_IIF */
4575 + nla_total_size(4) /* RTA_OIF */
4576 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4577 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4578 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4579 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4580 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4581 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4582 + nexthop_len;
4583}
4584
d4ead6b3 4585static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4586 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4587 struct in6_addr *dest, struct in6_addr *src,
15e47304 4588 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4589 unsigned int flags)
1da177e4 4590{
22d0bd82
XL
4591 struct rt6_info *rt6 = (struct rt6_info *)dst;
4592 struct rt6key *rt6_dst, *rt6_src;
4593 u32 *pmetrics, table, rt6_flags;
2d7202bf 4594 struct nlmsghdr *nlh;
22d0bd82 4595 struct rtmsg *rtm;
d4ead6b3 4596 long expires = 0;
1da177e4 4597
15e47304 4598 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4599 if (!nlh)
26932566 4600 return -EMSGSIZE;
2d7202bf 4601
22d0bd82
XL
4602 if (rt6) {
4603 rt6_dst = &rt6->rt6i_dst;
4604 rt6_src = &rt6->rt6i_src;
4605 rt6_flags = rt6->rt6i_flags;
4606 } else {
4607 rt6_dst = &rt->fib6_dst;
4608 rt6_src = &rt->fib6_src;
4609 rt6_flags = rt->fib6_flags;
4610 }
4611
2d7202bf 4612 rtm = nlmsg_data(nlh);
1da177e4 4613 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4614 rtm->rtm_dst_len = rt6_dst->plen;
4615 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4616 rtm->rtm_tos = 0;
93c2fb25
DA
4617 if (rt->fib6_table)
4618 table = rt->fib6_table->tb6_id;
c71099ac 4619 else
9e762a4a 4620 table = RT6_TABLE_UNSPEC;
97f0082a 4621 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4622 if (nla_put_u32(skb, RTA_TABLE, table))
4623 goto nla_put_failure;
e8478e80
DA
4624
4625 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4626 rtm->rtm_flags = 0;
4627 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4628 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4629
22d0bd82 4630 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4631 rtm->rtm_flags |= RTM_F_CLONED;
4632
d4ead6b3
DA
4633 if (dest) {
4634 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4635 goto nla_put_failure;
1ab1457c 4636 rtm->rtm_dst_len = 128;
1da177e4 4637 } else if (rtm->rtm_dst_len)
22d0bd82 4638 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4639 goto nla_put_failure;
1da177e4
LT
4640#ifdef CONFIG_IPV6_SUBTREES
4641 if (src) {
930345ea 4642 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4643 goto nla_put_failure;
1ab1457c 4644 rtm->rtm_src_len = 128;
c78679e8 4645 } else if (rtm->rtm_src_len &&
22d0bd82 4646 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4647 goto nla_put_failure;
1da177e4 4648#endif
7bc570c8
YH
4649 if (iif) {
4650#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4651 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4652 int err = ip6mr_get_route(net, skb, rtm, portid);
4653
4654 if (err == 0)
4655 return 0;
4656 if (err < 0)
4657 goto nla_put_failure;
7bc570c8
YH
4658 } else
4659#endif
c78679e8
DM
4660 if (nla_put_u32(skb, RTA_IIF, iif))
4661 goto nla_put_failure;
d4ead6b3 4662 } else if (dest) {
1da177e4 4663 struct in6_addr saddr_buf;
d4ead6b3 4664 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4665 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4666 goto nla_put_failure;
1da177e4 4667 }
2d7202bf 4668
93c2fb25 4669 if (rt->fib6_prefsrc.plen) {
c3968a85 4670 struct in6_addr saddr_buf;
93c2fb25 4671 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4672 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4673 goto nla_put_failure;
c3968a85
DW
4674 }
4675
d4ead6b3
DA
4676 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4677 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4678 goto nla_put_failure;
4679
93c2fb25 4680 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4681 goto nla_put_failure;
8253947e 4682
beb1afac
DA
4683 /* For multipath routes, walk the siblings list and add
4684 * each as a nexthop within RTA_MULTIPATH.
4685 */
22d0bd82
XL
4686 if (rt6) {
4687 if (rt6_flags & RTF_GATEWAY &&
4688 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4689 goto nla_put_failure;
4690
4691 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4692 goto nla_put_failure;
4693 } else if (rt->fib6_nsiblings) {
8d1c802b 4694 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4695 struct nlattr *mp;
4696
4697 mp = nla_nest_start(skb, RTA_MULTIPATH);
4698 if (!mp)
4699 goto nla_put_failure;
4700
c0a72077
DA
4701 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4702 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4703 goto nla_put_failure;
4704
4705 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4706 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4707 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4708 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4709 goto nla_put_failure;
4710 }
4711
4712 nla_nest_end(skb, mp);
4713 } else {
c0a72077
DA
4714 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4715 &rtm->rtm_flags, false) < 0)
beb1afac
DA
4716 goto nla_put_failure;
4717 }
4718
22d0bd82 4719 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4720 expires = dst ? dst->expires : rt->expires;
4721 expires -= jiffies;
4722 }
69cdf8f9 4723
d4ead6b3 4724 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4725 goto nla_put_failure;
2d7202bf 4726
22d0bd82 4727 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4728 goto nla_put_failure;
4729
19e42e45 4730
053c095a
JB
4731 nlmsg_end(skb, nlh);
4732 return 0;
2d7202bf
TG
4733
4734nla_put_failure:
26932566
PM
4735 nlmsg_cancel(skb, nlh);
4736 return -EMSGSIZE;
1da177e4
LT
4737}
4738
13e38901
DA
4739static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4740 const struct net_device *dev)
4741{
ad1601ae 4742 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4743 return true;
4744
4745 if (f6i->fib6_nsiblings) {
4746 struct fib6_info *sibling, *next_sibling;
4747
4748 list_for_each_entry_safe(sibling, next_sibling,
4749 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4750 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4751 return true;
4752 }
4753 }
4754
4755 return false;
4756}
4757
8d1c802b 4758int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4759{
4760 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4761 struct fib_dump_filter *filter = &arg->filter;
4762 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4763 struct net *net = arg->net;
4764
421842ed 4765 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4766 return 0;
1da177e4 4767
13e38901
DA
4768 if ((filter->flags & RTM_F_PREFIX) &&
4769 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4770 /* success since this is not a prefix route */
4771 return 1;
4772 }
4773 if (filter->filter_set) {
4774 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4775 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4776 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4777 return 1;
4778 }
13e38901 4779 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4780 }
1da177e4 4781
d4ead6b3
DA
4782 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4783 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4784 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4785}
4786
0eff0a27
JK
4787static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4788 const struct nlmsghdr *nlh,
4789 struct nlattr **tb,
4790 struct netlink_ext_ack *extack)
4791{
4792 struct rtmsg *rtm;
4793 int i, err;
4794
4795 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4796 NL_SET_ERR_MSG_MOD(extack,
4797 "Invalid header for get route request");
4798 return -EINVAL;
4799 }
4800
4801 if (!netlink_strict_get_check(skb))
4802 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4803 rtm_ipv6_policy, extack);
4804
4805 rtm = nlmsg_data(nlh);
4806 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4807 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4808 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4809 rtm->rtm_type) {
4810 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4811 return -EINVAL;
4812 }
4813 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4814 NL_SET_ERR_MSG_MOD(extack,
4815 "Invalid flags for get route request");
4816 return -EINVAL;
4817 }
4818
4819 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4820 rtm_ipv6_policy, extack);
4821 if (err)
4822 return err;
4823
4824 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4825 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4826 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4827 return -EINVAL;
4828 }
4829
4830 for (i = 0; i <= RTA_MAX; i++) {
4831 if (!tb[i])
4832 continue;
4833
4834 switch (i) {
4835 case RTA_SRC:
4836 case RTA_DST:
4837 case RTA_IIF:
4838 case RTA_OIF:
4839 case RTA_MARK:
4840 case RTA_UID:
4841 case RTA_SPORT:
4842 case RTA_DPORT:
4843 case RTA_IP_PROTO:
4844 break;
4845 default:
4846 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4847 return -EINVAL;
4848 }
4849 }
4850
4851 return 0;
4852}
4853
c21ef3e3
DA
4854static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4855 struct netlink_ext_ack *extack)
1da177e4 4856{
3b1e0a65 4857 struct net *net = sock_net(in_skb->sk);
ab364a6f 4858 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4859 int err, iif = 0, oif = 0;
a68886a6 4860 struct fib6_info *from;
18c3a61c 4861 struct dst_entry *dst;
ab364a6f 4862 struct rt6_info *rt;
1da177e4 4863 struct sk_buff *skb;
ab364a6f 4864 struct rtmsg *rtm;
744486d4 4865 struct flowi6 fl6 = {};
18c3a61c 4866 bool fibmatch;
1da177e4 4867
0eff0a27 4868 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4869 if (err < 0)
4870 goto errout;
1da177e4 4871
ab364a6f 4872 err = -EINVAL;
38b7097b
HFS
4873 rtm = nlmsg_data(nlh);
4874 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4875 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4876
ab364a6f
TG
4877 if (tb[RTA_SRC]) {
4878 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4879 goto errout;
4880
4e3fd7a0 4881 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4882 }
4883
4884 if (tb[RTA_DST]) {
4885 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4886 goto errout;
4887
4e3fd7a0 4888 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4889 }
4890
4891 if (tb[RTA_IIF])
4892 iif = nla_get_u32(tb[RTA_IIF]);
4893
4894 if (tb[RTA_OIF])
72331bc0 4895 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4896
2e47b291
LC
4897 if (tb[RTA_MARK])
4898 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4899
622ec2c9
LC
4900 if (tb[RTA_UID])
4901 fl6.flowi6_uid = make_kuid(current_user_ns(),
4902 nla_get_u32(tb[RTA_UID]));
4903 else
4904 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4905
eacb9384
RP
4906 if (tb[RTA_SPORT])
4907 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4908
4909 if (tb[RTA_DPORT])
4910 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4911
4912 if (tb[RTA_IP_PROTO]) {
4913 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
4914 &fl6.flowi6_proto, AF_INET6,
4915 extack);
eacb9384
RP
4916 if (err)
4917 goto errout;
4918 }
4919
1da177e4
LT
4920 if (iif) {
4921 struct net_device *dev;
72331bc0
SL
4922 int flags = 0;
4923
121622db
FW
4924 rcu_read_lock();
4925
4926 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4927 if (!dev) {
121622db 4928 rcu_read_unlock();
1da177e4 4929 err = -ENODEV;
ab364a6f 4930 goto errout;
1da177e4 4931 }
72331bc0
SL
4932
4933 fl6.flowi6_iif = iif;
4934
4935 if (!ipv6_addr_any(&fl6.saddr))
4936 flags |= RT6_LOOKUP_F_HAS_SADDR;
4937
b75cc8f9 4938 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4939
4940 rcu_read_unlock();
72331bc0
SL
4941 } else {
4942 fl6.flowi6_oif = oif;
4943
58acfd71 4944 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4945 }
4946
18c3a61c
RP
4947
4948 rt = container_of(dst, struct rt6_info, dst);
4949 if (rt->dst.error) {
4950 err = rt->dst.error;
4951 ip6_rt_put(rt);
4952 goto errout;
1da177e4
LT
4953 }
4954
9d6acb3b
WC
4955 if (rt == net->ipv6.ip6_null_entry) {
4956 err = rt->dst.error;
4957 ip6_rt_put(rt);
4958 goto errout;
4959 }
4960
ab364a6f 4961 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4962 if (!skb) {
94e187c0 4963 ip6_rt_put(rt);
ab364a6f
TG
4964 err = -ENOBUFS;
4965 goto errout;
4966 }
1da177e4 4967
d8d1f30b 4968 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4969
4970 rcu_read_lock();
4971 from = rcu_dereference(rt->from);
4972
18c3a61c 4973 if (fibmatch)
a68886a6 4974 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4975 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4976 nlh->nlmsg_seq, 0);
4977 else
a68886a6
DA
4978 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4979 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4980 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4981 0);
a68886a6
DA
4982 rcu_read_unlock();
4983
1da177e4 4984 if (err < 0) {
ab364a6f
TG
4985 kfree_skb(skb);
4986 goto errout;
1da177e4
LT
4987 }
4988
15e47304 4989 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4990errout:
1da177e4 4991 return err;
1da177e4
LT
4992}
4993
8d1c802b 4994void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4995 unsigned int nlm_flags)
1da177e4
LT
4996{
4997 struct sk_buff *skb;
5578689a 4998 struct net *net = info->nl_net;
528c4ceb
DL
4999 u32 seq;
5000 int err;
5001
5002 err = -ENOBUFS;
38308473 5003 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5004
19e42e45 5005 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5006 if (!skb)
21713ebc
TG
5007 goto errout;
5008
d4ead6b3
DA
5009 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5010 event, info->portid, seq, nlm_flags);
26932566
PM
5011 if (err < 0) {
5012 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5013 WARN_ON(err == -EMSGSIZE);
5014 kfree_skb(skb);
5015 goto errout;
5016 }
15e47304 5017 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5018 info->nlh, gfp_any());
5019 return;
21713ebc
TG
5020errout:
5021 if (err < 0)
5578689a 5022 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5023}
5024
8ed67789 5025static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5026 unsigned long event, void *ptr)
8ed67789 5027{
351638e7 5028 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5029 struct net *net = dev_net(dev);
8ed67789 5030
242d3a49
WC
5031 if (!(dev->flags & IFF_LOOPBACK))
5032 return NOTIFY_OK;
5033
5034 if (event == NETDEV_REGISTER) {
ad1601ae 5035 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5036 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5037 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5038#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5039 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5040 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5041 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5042 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5043#endif
76da0704
WC
5044 } else if (event == NETDEV_UNREGISTER &&
5045 dev->reg_state != NETREG_UNREGISTERED) {
5046 /* NETDEV_UNREGISTER could be fired for multiple times by
5047 * netdev_wait_allrefs(). Make sure we only call this once.
5048 */
12d94a80 5049 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5050#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5051 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5052 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5053#endif
5054 }
5055
5056 return NOTIFY_OK;
5057}
5058
1da177e4
LT
5059/*
5060 * /proc
5061 */
5062
5063#ifdef CONFIG_PROC_FS
1da177e4
LT
5064static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5065{
69ddb805 5066 struct net *net = (struct net *)seq->private;
1da177e4 5067 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5068 net->ipv6.rt6_stats->fib_nodes,
5069 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5070 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5071 net->ipv6.rt6_stats->fib_rt_entries,
5072 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5073 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5074 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5075
5076 return 0;
5077}
1da177e4
LT
5078#endif /* CONFIG_PROC_FS */
5079
5080#ifdef CONFIG_SYSCTL
5081
1da177e4 5082static
fe2c6338 5083int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5084 void __user *buffer, size_t *lenp, loff_t *ppos)
5085{
c486da34
LAG
5086 struct net *net;
5087 int delay;
f0fb9b28 5088 int ret;
c486da34 5089 if (!write)
1da177e4 5090 return -EINVAL;
c486da34
LAG
5091
5092 net = (struct net *)ctl->extra1;
5093 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5094 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5095 if (ret)
5096 return ret;
5097
2ac3ac8f 5098 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5099 return 0;
1da177e4
LT
5100}
5101
7c6bb7d2
DA
5102static int zero;
5103static int one = 1;
5104
ed792e28 5105static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5106 {
1da177e4 5107 .procname = "flush",
4990509f 5108 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5109 .maxlen = sizeof(int),
89c8b3a1 5110 .mode = 0200,
6d9f239a 5111 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5112 },
5113 {
1da177e4 5114 .procname = "gc_thresh",
9a7ec3a9 5115 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5116 .maxlen = sizeof(int),
5117 .mode = 0644,
6d9f239a 5118 .proc_handler = proc_dointvec,
1da177e4
LT
5119 },
5120 {
1da177e4 5121 .procname = "max_size",
4990509f 5122 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5123 .maxlen = sizeof(int),
5124 .mode = 0644,
6d9f239a 5125 .proc_handler = proc_dointvec,
1da177e4
LT
5126 },
5127 {
1da177e4 5128 .procname = "gc_min_interval",
4990509f 5129 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5130 .maxlen = sizeof(int),
5131 .mode = 0644,
6d9f239a 5132 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5133 },
5134 {
1da177e4 5135 .procname = "gc_timeout",
4990509f 5136 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5137 .maxlen = sizeof(int),
5138 .mode = 0644,
6d9f239a 5139 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5140 },
5141 {
1da177e4 5142 .procname = "gc_interval",
4990509f 5143 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5144 .maxlen = sizeof(int),
5145 .mode = 0644,
6d9f239a 5146 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5147 },
5148 {
1da177e4 5149 .procname = "gc_elasticity",
4990509f 5150 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5151 .maxlen = sizeof(int),
5152 .mode = 0644,
f3d3f616 5153 .proc_handler = proc_dointvec,
1da177e4
LT
5154 },
5155 {
1da177e4 5156 .procname = "mtu_expires",
4990509f 5157 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5158 .maxlen = sizeof(int),
5159 .mode = 0644,
6d9f239a 5160 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5161 },
5162 {
1da177e4 5163 .procname = "min_adv_mss",
4990509f 5164 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5165 .maxlen = sizeof(int),
5166 .mode = 0644,
f3d3f616 5167 .proc_handler = proc_dointvec,
1da177e4
LT
5168 },
5169 {
1da177e4 5170 .procname = "gc_min_interval_ms",
4990509f 5171 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5172 .maxlen = sizeof(int),
5173 .mode = 0644,
6d9f239a 5174 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5175 },
7c6bb7d2
DA
5176 {
5177 .procname = "skip_notify_on_dev_down",
5178 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5179 .maxlen = sizeof(int),
5180 .mode = 0644,
5181 .proc_handler = proc_dointvec,
5182 .extra1 = &zero,
5183 .extra2 = &one,
5184 },
f8572d8f 5185 { }
1da177e4
LT
5186};
5187
2c8c1e72 5188struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5189{
5190 struct ctl_table *table;
5191
5192 table = kmemdup(ipv6_route_table_template,
5193 sizeof(ipv6_route_table_template),
5194 GFP_KERNEL);
5ee09105
YH
5195
5196 if (table) {
5197 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5198 table[0].extra1 = net;
86393e52 5199 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5200 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5201 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5202 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5203 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5204 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5205 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5206 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5207 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5208 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5209
5210 /* Don't export sysctls to unprivileged users */
5211 if (net->user_ns != &init_user_ns)
5212 table[0].procname = NULL;
5ee09105
YH
5213 }
5214
760f2d01
DL
5215 return table;
5216}
1da177e4
LT
5217#endif
5218
2c8c1e72 5219static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5220{
633d424b 5221 int ret = -ENOMEM;
8ed67789 5222
86393e52
AD
5223 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5224 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5225
fc66f95c
ED
5226 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5227 goto out_ip6_dst_ops;
5228
421842ed
DA
5229 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5230 sizeof(*net->ipv6.fib6_null_entry),
5231 GFP_KERNEL);
5232 if (!net->ipv6.fib6_null_entry)
5233 goto out_ip6_dst_entries;
5234
8ed67789
DL
5235 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5236 sizeof(*net->ipv6.ip6_null_entry),
5237 GFP_KERNEL);
5238 if (!net->ipv6.ip6_null_entry)
421842ed 5239 goto out_fib6_null_entry;
d8d1f30b 5240 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5241 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5242 ip6_template_metrics, true);
8ed67789
DL
5243
5244#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5245 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5246 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5247 sizeof(*net->ipv6.ip6_prohibit_entry),
5248 GFP_KERNEL);
68fffc67
PZ
5249 if (!net->ipv6.ip6_prohibit_entry)
5250 goto out_ip6_null_entry;
d8d1f30b 5251 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5252 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5253 ip6_template_metrics, true);
8ed67789
DL
5254
5255 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5256 sizeof(*net->ipv6.ip6_blk_hole_entry),
5257 GFP_KERNEL);
68fffc67
PZ
5258 if (!net->ipv6.ip6_blk_hole_entry)
5259 goto out_ip6_prohibit_entry;
d8d1f30b 5260 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5261 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5262 ip6_template_metrics, true);
8ed67789
DL
5263#endif
5264
b339a47c
PZ
5265 net->ipv6.sysctl.flush_delay = 0;
5266 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5267 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5268 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5269 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5270 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5271 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5272 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5273 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5274
6891a346
BT
5275 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5276
8ed67789
DL
5277 ret = 0;
5278out:
5279 return ret;
f2fc6a54 5280
68fffc67
PZ
5281#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5282out_ip6_prohibit_entry:
5283 kfree(net->ipv6.ip6_prohibit_entry);
5284out_ip6_null_entry:
5285 kfree(net->ipv6.ip6_null_entry);
5286#endif
421842ed
DA
5287out_fib6_null_entry:
5288 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5289out_ip6_dst_entries:
5290 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5291out_ip6_dst_ops:
f2fc6a54 5292 goto out;
cdb18761
DL
5293}
5294
2c8c1e72 5295static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5296{
421842ed 5297 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5298 kfree(net->ipv6.ip6_null_entry);
5299#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5300 kfree(net->ipv6.ip6_prohibit_entry);
5301 kfree(net->ipv6.ip6_blk_hole_entry);
5302#endif
41bb78b4 5303 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5304}
5305
d189634e
TG
5306static int __net_init ip6_route_net_init_late(struct net *net)
5307{
5308#ifdef CONFIG_PROC_FS
c3506372
CH
5309 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5310 sizeof(struct ipv6_route_iter));
3617d949
CH
5311 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5312 rt6_stats_seq_show, NULL);
d189634e
TG
5313#endif
5314 return 0;
5315}
5316
5317static void __net_exit ip6_route_net_exit_late(struct net *net)
5318{
5319#ifdef CONFIG_PROC_FS
ece31ffd
G
5320 remove_proc_entry("ipv6_route", net->proc_net);
5321 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5322#endif
5323}
5324
cdb18761
DL
5325static struct pernet_operations ip6_route_net_ops = {
5326 .init = ip6_route_net_init,
5327 .exit = ip6_route_net_exit,
5328};
5329
c3426b47
DM
5330static int __net_init ipv6_inetpeer_init(struct net *net)
5331{
5332 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5333
5334 if (!bp)
5335 return -ENOMEM;
5336 inet_peer_base_init(bp);
5337 net->ipv6.peers = bp;
5338 return 0;
5339}
5340
5341static void __net_exit ipv6_inetpeer_exit(struct net *net)
5342{
5343 struct inet_peer_base *bp = net->ipv6.peers;
5344
5345 net->ipv6.peers = NULL;
56a6b248 5346 inetpeer_invalidate_tree(bp);
c3426b47
DM
5347 kfree(bp);
5348}
5349
2b823f72 5350static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5351 .init = ipv6_inetpeer_init,
5352 .exit = ipv6_inetpeer_exit,
5353};
5354
d189634e
TG
5355static struct pernet_operations ip6_route_net_late_ops = {
5356 .init = ip6_route_net_init_late,
5357 .exit = ip6_route_net_exit_late,
5358};
5359
8ed67789
DL
5360static struct notifier_block ip6_route_dev_notifier = {
5361 .notifier_call = ip6_route_dev_notify,
242d3a49 5362 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5363};
5364
2f460933
WC
5365void __init ip6_route_init_special_entries(void)
5366{
5367 /* Registering of the loopback is done before this portion of code,
5368 * the loopback reference in rt6_info will not be taken, do it
5369 * manually for init_net */
ad1601ae 5370 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5371 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5372 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5373 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5374 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5375 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5376 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5377 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5378 #endif
5379}
5380
433d49c3 5381int __init ip6_route_init(void)
1da177e4 5382{
433d49c3 5383 int ret;
8d0b94af 5384 int cpu;
433d49c3 5385
9a7ec3a9
DL
5386 ret = -ENOMEM;
5387 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5388 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5389 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5390 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5391 goto out;
14e50e57 5392
fc66f95c 5393 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5394 if (ret)
bdb3289f 5395 goto out_kmem_cache;
bdb3289f 5396
c3426b47
DM
5397 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5398 if (ret)
e8803b6c 5399 goto out_dst_entries;
2a0c451a 5400
7e52b33b
DM
5401 ret = register_pernet_subsys(&ip6_route_net_ops);
5402 if (ret)
5403 goto out_register_inetpeer;
c3426b47 5404
5dc121e9
AE
5405 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5406
e8803b6c 5407 ret = fib6_init();
433d49c3 5408 if (ret)
8ed67789 5409 goto out_register_subsys;
433d49c3 5410
433d49c3
DL
5411 ret = xfrm6_init();
5412 if (ret)
e8803b6c 5413 goto out_fib6_init;
c35b7e72 5414
433d49c3
DL
5415 ret = fib6_rules_init();
5416 if (ret)
5417 goto xfrm6_init;
7e5449c2 5418
d189634e
TG
5419 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5420 if (ret)
5421 goto fib6_rules_init;
5422
16feebcf
FW
5423 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5424 inet6_rtm_newroute, NULL, 0);
5425 if (ret < 0)
5426 goto out_register_late_subsys;
5427
5428 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5429 inet6_rtm_delroute, NULL, 0);
5430 if (ret < 0)
5431 goto out_register_late_subsys;
5432
5433 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5434 inet6_rtm_getroute, NULL,
5435 RTNL_FLAG_DOIT_UNLOCKED);
5436 if (ret < 0)
d189634e 5437 goto out_register_late_subsys;
c127ea2c 5438
8ed67789 5439 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5440 if (ret)
d189634e 5441 goto out_register_late_subsys;
8ed67789 5442
8d0b94af
MKL
5443 for_each_possible_cpu(cpu) {
5444 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5445
5446 INIT_LIST_HEAD(&ul->head);
5447 spin_lock_init(&ul->lock);
5448 }
5449
433d49c3
DL
5450out:
5451 return ret;
5452
d189634e 5453out_register_late_subsys:
16feebcf 5454 rtnl_unregister_all(PF_INET6);
d189634e 5455 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5456fib6_rules_init:
433d49c3
DL
5457 fib6_rules_cleanup();
5458xfrm6_init:
433d49c3 5459 xfrm6_fini();
2a0c451a
TG
5460out_fib6_init:
5461 fib6_gc_cleanup();
8ed67789
DL
5462out_register_subsys:
5463 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5464out_register_inetpeer:
5465 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5466out_dst_entries:
5467 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5468out_kmem_cache:
f2fc6a54 5469 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5470 goto out;
1da177e4
LT
5471}
5472
5473void ip6_route_cleanup(void)
5474{
8ed67789 5475 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5476 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5477 fib6_rules_cleanup();
1da177e4 5478 xfrm6_fini();
1da177e4 5479 fib6_gc_cleanup();
c3426b47 5480 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5481 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5482 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5483 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5484}