ipv6: Pass fib6_result to fib lookups
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/* Result codes of the neighbour check used while scoring a nexthop.
 * Failures are negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
702cea56
DA
105static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106 int strict);
8d1c802b 107static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 108static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 109 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 110 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
111 int iif, int type, u32 portid, u32 seq,
112 unsigned int flags);
7e4b5128 113static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
35732d01
WW
114 struct in6_addr *daddr,
115 struct in6_addr *saddr);
1da177e4 116
70ceb4f5 117#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 118static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev,
95c96174 122 unsigned int pref);
8d1c802b 123static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 124 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
125 const struct in6_addr *gwaddr,
126 struct net_device *dev);
70ceb4f5
YH
127#endif
128
8d0b94af
MKL
129struct uncached_list {
130 spinlock_t lock;
131 struct list_head head;
132};
133
134static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
135
510c321b 136void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
137{
138 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
139
8d0b94af
MKL
140 rt->rt6i_uncached_list = ul;
141
142 spin_lock_bh(&ul->lock);
143 list_add_tail(&rt->rt6i_uncached, &ul->head);
144 spin_unlock_bh(&ul->lock);
145}
146
510c321b 147void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
148{
149 if (!list_empty(&rt->rt6i_uncached)) {
150 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 151 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
152
153 spin_lock_bh(&ul->lock);
154 list_del(&rt->rt6i_uncached);
81eb8447 155 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
156 spin_unlock_bh(&ul->lock);
157 }
158}
159
160static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
161{
162 struct net_device *loopback_dev = net->loopback_dev;
163 int cpu;
164
e332bc67
EB
165 if (dev == loopback_dev)
166 return;
167
8d0b94af
MKL
168 for_each_possible_cpu(cpu) {
169 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
170 struct rt6_info *rt;
171
172 spin_lock_bh(&ul->lock);
173 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
174 struct inet6_dev *rt_idev = rt->rt6i_idev;
175 struct net_device *rt_dev = rt->dst.dev;
176
e332bc67 177 if (rt_idev->dev == dev) {
8d0b94af
MKL
178 rt->rt6i_idev = in6_dev_get(loopback_dev);
179 in6_dev_put(rt_idev);
180 }
181
e332bc67 182 if (rt_dev == dev) {
8d0b94af
MKL
183 rt->dst.dev = loopback_dev;
184 dev_hold(rt->dst.dev);
185 dev_put(rt_dev);
186 }
187 }
188 spin_unlock_bh(&ul->lock);
189 }
190}
191
f8a1b43b 192static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
193 struct sk_buff *skb,
194 const void *daddr)
39232973 195{
a7563f34 196 if (!ipv6_addr_any(p))
39232973 197 return (const void *) p;
f894cbf8
DM
198 else if (skb)
199 return &ipv6_hdr(skb)->daddr;
39232973
DM
200 return daddr;
201}
202
f8a1b43b
DA
203struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204 struct net_device *dev,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct neighbour *n;
209
f8a1b43b
DA
210 daddr = choose_neigh_daddr(gw, skb, daddr);
211 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
212 if (n)
213 return n;
7adf3246
SB
214
215 n = neigh_create(&nd_tbl, daddr, dev);
216 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
217}
218
219static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220 struct sk_buff *skb,
221 const void *daddr)
222{
223 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224
225 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
226}
227
63fca65d
JA
228static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229{
230 struct net_device *dev = dst->dev;
231 struct rt6_info *rt = (struct rt6_info *)dst;
232
f8a1b43b 233 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
234 if (!daddr)
235 return;
236 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237 return;
238 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239 return;
240 __ipv6_confirm_neigh(dev, daddr);
241}
242
9a7ec3a9 243static struct dst_ops ip6_dst_ops_template = {
1da177e4 244 .family = AF_INET6,
1da177e4
LT
245 .gc = ip6_dst_gc,
246 .gc_thresh = 1024,
247 .check = ip6_dst_check,
0dbaee3b 248 .default_advmss = ip6_default_advmss,
ebb762f2 249 .mtu = ip6_mtu,
d4ead6b3 250 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
251 .destroy = ip6_dst_destroy,
252 .ifdown = ip6_dst_ifdown,
253 .negative_advice = ip6_negative_advice,
254 .link_failure = ip6_link_failure,
255 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 256 .redirect = rt6_do_redirect,
9f8955cc 257 .local_out = __ip6_local_out,
f8a1b43b 258 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 259 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
260};
261
ebb762f2 262static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 263{
618f9bc7
SK
264 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265
266 return mtu ? : dst->dev->mtu;
ec831ea7
RD
267}
268
6700c270
DM
269static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
270 struct sk_buff *skb, u32 mtu)
14e50e57
DM
271{
272}
273
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
278
14e50e57
DM
279static struct dst_ops ip6_dst_blackhole_ops = {
280 .family = AF_INET6,
14e50e57
DM
281 .destroy = ip6_dst_destroy,
282 .check = ip6_dst_check,
ebb762f2 283 .mtu = ip6_blackhole_mtu,
214f45c9 284 .default_advmss = ip6_default_advmss,
14e50e57 285 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 286 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 287 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 288 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
289};
290
62fa8a84 291static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 292 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
293};
294
8d1c802b 295static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
296 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
297 .fib6_protocol = RTPROT_KERNEL,
298 .fib6_metric = ~(u32)0,
299 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
300 .fib6_type = RTN_UNREACHABLE,
301 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
302};
303
fb0af4c7 304static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
2c20cbd7 308 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 309 .error = -ENETUNREACH,
d8d1f30b
CG
310 .input = ip6_pkt_discard,
311 .output = ip6_pkt_discard_out,
1da177e4
LT
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
314};
315
101367c2
TG
316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
317
fb0af4c7 318static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
319 .dst = {
320 .__refcnt = ATOMIC_INIT(1),
321 .__use = 1,
2c20cbd7 322 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 323 .error = -EACCES,
d8d1f30b
CG
324 .input = ip6_pkt_prohibit,
325 .output = ip6_pkt_prohibit_out,
101367c2
TG
326 },
327 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
340};
341
342#endif
343
ebfa45f0
MKL
344static void rt6_info_init(struct rt6_info *rt)
345{
346 struct dst_entry *dst = &rt->dst;
347
348 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
349 INIT_LIST_HEAD(&rt->rt6i_uncached);
350}
351
1da177e4 352/* allocate dst with ip6_dst_ops */
93531c67
DA
353struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354 int flags)
1da177e4 355{
97bab73f 356 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 357 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 358
81eb8447 359 if (rt) {
ebfa45f0 360 rt6_info_init(rt);
81eb8447
WW
361 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
362 }
8104891b 363
cf911662 364 return rt;
1da177e4 365}
9ab179d8 366EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 367
1da177e4
LT
368static void ip6_dst_destroy(struct dst_entry *dst)
369{
370 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 371 struct fib6_info *from;
8d0b94af 372 struct inet6_dev *idev;
1da177e4 373
1620a336 374 ip_dst_metrics_put(dst);
8d0b94af
MKL
375 rt6_uncached_list_del(rt);
376
377 idev = rt->rt6i_idev;
38308473 378 if (idev) {
1da177e4
LT
379 rt->rt6i_idev = NULL;
380 in6_dev_put(idev);
1ab1457c 381 }
1716a961 382
a68886a6
DA
383 rcu_read_lock();
384 from = rcu_dereference(rt->from);
385 rcu_assign_pointer(rt->from, NULL);
93531c67 386 fib6_info_release(from);
a68886a6 387 rcu_read_unlock();
b3419363
DM
388}
389
1da177e4
LT
390static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
391 int how)
392{
393 struct rt6_info *rt = (struct rt6_info *)dst;
394 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 395 struct net_device *loopback_dev =
c346dca1 396 dev_net(dev)->loopback_dev;
1da177e4 397
e5645f51
WW
398 if (idev && idev->dev != loopback_dev) {
399 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
97cac082 403 }
1da177e4
LT
404 }
405}
406
5973fb1e
MKL
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
a50feda5 415static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 416{
a68886a6
DA
417 struct fib6_info *from;
418
419 from = rcu_dereference(rt->from);
420
1716a961
G
421 if (rt->rt6i_flags & RTF_EXPIRES) {
422 if (time_after(jiffies, rt->dst.expires))
a50feda5 423 return true;
a68886a6 424 } else if (from) {
1e2ea8ad 425 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 426 fib6_check_expired(from);
1716a961 427 }
a50feda5 428 return false;
1da177e4
LT
429}
430
b1d40991
DA
431void fib6_select_path(const struct net *net, struct fib6_result *res,
432 struct flowi6 *fl6, int oif, bool have_oif_match,
433 const struct sk_buff *skb, int strict)
51ebd318 434{
8d1c802b 435 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
436 struct fib6_info *match = res->f6i;
437
438 if (!match->fib6_nsiblings || have_oif_match)
439 goto out;
51ebd318 440
b673d6cc
JS
441 /* We might have already computed the hash for ICMPv6 errors. In such
442 * case it will always be non-zero. Otherwise now is the time to do it.
443 */
444 if (!fl6->mp_hash)
b4bac172 445 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 446
ad1601ae 447 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
b1d40991 448 goto out;
3d709f69 449
93c2fb25
DA
450 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
451 fib6_siblings) {
702cea56 452 const struct fib6_nh *nh = &sibling->fib6_nh;
5e670d84
DA
453 int nh_upper_bound;
454
702cea56 455 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 456 if (fl6->mp_hash > nh_upper_bound)
3d709f69 457 continue;
702cea56 458 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
459 break;
460 match = sibling;
461 break;
462 }
463
b1d40991
DA
464out:
465 res->f6i = match;
466 res->nh = &match->fib6_nh;
51ebd318
ND
467}
468
1da177e4 469/*
66f5d6ce 470 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
471 */
472
0c59d006
DA
473static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
474 const struct in6_addr *saddr, int oif, int flags)
475{
476 const struct net_device *dev;
477
478 if (nh->fib_nh_flags & RTNH_F_DEAD)
479 return false;
480
481 dev = nh->fib_nh_dev;
482 if (oif) {
483 if (dev->ifindex == oif)
484 return true;
485 } else {
486 if (ipv6_chk_addr(net, saddr, dev,
487 flags & RT6_LOOKUP_F_IFACE))
488 return true;
489 }
490
491 return false;
492}
493
75ef7389
DA
494static void rt6_device_match(struct net *net, struct fib6_result *res,
495 const struct in6_addr *saddr, int oif, int flags)
1da177e4 496{
75ef7389
DA
497 struct fib6_info *f6i = res->f6i;
498 struct fib6_info *spf6i;
499 struct fib6_nh *nh;
1da177e4 500
75ef7389
DA
501 if (!oif && ipv6_addr_any(saddr)) {
502 nh = &f6i->fib6_nh;
503 if (!(nh->fib_nh_flags & RTNH_F_DEAD)) {
504 res->nh = nh;
505 return;
506 }
507 }
dd3abc4e 508
75ef7389
DA
509 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
510 nh = &spf6i->fib6_nh;
511 if (__rt6_device_match(net, nh, saddr, oif, flags)) {
512 res->f6i = spf6i;
513 res->nh = nh;
514 }
dd3abc4e 515 }
1da177e4 516
75ef7389
DA
517 if (oif && flags & RT6_LOOKUP_F_IFACE) {
518 res->f6i = net->ipv6.fib6_null_entry;
519 res->nh = &res->f6i->fib6_nh;
520 return;
521 }
8067bb8c 522
75ef7389
DA
523 res->nh = &f6i->fib6_nh;
524 if (res->nh->fib_nh_flags & RTNH_F_DEAD) {
525 res->f6i = net->ipv6.fib6_null_entry;
526 res->nh = &res->f6i->fib6_nh;
527 }
1da177e4
LT
528}
529
27097255 530#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
531struct __rt6_probe_work {
532 struct work_struct work;
533 struct in6_addr target;
534 struct net_device *dev;
535};
536
537static void rt6_probe_deferred(struct work_struct *w)
538{
539 struct in6_addr mcaddr;
540 struct __rt6_probe_work *work =
541 container_of(w, struct __rt6_probe_work, work);
542
543 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 544 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 545 dev_put(work->dev);
662f5533 546 kfree(work);
c2f17e82
HFS
547}
548
cc3a86c8 549static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 550{
f547fac6 551 struct __rt6_probe_work *work = NULL;
5e670d84 552 const struct in6_addr *nh_gw;
f2c31e32 553 struct neighbour *neigh;
5e670d84 554 struct net_device *dev;
f547fac6 555 struct inet6_dev *idev;
5e670d84 556
27097255
YH
557 /*
558 * Okay, this does not seem to be appropriate
559 * for now, however, we need to check if it
560 * is really so; aka Router Reachability Probing.
561 *
562 * Router Reachability Probe MUST be rate-limited
563 * to no more than one per minute.
564 */
cc3a86c8 565 if (fib6_nh->fib_nh_gw_family)
7ff74a59 566 return;
5e670d84 567
cc3a86c8
DA
568 nh_gw = &fib6_nh->fib_nh_gw6;
569 dev = fib6_nh->fib_nh_dev;
2152caea 570 rcu_read_lock_bh();
f547fac6 571 idev = __in6_dev_get(dev);
5e670d84 572 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 573 if (neigh) {
8d6c31bf
MKL
574 if (neigh->nud_state & NUD_VALID)
575 goto out;
576
2152caea 577 write_lock(&neigh->lock);
990edb42
MKL
578 if (!(neigh->nud_state & NUD_VALID) &&
579 time_after(jiffies,
dcd1f572 580 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
581 work = kmalloc(sizeof(*work), GFP_ATOMIC);
582 if (work)
583 __neigh_set_probe_once(neigh);
c2f17e82 584 }
2152caea 585 write_unlock(&neigh->lock);
cc3a86c8 586 } else if (time_after(jiffies, fib6_nh->last_probe +
f547fac6 587 idev->cnf.rtr_probe_interval)) {
990edb42 588 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 589 }
990edb42
MKL
590
591 if (work) {
cc3a86c8 592 fib6_nh->last_probe = jiffies;
990edb42 593 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
594 work->target = *nh_gw;
595 dev_hold(dev);
596 work->dev = dev;
990edb42
MKL
597 schedule_work(&work->work);
598 }
599
8d6c31bf 600out:
2152caea 601 rcu_read_unlock_bh();
27097255
YH
602}
603#else
/* CONFIG_IPV6_ROUTER_PREF disabled: router probing is compiled out. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
607#endif
608
1da177e4 609/*
554cfb7e 610 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 611 */
1ba9a895 612static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 613{
afc154e9 614 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 615 struct neighbour *neigh;
f2c31e32 616
145a3621 617 rcu_read_lock_bh();
1ba9a895
DA
618 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
619 &fib6_nh->fib_nh_gw6);
145a3621
YH
620 if (neigh) {
621 read_lock(&neigh->lock);
554cfb7e 622 if (neigh->nud_state & NUD_VALID)
afc154e9 623 ret = RT6_NUD_SUCCEED;
398bcbeb 624#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 625 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 626 ret = RT6_NUD_SUCCEED;
7e980569
JB
627 else
628 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 629#endif
145a3621 630 read_unlock(&neigh->lock);
afc154e9
HFS
631 } else {
632 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 633 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 634 }
145a3621
YH
635 rcu_read_unlock_bh();
636
a5a81f0b 637 return ret;
1da177e4
LT
638}
639
702cea56
DA
640static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
641 int strict)
1da177e4 642{
6e1809a5
DA
643 int m = 0;
644
645 if (!oif || nh->fib_nh_dev->ifindex == oif)
646 m = 2;
1ab1457c 647
77d16f45 648 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 649 return RT6_NUD_FAIL_HARD;
ebacaaa0 650#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 651 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 652#endif
1ba9a895 653 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 654 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 655 int n = rt6_check_neigh(nh);
afc154e9
HFS
656 if (n < 0)
657 return n;
658 }
554cfb7e
YH
659 return m;
660}
661
28679ed1
DA
662static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
663 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 664{
afc154e9 665 bool match_do_rr = false;
28679ed1
DA
666 bool rc = false;
667 int m;
35103d11 668
28679ed1 669 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
670 goto out;
671
28679ed1
DA
672 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
673 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 674 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 675 goto out;
f11e6659 676
28679ed1 677 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 678 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
679 match_do_rr = true;
680 m = 0; /* lowest valid score */
7e980569 681 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 682 goto out;
afc154e9
HFS
683 }
684
685 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 686 rt6_probe(nh);
f11e6659 687
7e980569 688 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 689 if (m > *mpri) {
afc154e9 690 *do_rr = match_do_rr;
f11e6659 691 *mpri = m;
28679ed1 692 rc = true;
f11e6659 693 }
f11e6659 694out:
28679ed1 695 return rc;
f11e6659
DM
696}
697
b7bc4b6a 698static void __find_rr_leaf(struct fib6_info *f6i_start,
30c15f03 699 struct fib6_info *nomatch, u32 metric,
b7bc4b6a 700 struct fib6_result *res, struct fib6_info **cont,
30c15f03 701 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 702{
b7bc4b6a 703 struct fib6_info *f6i;
1da177e4 704
b7bc4b6a
DA
705 for (f6i = f6i_start;
706 f6i && f6i != nomatch;
707 f6i = rcu_dereference(f6i->fib6_next)) {
30c15f03
DA
708 struct fib6_nh *nh;
709
b7bc4b6a
DA
710 if (cont && f6i->fib6_metric != metric) {
711 *cont = f6i;
30c15f03 712 return;
9fbdcfaf
SK
713 }
714
b7bc4b6a 715 if (fib6_check_expired(f6i))
28679ed1
DA
716 continue;
717
b7bc4b6a
DA
718 nh = &f6i->fib6_nh;
719 if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
720 res->f6i = f6i;
721 res->nh = nh;
722 }
9fbdcfaf 723 }
30c15f03 724}
9fbdcfaf 725
b7bc4b6a
DA
726static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
727 struct fib6_info *rr_head, int oif, int strict,
728 bool *do_rr, struct fib6_result *res)
30c15f03 729{
b7bc4b6a
DA
730 u32 metric = rr_head->fib6_metric;
731 struct fib6_info *cont = NULL;
30c15f03 732 int mpri = -1;
9fbdcfaf 733
b7bc4b6a 734 __find_rr_leaf(rr_head, NULL, metric, res, &cont,
30c15f03 735 oif, strict, do_rr, &mpri);
28679ed1 736
b7bc4b6a 737 __find_rr_leaf(leaf, rr_head, metric, res, &cont,
30c15f03 738 oif, strict, do_rr, &mpri);
9fbdcfaf 739
b7bc4b6a
DA
740 if (res->f6i || !cont)
741 return;
9fbdcfaf 742
b7bc4b6a 743 __find_rr_leaf(cont, NULL, metric, res, NULL,
30c15f03 744 oif, strict, do_rr, &mpri);
f11e6659 745}
1da177e4 746
b7bc4b6a
DA
747static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
748 struct fib6_result *res, int strict)
f11e6659 749{
8d1c802b 750 struct fib6_info *leaf = rcu_dereference(fn->leaf);
b7bc4b6a 751 struct fib6_info *rt0;
afc154e9 752 bool do_rr = false;
17ecf590 753 int key_plen;
1da177e4 754
b7bc4b6a
DA
755 /* make sure this function or its helpers sets f6i */
756 res->f6i = NULL;
757
421842ed 758 if (!leaf || leaf == net->ipv6.fib6_null_entry)
b7bc4b6a 759 goto out;
8d1040e8 760
66f5d6ce 761 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 762 if (!rt0)
66f5d6ce 763 rt0 = leaf;
1da177e4 764
17ecf590
WW
765 /* Double check to make sure fn is not an intermediate node
766 * and fn->leaf does not points to its child's leaf
767 * (This might happen if all routes under fn are deleted from
768 * the tree and fib6_repair_tree() is called on the node.)
769 */
93c2fb25 770 key_plen = rt0->fib6_dst.plen;
17ecf590 771#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
772 if (rt0->fib6_src.plen)
773 key_plen = rt0->fib6_src.plen;
17ecf590
WW
774#endif
775 if (fn->fn_bit != key_plen)
b7bc4b6a 776 goto out;
1da177e4 777
b7bc4b6a 778 find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
afc154e9 779 if (do_rr) {
8fb11a9a 780 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 781
554cfb7e 782 /* no entries matched; do round-robin */
93c2fb25 783 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 784 next = leaf;
f11e6659 785
66f5d6ce 786 if (next != rt0) {
93c2fb25 787 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 788 /* make sure next is not being deleted from the tree */
93c2fb25 789 if (next->fib6_node)
66f5d6ce 790 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 791 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 792 }
1da177e4 793 }
1da177e4 794
b7bc4b6a
DA
795out:
796 if (!res->f6i) {
797 res->f6i = net->ipv6.fib6_null_entry;
798 res->nh = &res->f6i->fib6_nh;
799 }
1da177e4
LT
800}
801
85bd05de 802static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8b9df265 803{
85bd05de
DA
804 return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
805 res->nh->fib_nh_gw_family;
8b9df265
MKL
806}
807
70ceb4f5
YH
808#ifdef CONFIG_IPV6_ROUTE_INFO
809int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 810 const struct in6_addr *gwaddr)
70ceb4f5 811{
c346dca1 812 struct net *net = dev_net(dev);
70ceb4f5
YH
813 struct route_info *rinfo = (struct route_info *) opt;
814 struct in6_addr prefix_buf, *prefix;
815 unsigned int pref;
4bed72e4 816 unsigned long lifetime;
8d1c802b 817 struct fib6_info *rt;
70ceb4f5
YH
818
819 if (len < sizeof(struct route_info)) {
820 return -EINVAL;
821 }
822
823 /* Sanity check for prefix_len and length */
824 if (rinfo->length > 3) {
825 return -EINVAL;
826 } else if (rinfo->prefix_len > 128) {
827 return -EINVAL;
828 } else if (rinfo->prefix_len > 64) {
829 if (rinfo->length < 2) {
830 return -EINVAL;
831 }
832 } else if (rinfo->prefix_len > 0) {
833 if (rinfo->length < 1) {
834 return -EINVAL;
835 }
836 }
837
838 pref = rinfo->route_pref;
839 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 840 return -EINVAL;
70ceb4f5 841
4bed72e4 842 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
843
844 if (rinfo->length == 3)
845 prefix = (struct in6_addr *)rinfo->prefix;
846 else {
847 /* this function is safe */
848 ipv6_addr_prefix(&prefix_buf,
849 (struct in6_addr *)rinfo->prefix,
850 rinfo->prefix_len);
851 prefix = &prefix_buf;
852 }
853
f104a567 854 if (rinfo->prefix_len == 0)
afb1d4b5 855 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
856 else
857 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 858 gwaddr, dev);
70ceb4f5
YH
859
860 if (rt && !lifetime) {
afb1d4b5 861 ip6_del_rt(net, rt);
70ceb4f5
YH
862 rt = NULL;
863 }
864
865 if (!rt && lifetime)
830218c1
DA
866 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
867 dev, pref);
70ceb4f5 868 else if (rt)
93c2fb25
DA
869 rt->fib6_flags = RTF_ROUTEINFO |
870 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
871
872 if (rt) {
1716a961 873 if (!addrconf_finite_timeout(lifetime))
14895687 874 fib6_clean_expires(rt);
1716a961 875 else
14895687 876 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 877
93531c67 878 fib6_info_release(rt);
70ceb4f5
YH
879 }
880 return 0;
881}
882#endif
883
ae90d867
DA
884/*
885 * Misc support functions
886 */
887
888/* called with rcu_lock held */
0d161581 889static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
ae90d867 890{
0d161581
DA
891 struct net_device *dev = res->nh->fib_nh_dev;
892 const struct fib6_info *f6i = res->f6i;
ae90d867 893
0d161581 894 if (f6i->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
895 /* for copies of local routes, dst->dev needs to be the
896 * device if it is a master device, the master device if
897 * device is enslaved, and the loopback as the default
898 */
899 if (netif_is_l3_slave(dev) &&
0d161581 900 !rt6_need_strict(&f6i->fib6_dst.addr))
ae90d867
DA
901 dev = l3mdev_master_dev_rcu(dev);
902 else if (!netif_is_l3_master(dev))
903 dev = dev_net(dev)->loopback_dev;
904 /* last case is netif_is_l3_master(dev) is true in which
905 * case we want dev returned to be dev
906 */
907 }
908
909 return dev;
910}
911
6edb3c96
DA
912static const int fib6_prop[RTN_MAX + 1] = {
913 [RTN_UNSPEC] = 0,
914 [RTN_UNICAST] = 0,
915 [RTN_LOCAL] = 0,
916 [RTN_BROADCAST] = 0,
917 [RTN_ANYCAST] = 0,
918 [RTN_MULTICAST] = 0,
919 [RTN_BLACKHOLE] = -EINVAL,
920 [RTN_UNREACHABLE] = -EHOSTUNREACH,
921 [RTN_PROHIBIT] = -EACCES,
922 [RTN_THROW] = -EAGAIN,
923 [RTN_NAT] = -EINVAL,
924 [RTN_XRESOLVE] = -EINVAL,
925};
926
927static int ip6_rt_type_to_error(u8 fib6_type)
928{
929 return fib6_prop[fib6_type];
930}
931
8d1c802b 932static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
933{
934 unsigned short flags = 0;
935
936 if (rt->dst_nocount)
937 flags |= DST_NOCOUNT;
938 if (rt->dst_nopolicy)
939 flags |= DST_NOPOLICY;
940 if (rt->dst_host)
941 flags |= DST_HOST;
942
943 return flags;
944}
945
8d1c802b 946static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
947{
948 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
949
950 switch (ort->fib6_type) {
951 case RTN_BLACKHOLE:
952 rt->dst.output = dst_discard_out;
953 rt->dst.input = dst_discard;
954 break;
955 case RTN_PROHIBIT:
956 rt->dst.output = ip6_pkt_prohibit_out;
957 rt->dst.input = ip6_pkt_prohibit;
958 break;
959 case RTN_THROW:
960 case RTN_UNREACHABLE:
961 default:
962 rt->dst.output = ip6_pkt_discard_out;
963 rt->dst.input = ip6_pkt_discard;
964 break;
965 }
966}
967
0d161581 968static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
6edb3c96 969{
0d161581
DA
970 struct fib6_info *ort = res->f6i;
971
93c2fb25 972 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
973 ip6_rt_init_dst_reject(rt, ort);
974 return;
975 }
976
977 rt->dst.error = 0;
978 rt->dst.output = ip6_output;
979
d23c4b63 980 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 981 rt->dst.input = ip6_input;
93c2fb25 982 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
983 rt->dst.input = ip6_mc_input;
984 } else {
985 rt->dst.input = ip6_forward;
986 }
987
0d161581
DA
988 if (res->nh->fib_nh_lws) {
989 rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
6edb3c96
DA
990 lwtunnel_set_redirect(&rt->dst);
991 }
992
993 rt->dst.lastuse = jiffies;
994}
995
e873e4b9 996/* Caller must already hold reference to @from */
8d1c802b 997static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 998{
ae90d867 999 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 1000 rcu_assign_pointer(rt->from, from);
e1255ed4 1001 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
1002}
1003
0d161581
DA
1004/* Caller must already hold reference to f6i in result */
1005static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
ae90d867 1006{
0d161581
DA
1007 const struct fib6_nh *nh = res->nh;
1008 const struct net_device *dev = nh->fib_nh_dev;
1009 struct fib6_info *f6i = res->f6i;
dcd1f572 1010
0d161581 1011 ip6_rt_init_dst(rt, res);
6edb3c96 1012
0d161581 1013 rt->rt6i_dst = f6i->fib6_dst;
dcd1f572 1014 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
0d161581
DA
1015 rt->rt6i_flags = f6i->fib6_flags;
1016 if (nh->fib_nh_gw_family) {
1017 rt->rt6i_gateway = nh->fib_nh_gw6;
2b2450ca
DA
1018 rt->rt6i_flags |= RTF_GATEWAY;
1019 }
0d161581 1020 rt6_set_from(rt, f6i);
ae90d867 1021#ifdef CONFIG_IPV6_SUBTREES
0d161581 1022 rt->rt6i_src = f6i->fib6_src;
ae90d867 1023#endif
ae90d867
DA
1024}
1025
a3c00e46
MKL
1026static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1027 struct in6_addr *saddr)
1028{
66f5d6ce 1029 struct fib6_node *pn, *sn;
a3c00e46
MKL
1030 while (1) {
1031 if (fn->fn_flags & RTN_TL_ROOT)
1032 return NULL;
66f5d6ce
WW
1033 pn = rcu_dereference(fn->parent);
1034 sn = FIB6_SUBTREE(pn);
1035 if (sn && sn != fn)
6454743b 1036 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1037 else
1038 fn = pn;
1039 if (fn->fn_flags & RTN_RTINFO)
1040 return fn;
1041 }
1042}
c71099ac 1043
10585b43 1044static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1045{
1046 struct rt6_info *rt = *prt;
1047
1048 if (dst_hold_safe(&rt->dst))
1049 return true;
10585b43 1050 if (net) {
d3843fe5
WW
1051 rt = net->ipv6.ip6_null_entry;
1052 dst_hold(&rt->dst);
1053 } else {
1054 rt = NULL;
1055 }
1056 *prt = rt;
1057 return false;
1058}
1059
dec9b0e2 1060/* called with rcu_lock held */
9b6b35ab 1061static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
dec9b0e2 1062{
9b6b35ab
DA
1063 struct net_device *dev = res->nh->fib_nh_dev;
1064 struct fib6_info *f6i = res->f6i;
1065 unsigned short flags;
dec9b0e2
DA
1066 struct rt6_info *nrt;
1067
9b6b35ab 1068 if (!fib6_info_hold_safe(f6i))
1c87e79a 1069 goto fallback;
e873e4b9 1070
9b6b35ab 1071 flags = fib6_info_dst_flags(f6i);
93531c67 1072 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1073 if (!nrt) {
9b6b35ab 1074 fib6_info_release(f6i);
1c87e79a
XL
1075 goto fallback;
1076 }
dec9b0e2 1077
0d161581 1078 ip6_rt_copy_init(nrt, res);
1c87e79a
XL
1079 return nrt;
1080
1081fallback:
1082 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1083 dst_hold(&nrt->dst);
dec9b0e2
DA
1084 return nrt;
1085}
1086
8ed67789
DL
1087static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1088 struct fib6_table *table,
b75cc8f9
DA
1089 struct flowi6 *fl6,
1090 const struct sk_buff *skb,
1091 int flags)
1da177e4 1092{
b1d40991 1093 struct fib6_result res = {};
1da177e4 1094 struct fib6_node *fn;
23fb93a4 1095 struct rt6_info *rt;
1da177e4 1096
b6cdbc85
DA
1097 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1098 flags &= ~RT6_LOOKUP_F_IFACE;
1099
66f5d6ce 1100 rcu_read_lock();
6454743b 1101 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1102restart:
b1d40991
DA
1103 res.f6i = rcu_dereference(fn->leaf);
1104 if (!res.f6i)
1105 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1106 else
75ef7389
DA
1107 rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1108 flags);
af52a52c 1109
b1d40991 1110 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1111 fn = fib6_backtrack(fn, &fl6->saddr);
1112 if (fn)
1113 goto restart;
2b760fcf 1114
af52a52c
DA
1115 rt = net->ipv6.ip6_null_entry;
1116 dst_hold(&rt->dst);
1117 goto out;
1118 }
d3843fe5 1119
b1d40991
DA
1120 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1121 fl6->flowi6_oif != 0, skb, flags);
1122
2b760fcf 1123 /* Search through exception table */
7e4b5128 1124 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1125 if (rt) {
10585b43 1126 if (ip6_hold_safe(net, &rt))
dec9b0e2 1127 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1128 } else {
9b6b35ab 1129 rt = ip6_create_rt_rcu(&res);
dec9b0e2 1130 }
b811580d 1131
af52a52c 1132out:
8ff2e5b2 1133 trace_fib6_table_lookup(net, &res, table, fl6);
af52a52c 1134
66f5d6ce 1135 rcu_read_unlock();
b811580d 1136
c71099ac 1137 return rt;
c71099ac
TG
1138}
1139
67ba4152 1140struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1141 const struct sk_buff *skb, int flags)
ea6e574e 1142{
b75cc8f9 1143 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1144}
1145EXPORT_SYMBOL_GPL(ip6_route_lookup);
1146
9acd9f3a 1147struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1148 const struct in6_addr *saddr, int oif,
1149 const struct sk_buff *skb, int strict)
c71099ac 1150{
4c9483b2
DM
1151 struct flowi6 fl6 = {
1152 .flowi6_oif = oif,
1153 .daddr = *daddr,
c71099ac
TG
1154 };
1155 struct dst_entry *dst;
77d16f45 1156 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1157
adaa70bb 1158 if (saddr) {
4c9483b2 1159 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1160 flags |= RT6_LOOKUP_F_HAS_SADDR;
1161 }
1162
b75cc8f9 1163 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1164 if (dst->error == 0)
1165 return (struct rt6_info *) dst;
1166
1167 dst_release(dst);
1168
1da177e4
LT
1169 return NULL;
1170}
7159039a
YH
1171EXPORT_SYMBOL(rt6_lookup);
1172
c71099ac 1173/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1174 * It takes new route entry, the addition fails by any reason the
1175 * route is released.
1176 * Caller must hold dst before calling it.
1da177e4
LT
1177 */
1178
8d1c802b 1179static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1180 struct netlink_ext_ack *extack)
1da177e4
LT
1181{
1182 int err;
c71099ac 1183 struct fib6_table *table;
1da177e4 1184
93c2fb25 1185 table = rt->fib6_table;
66f5d6ce 1186 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1187 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1188 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1189
1190 return err;
1191}
1192
8d1c802b 1193int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1194{
afb1d4b5 1195 struct nl_info info = { .nl_net = net, };
e715b6d3 1196
d4ead6b3 1197 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1198}
1199
85bd05de 1200static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
8b9df265
MKL
1201 const struct in6_addr *daddr,
1202 const struct in6_addr *saddr)
1da177e4 1203{
85bd05de 1204 struct fib6_info *f6i = res->f6i;
4832c30d 1205 struct net_device *dev;
1da177e4
LT
1206 struct rt6_info *rt;
1207
1208 /*
1209 * Clone the route.
1210 */
1211
85bd05de 1212 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1213 return NULL;
1214
0d161581 1215 dev = ip6_rt_get_dev_rcu(res);
93531c67 1216 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9 1217 if (!rt) {
85bd05de 1218 fib6_info_release(f6i);
83a09abd 1219 return NULL;
e873e4b9 1220 }
83a09abd 1221
0d161581 1222 ip6_rt_copy_init(rt, res);
83a09abd 1223 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1224 rt->dst.flags |= DST_HOST;
1225 rt->rt6i_dst.addr = *daddr;
1226 rt->rt6i_dst.plen = 128;
1da177e4 1227
85bd05de
DA
1228 if (!rt6_is_gw_or_nonexthop(res)) {
1229 if (f6i->fib6_dst.plen != 128 &&
1230 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
83a09abd 1231 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1232#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1233 if (rt->rt6i_src.plen && saddr) {
1234 rt->rt6i_src.addr = *saddr;
1235 rt->rt6i_src.plen = 128;
8b9df265 1236 }
83a09abd 1237#endif
95a9a5ba 1238 }
1da177e4 1239
95a9a5ba
YH
1240 return rt;
1241}
1da177e4 1242
db3fedee 1243static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
d52d3997 1244{
db3fedee
DA
1245 struct fib6_info *f6i = res->f6i;
1246 unsigned short flags = fib6_info_dst_flags(f6i);
4832c30d 1247 struct net_device *dev;
d52d3997
MKL
1248 struct rt6_info *pcpu_rt;
1249
db3fedee 1250 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1251 return NULL;
1252
4832c30d 1253 rcu_read_lock();
0d161581 1254 dev = ip6_rt_get_dev_rcu(res);
93531c67 1255 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1256 rcu_read_unlock();
e873e4b9 1257 if (!pcpu_rt) {
db3fedee 1258 fib6_info_release(f6i);
d52d3997 1259 return NULL;
e873e4b9 1260 }
0d161581 1261 ip6_rt_copy_init(pcpu_rt, res);
d52d3997
MKL
1262 pcpu_rt->rt6i_flags |= RTF_PCPU;
1263 return pcpu_rt;
1264}
1265
66f5d6ce 1266/* It should be called with rcu_read_lock() acquired */
db3fedee 1267static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
d52d3997 1268{
a73e4195 1269 struct rt6_info *pcpu_rt, **p;
d52d3997 1270
db3fedee 1271 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
d52d3997
MKL
1272 pcpu_rt = *p;
1273
d4ead6b3 1274 if (pcpu_rt)
10585b43 1275 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1276
a73e4195
MKL
1277 return pcpu_rt;
1278}
1279
afb1d4b5 1280static struct rt6_info *rt6_make_pcpu_route(struct net *net,
db3fedee 1281 const struct fib6_result *res)
a73e4195
MKL
1282{
1283 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997 1284
db3fedee 1285 pcpu_rt = ip6_rt_pcpu_alloc(res);
d52d3997 1286 if (!pcpu_rt) {
9c7370a1
MKL
1287 dst_hold(&net->ipv6.ip6_null_entry->dst);
1288 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1289 }
1290
a94b9367 1291 dst_hold(&pcpu_rt->dst);
db3fedee 1292 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
a94b9367 1293 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1294 BUG_ON(prev);
a94b9367 1295
d52d3997
MKL
1296 return pcpu_rt;
1297}
1298
35732d01
WW
1299/* exception hash table implementation
1300 */
1301static DEFINE_SPINLOCK(rt6_exception_lock);
1302
1303/* Remove rt6_ex from hash table and free the memory
1304 * Caller must hold rt6_exception_lock
1305 */
1306static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1307 struct rt6_exception *rt6_ex)
1308{
f5b51fe8 1309 struct fib6_info *from;
b2427e67 1310 struct net *net;
81eb8447 1311
35732d01
WW
1312 if (!bucket || !rt6_ex)
1313 return;
b2427e67
CIK
1314
1315 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1316 net->ipv6.rt6_stats->fib_rt_cache--;
1317
1318 /* purge completely the exception to allow releasing the held resources:
1319 * some [sk] cache may keep the dst around for unlimited time
1320 */
1321 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1322 lockdep_is_held(&rt6_exception_lock));
1323 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1324 fib6_info_release(from);
1325 dst_dev_put(&rt6_ex->rt6i->dst);
1326
35732d01 1327 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1328 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1329 kfree_rcu(rt6_ex, rcu);
1330 WARN_ON_ONCE(!bucket->depth);
1331 bucket->depth--;
1332}
1333
1334/* Remove oldest rt6_ex in bucket and free the memory
1335 * Caller must hold rt6_exception_lock
1336 */
1337static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1338{
1339 struct rt6_exception *rt6_ex, *oldest = NULL;
1340
1341 if (!bucket)
1342 return;
1343
1344 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1345 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1346 oldest = rt6_ex;
1347 }
1348 rt6_remove_exception(bucket, oldest);
1349}
1350
1351static u32 rt6_exception_hash(const struct in6_addr *dst,
1352 const struct in6_addr *src)
1353{
1354 static u32 seed __read_mostly;
1355 u32 val;
1356
1357 net_get_random_once(&seed, sizeof(seed));
1358 val = jhash(dst, sizeof(*dst), seed);
1359
1360#ifdef CONFIG_IPV6_SUBTREES
1361 if (src)
1362 val = jhash(src, sizeof(*src), val);
1363#endif
1364 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1365}
1366
1367/* Helper function to find the cached rt in the hash table
1368 * and update bucket pointer to point to the bucket for this
1369 * (daddr, saddr) pair
1370 * Caller must hold rt6_exception_lock
1371 */
1372static struct rt6_exception *
1373__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1374 const struct in6_addr *daddr,
1375 const struct in6_addr *saddr)
1376{
1377 struct rt6_exception *rt6_ex;
1378 u32 hval;
1379
1380 if (!(*bucket) || !daddr)
1381 return NULL;
1382
1383 hval = rt6_exception_hash(daddr, saddr);
1384 *bucket += hval;
1385
1386 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1387 struct rt6_info *rt6 = rt6_ex->rt6i;
1388 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1389
1390#ifdef CONFIG_IPV6_SUBTREES
1391 if (matched && saddr)
1392 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1393#endif
1394 if (matched)
1395 return rt6_ex;
1396 }
1397 return NULL;
1398}
1399
1400/* Helper function to find the cached rt in the hash table
1401 * and update bucket pointer to point to the bucket for this
1402 * (daddr, saddr) pair
1403 * Caller must hold rcu_read_lock()
1404 */
1405static struct rt6_exception *
1406__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1407 const struct in6_addr *daddr,
1408 const struct in6_addr *saddr)
1409{
1410 struct rt6_exception *rt6_ex;
1411 u32 hval;
1412
1413 WARN_ON_ONCE(!rcu_read_lock_held());
1414
1415 if (!(*bucket) || !daddr)
1416 return NULL;
1417
1418 hval = rt6_exception_hash(daddr, saddr);
1419 *bucket += hval;
1420
1421 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1422 struct rt6_info *rt6 = rt6_ex->rt6i;
1423 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1424
1425#ifdef CONFIG_IPV6_SUBTREES
1426 if (matched && saddr)
1427 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1428#endif
1429 if (matched)
1430 return rt6_ex;
1431 }
1432 return NULL;
1433}
1434
b748f260 1435static unsigned int fib6_mtu(const struct fib6_result *res)
d4ead6b3 1436{
b748f260 1437 const struct fib6_nh *nh = res->nh;
d4ead6b3
DA
1438 unsigned int mtu;
1439
b748f260
DA
1440 if (res->f6i->fib6_pmtu) {
1441 mtu = res->f6i->fib6_pmtu;
dcd1f572 1442 } else {
b748f260 1443 struct net_device *dev = nh->fib_nh_dev;
dcd1f572
DA
1444 struct inet6_dev *idev;
1445
1446 rcu_read_lock();
1447 idev = __in6_dev_get(dev);
1448 mtu = idev->cnf.mtu6;
1449 rcu_read_unlock();
1450 }
1451
d4ead6b3
DA
1452 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1453
b748f260 1454 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
d4ead6b3
DA
1455}
1456
35732d01 1457static int rt6_insert_exception(struct rt6_info *nrt,
5012f0a5 1458 const struct fib6_result *res)
35732d01 1459{
5e670d84 1460 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1461 struct rt6_exception_bucket *bucket;
1462 struct in6_addr *src_key = NULL;
1463 struct rt6_exception *rt6_ex;
5012f0a5 1464 struct fib6_info *f6i = res->f6i;
35732d01
WW
1465 int err = 0;
1466
35732d01
WW
1467 spin_lock_bh(&rt6_exception_lock);
1468
5012f0a5 1469 if (f6i->exception_bucket_flushed) {
35732d01
WW
1470 err = -EINVAL;
1471 goto out;
1472 }
1473
5012f0a5 1474 bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
35732d01
WW
1475 lockdep_is_held(&rt6_exception_lock));
1476 if (!bucket) {
1477 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1478 GFP_ATOMIC);
1479 if (!bucket) {
1480 err = -ENOMEM;
1481 goto out;
1482 }
5012f0a5 1483 rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
35732d01
WW
1484 }
1485
1486#ifdef CONFIG_IPV6_SUBTREES
5012f0a5 1487 /* fib6_src.plen != 0 indicates f6i is in subtree
35732d01 1488 * and exception table is indexed by a hash of
5012f0a5 1489 * both fib6_dst and fib6_src.
35732d01 1490 * Otherwise, the exception table is indexed by
5012f0a5 1491 * a hash of only fib6_dst.
35732d01 1492 */
5012f0a5 1493 if (f6i->fib6_src.plen)
35732d01
WW
1494 src_key = &nrt->rt6i_src.addr;
1495#endif
5012f0a5 1496 /* rt6_mtu_change() might lower mtu on f6i.
f5bbe7ee 1497 * Only insert this exception route if its mtu
5012f0a5 1498 * is less than f6i's mtu value.
f5bbe7ee 1499 */
b748f260 1500 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
f5bbe7ee
WW
1501 err = -EINVAL;
1502 goto out;
1503 }
60006a48 1504
35732d01
WW
1505 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1506 src_key);
1507 if (rt6_ex)
1508 rt6_remove_exception(bucket, rt6_ex);
1509
1510 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1511 if (!rt6_ex) {
1512 err = -ENOMEM;
1513 goto out;
1514 }
1515 rt6_ex->rt6i = nrt;
1516 rt6_ex->stamp = jiffies;
35732d01
WW
1517 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1518 bucket->depth++;
81eb8447 1519 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1520
1521 if (bucket->depth > FIB6_MAX_DEPTH)
1522 rt6_exception_remove_oldest(bucket);
1523
1524out:
1525 spin_unlock_bh(&rt6_exception_lock);
1526
1527 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1528 if (!err) {
5012f0a5
DA
1529 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1530 fib6_update_sernum(net, f6i);
1531 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
b886d5f2
PA
1532 fib6_force_start_gc(net);
1533 }
35732d01
WW
1534
1535 return err;
1536}
1537
8d1c802b 1538void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1539{
1540 struct rt6_exception_bucket *bucket;
1541 struct rt6_exception *rt6_ex;
1542 struct hlist_node *tmp;
1543 int i;
1544
1545 spin_lock_bh(&rt6_exception_lock);
1546 /* Prevent rt6_insert_exception() to recreate the bucket list */
1547 rt->exception_bucket_flushed = 1;
1548
1549 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1550 lockdep_is_held(&rt6_exception_lock));
1551 if (!bucket)
1552 goto out;
1553
1554 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1555 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1556 rt6_remove_exception(bucket, rt6_ex);
1557 WARN_ON_ONCE(bucket->depth);
1558 bucket++;
1559 }
1560
1561out:
1562 spin_unlock_bh(&rt6_exception_lock);
1563}
1564
1565/* Find cached rt in the hash table inside passed in rt
1566 * Caller has to hold rcu_read_lock()
1567 */
7e4b5128 1568static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
35732d01
WW
1569 struct in6_addr *daddr,
1570 struct in6_addr *saddr)
1571{
1572 struct rt6_exception_bucket *bucket;
1573 struct in6_addr *src_key = NULL;
1574 struct rt6_exception *rt6_ex;
7e4b5128 1575 struct rt6_info *ret = NULL;
35732d01 1576
7e4b5128 1577 bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
35732d01
WW
1578
1579#ifdef CONFIG_IPV6_SUBTREES
7e4b5128 1580 /* fib6i_src.plen != 0 indicates f6i is in subtree
35732d01 1581 * and exception table is indexed by a hash of
7e4b5128 1582 * both fib6_dst and fib6_src.
35732d01 1583 * Otherwise, the exception table is indexed by
7e4b5128 1584 * a hash of only fib6_dst.
35732d01 1585 */
7e4b5128 1586 if (res->f6i->fib6_src.plen)
35732d01
WW
1587 src_key = saddr;
1588#endif
1589 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1590
1591 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
7e4b5128 1592 ret = rt6_ex->rt6i;
35732d01 1593
7e4b5128 1594 return ret;
35732d01
WW
1595}
1596
1597/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1598static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1599{
35732d01
WW
1600 struct rt6_exception_bucket *bucket;
1601 struct in6_addr *src_key = NULL;
1602 struct rt6_exception *rt6_ex;
8a14e46f 1603 struct fib6_info *from;
35732d01
WW
1604 int err;
1605
091311de 1606 from = rcu_dereference(rt->from);
35732d01 1607 if (!from ||
442d713b 1608 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1609 return -EINVAL;
1610
1611 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1612 return -ENOENT;
1613
1614 spin_lock_bh(&rt6_exception_lock);
1615 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1616 lockdep_is_held(&rt6_exception_lock));
1617#ifdef CONFIG_IPV6_SUBTREES
1618 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1619 * and exception table is indexed by a hash of
1620 * both rt6i_dst and rt6i_src.
1621 * Otherwise, the exception table is indexed by
1622 * a hash of only rt6i_dst.
1623 */
93c2fb25 1624 if (from->fib6_src.plen)
35732d01
WW
1625 src_key = &rt->rt6i_src.addr;
1626#endif
1627 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1628 &rt->rt6i_dst.addr,
1629 src_key);
1630 if (rt6_ex) {
1631 rt6_remove_exception(bucket, rt6_ex);
1632 err = 0;
1633 } else {
1634 err = -ENOENT;
1635 }
1636
1637 spin_unlock_bh(&rt6_exception_lock);
1638 return err;
1639}
1640
1641/* Find rt6_ex which contains the passed in rt cache and
1642 * refresh its stamp
1643 */
1644static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1645{
35732d01
WW
1646 struct rt6_exception_bucket *bucket;
1647 struct in6_addr *src_key = NULL;
1648 struct rt6_exception *rt6_ex;
193f3685 1649 struct fib6_info *from;
35732d01
WW
1650
1651 rcu_read_lock();
193f3685
PA
1652 from = rcu_dereference(rt->from);
1653 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1654 goto unlock;
1655
35732d01
WW
1656 bucket = rcu_dereference(from->rt6i_exception_bucket);
1657
1658#ifdef CONFIG_IPV6_SUBTREES
1659 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1660 * and exception table is indexed by a hash of
1661 * both rt6i_dst and rt6i_src.
1662 * Otherwise, the exception table is indexed by
1663 * a hash of only rt6i_dst.
1664 */
93c2fb25 1665 if (from->fib6_src.plen)
35732d01
WW
1666 src_key = &rt->rt6i_src.addr;
1667#endif
1668 rt6_ex = __rt6_find_exception_rcu(&bucket,
1669 &rt->rt6i_dst.addr,
1670 src_key);
1671 if (rt6_ex)
1672 rt6_ex->stamp = jiffies;
1673
193f3685 1674unlock:
35732d01
WW
1675 rcu_read_unlock();
1676}
1677
e9fa1495
SB
1678static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1679 struct rt6_info *rt, int mtu)
1680{
1681 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1682 * lowest MTU in the path: always allow updating the route PMTU to
1683 * reflect PMTU decreases.
1684 *
1685 * If the new MTU is higher, and the route PMTU is equal to the local
1686 * MTU, this means the old MTU is the lowest in the path, so allow
1687 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1688 * handle this.
1689 */
1690
1691 if (dst_mtu(&rt->dst) >= mtu)
1692 return true;
1693
1694 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1695 return true;
1696
1697 return false;
1698}
1699
1700static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1701 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1702{
1703 struct rt6_exception_bucket *bucket;
1704 struct rt6_exception *rt6_ex;
1705 int i;
1706
1707 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1708 lockdep_is_held(&rt6_exception_lock));
1709
e9fa1495
SB
1710 if (!bucket)
1711 return;
1712
1713 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1714 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1715 struct rt6_info *entry = rt6_ex->rt6i;
1716
1717 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1718 * route), the metrics of its rt->from have already
e9fa1495
SB
1719 * been updated.
1720 */
d4ead6b3 1721 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1722 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1723 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1724 }
e9fa1495 1725 bucket++;
f5bbe7ee
WW
1726 }
1727}
1728
b16cb459
WW
1729#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1730
8d1c802b 1731static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1732 struct in6_addr *gateway)
1733{
1734 struct rt6_exception_bucket *bucket;
1735 struct rt6_exception *rt6_ex;
1736 struct hlist_node *tmp;
1737 int i;
1738
1739 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1740 return;
1741
1742 spin_lock_bh(&rt6_exception_lock);
1743 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1744 lockdep_is_held(&rt6_exception_lock));
1745
1746 if (bucket) {
1747 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1748 hlist_for_each_entry_safe(rt6_ex, tmp,
1749 &bucket->chain, hlist) {
1750 struct rt6_info *entry = rt6_ex->rt6i;
1751
1752 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1753 RTF_CACHE_GATEWAY &&
1754 ipv6_addr_equal(gateway,
1755 &entry->rt6i_gateway)) {
1756 rt6_remove_exception(bucket, rt6_ex);
1757 }
1758 }
1759 bucket++;
1760 }
1761 }
1762
1763 spin_unlock_bh(&rt6_exception_lock);
1764}
1765
c757faa8
WW
1766static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1767 struct rt6_exception *rt6_ex,
1768 struct fib6_gc_args *gc_args,
1769 unsigned long now)
1770{
1771 struct rt6_info *rt = rt6_ex->rt6i;
1772
1859bac0
PA
1773 /* we are pruning and obsoleting aged-out and non gateway exceptions
1774 * even if others have still references to them, so that on next
1775 * dst_check() such references can be dropped.
1776 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1777 * expired, independently from their aging, as per RFC 8201 section 4
1778 */
31afeb42
WW
1779 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1780 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1781 RT6_TRACE("aging clone %p\n", rt);
1782 rt6_remove_exception(bucket, rt6_ex);
1783 return;
1784 }
1785 } else if (time_after(jiffies, rt->dst.expires)) {
1786 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1787 rt6_remove_exception(bucket, rt6_ex);
1788 return;
31afeb42
WW
1789 }
1790
1791 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1792 struct neighbour *neigh;
1793 __u8 neigh_flags = 0;
1794
1bfa26ff
ED
1795 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1796 if (neigh)
c757faa8 1797 neigh_flags = neigh->flags;
1bfa26ff 1798
c757faa8
WW
1799 if (!(neigh_flags & NTF_ROUTER)) {
1800 RT6_TRACE("purging route %p via non-router but gateway\n",
1801 rt);
1802 rt6_remove_exception(bucket, rt6_ex);
1803 return;
1804 }
1805 }
31afeb42 1806
c757faa8
WW
1807 gc_args->more++;
1808}
1809
8d1c802b 1810void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1811 struct fib6_gc_args *gc_args,
1812 unsigned long now)
1813{
1814 struct rt6_exception_bucket *bucket;
1815 struct rt6_exception *rt6_ex;
1816 struct hlist_node *tmp;
1817 int i;
1818
1819 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1820 return;
1821
1bfa26ff
ED
1822 rcu_read_lock_bh();
1823 spin_lock(&rt6_exception_lock);
c757faa8
WW
1824 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1825 lockdep_is_held(&rt6_exception_lock));
1826
1827 if (bucket) {
1828 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1829 hlist_for_each_entry_safe(rt6_ex, tmp,
1830 &bucket->chain, hlist) {
1831 rt6_age_examine_exception(bucket, rt6_ex,
1832 gc_args, now);
1833 }
1834 bucket++;
1835 }
1836 }
1bfa26ff
ED
1837 spin_unlock(&rt6_exception_lock);
1838 rcu_read_unlock_bh();
c757faa8
WW
1839}
1840
1d053da9 1841/* must be called with rcu lock held */
effda4dd
DA
1842int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1843 struct flowi6 *fl6, struct fib6_result *res, int strict)
1da177e4 1844{
367efcb9 1845 struct fib6_node *fn, *saved_fn;
1da177e4 1846
6454743b 1847 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1848 saved_fn = fn;
1da177e4 1849
ca254490
DA
1850 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1851 oif = 0;
1852
a3c00e46 1853redo_rt6_select:
effda4dd
DA
1854 rt6_select(net, fn, oif, res, strict);
1855 if (res->f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1856 fn = fib6_backtrack(fn, &fl6->saddr);
1857 if (fn)
1858 goto redo_rt6_select;
367efcb9
MKL
1859 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1860 /* also consider unreachable route */
1861 strict &= ~RT6_LOOKUP_F_REACHABLE;
1862 fn = saved_fn;
1863 goto redo_rt6_select;
367efcb9 1864 }
a3c00e46
MKL
1865 }
1866
effda4dd 1867 trace_fib6_table_lookup(net, res, table, fl6);
fb9de91e 1868
effda4dd 1869 return 0;
1d053da9
DA
1870}
1871
1872struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1873 int oif, struct flowi6 *fl6,
1874 const struct sk_buff *skb, int flags)
1875{
b1d40991 1876 struct fib6_result res = {};
1d053da9
DA
1877 struct rt6_info *rt;
1878 int strict = 0;
1879
1880 strict |= flags & RT6_LOOKUP_F_IFACE;
1881 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1882 if (net->ipv6.devconf_all->forwarding == 0)
1883 strict |= RT6_LOOKUP_F_REACHABLE;
1884
1885 rcu_read_lock();
1886
effda4dd 1887 fib6_table_lookup(net, table, oif, fl6, &res, strict);
b1d40991 1888 if (res.f6i == net->ipv6.fib6_null_entry) {
421842ed 1889 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1890 rcu_read_unlock();
d3843fe5 1891 dst_hold(&rt->dst);
d3843fe5 1892 return rt;
23fb93a4
DA
1893 }
1894
b1d40991 1895 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 1896
23fb93a4 1897 /*Search through exception table */
7e4b5128 1898 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1899 if (rt) {
10585b43 1900 if (ip6_hold_safe(net, &rt))
d3843fe5 1901 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1902
66f5d6ce 1903 rcu_read_unlock();
d52d3997 1904 return rt;
3da59bd9 1905 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 1906 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
1907 /* Create a RTF_CACHE clone which will not be
1908 * owned by the fib6 tree. It is for the special case where
1909 * the daddr in the skb during the neighbor look-up is different
1910 * from the fl6->daddr used to look-up route here.
1911 */
3da59bd9
MKL
1912 struct rt6_info *uncached_rt;
1913
85bd05de 1914 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
d52d3997 1915
4d85cd0c 1916 rcu_read_unlock();
c71099ac 1917
1cfb71ee
WW
1918 if (uncached_rt) {
1919 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1920 * No need for another dst_hold()
1921 */
8d0b94af 1922 rt6_uncached_list_add(uncached_rt);
81eb8447 1923 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1924 } else {
3da59bd9 1925 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1926 dst_hold(&uncached_rt->dst);
1927 }
b811580d 1928
3da59bd9 1929 return uncached_rt;
d52d3997
MKL
1930 } else {
1931 /* Get a percpu copy */
1932
1933 struct rt6_info *pcpu_rt;
1934
951f788a 1935 local_bh_disable();
db3fedee 1936 pcpu_rt = rt6_get_pcpu_route(&res);
d52d3997 1937
93531c67 1938 if (!pcpu_rt)
db3fedee 1939 pcpu_rt = rt6_make_pcpu_route(net, &res);
93531c67 1940
951f788a
ED
1941 local_bh_enable();
1942 rcu_read_unlock();
d4bea421 1943
d52d3997
MKL
1944 return pcpu_rt;
1945 }
1da177e4 1946}
9ff74384 1947EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1948
b75cc8f9
DA
1949static struct rt6_info *ip6_pol_route_input(struct net *net,
1950 struct fib6_table *table,
1951 struct flowi6 *fl6,
1952 const struct sk_buff *skb,
1953 int flags)
4acad72d 1954{
b75cc8f9 1955 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1956}
1957
d409b847
MB
1958struct dst_entry *ip6_route_input_lookup(struct net *net,
1959 struct net_device *dev,
b75cc8f9
DA
1960 struct flowi6 *fl6,
1961 const struct sk_buff *skb,
1962 int flags)
72331bc0
SL
1963{
1964 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1965 flags |= RT6_LOOKUP_F_IFACE;
1966
b75cc8f9 1967 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1968}
d409b847 1969EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1970
23aebdac 1971static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1972 struct flow_keys *keys,
1973 struct flow_keys *flkeys)
23aebdac
JS
1974{
1975 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1976 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1977 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1978 const struct ipv6hdr *inner_iph;
1979 const struct icmp6hdr *icmph;
1980 struct ipv6hdr _inner_iph;
cea67a2d 1981 struct icmp6hdr _icmph;
23aebdac
JS
1982
1983 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1984 goto out;
1985
cea67a2d
ED
1986 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1987 sizeof(_icmph), &_icmph);
1988 if (!icmph)
1989 goto out;
1990
23aebdac
JS
1991 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1992 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1993 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1994 icmph->icmp6_type != ICMPV6_PARAMPROB)
1995 goto out;
1996
1997 inner_iph = skb_header_pointer(skb,
1998 skb_transport_offset(skb) + sizeof(*icmph),
1999 sizeof(_inner_iph), &_inner_iph);
2000 if (!inner_iph)
2001 goto out;
2002
2003 key_iph = inner_iph;
5e5d6fed 2004 _flkeys = NULL;
23aebdac 2005out:
5e5d6fed
RP
2006 if (_flkeys) {
2007 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2008 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2009 keys->tags.flow_label = _flkeys->tags.flow_label;
2010 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2011 } else {
2012 keys->addrs.v6addrs.src = key_iph->saddr;
2013 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 2014 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
2015 keys->basic.ip_proto = key_iph->nexthdr;
2016 }
23aebdac
JS
2017}
2018
2019/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2020u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2021 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2022{
2023 struct flow_keys hash_keys;
9a2a537a 2024 u32 mhash;
23aebdac 2025
bbfa047a 2026 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2027 case 0:
2028 memset(&hash_keys, 0, sizeof(hash_keys));
2029 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2030 if (skb) {
2031 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2032 } else {
2033 hash_keys.addrs.v6addrs.src = fl6->saddr;
2034 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2035 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2036 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2037 }
2038 break;
2039 case 1:
2040 if (skb) {
2041 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2042 struct flow_keys keys;
2043
2044 /* short-circuit if we already have L4 hash present */
2045 if (skb->l4_hash)
2046 return skb_get_hash_raw(skb) >> 1;
2047
2048 memset(&hash_keys, 0, sizeof(hash_keys));
2049
2050 if (!flkeys) {
2051 skb_flow_dissect_flow_keys(skb, &keys, flag);
2052 flkeys = &keys;
2053 }
2054 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2055 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2056 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2057 hash_keys.ports.src = flkeys->ports.src;
2058 hash_keys.ports.dst = flkeys->ports.dst;
2059 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2060 } else {
2061 memset(&hash_keys, 0, sizeof(hash_keys));
2062 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2063 hash_keys.addrs.v6addrs.src = fl6->saddr;
2064 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2065 hash_keys.ports.src = fl6->fl6_sport;
2066 hash_keys.ports.dst = fl6->fl6_dport;
2067 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2068 }
2069 break;
23aebdac 2070 }
9a2a537a 2071 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2072
9a2a537a 2073 return mhash >> 1;
23aebdac
JS
2074}
2075
c71099ac
TG
2076void ip6_route_input(struct sk_buff *skb)
2077{
b71d1d42 2078 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2079 struct net *net = dev_net(skb->dev);
adaa70bb 2080 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2081 struct ip_tunnel_info *tun_info;
4c9483b2 2082 struct flowi6 fl6 = {
e0d56fdd 2083 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2084 .daddr = iph->daddr,
2085 .saddr = iph->saddr,
6502ca52 2086 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2087 .flowi6_mark = skb->mark,
2088 .flowi6_proto = iph->nexthdr,
c71099ac 2089 };
5e5d6fed 2090 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2091
904af04d 2092 tun_info = skb_tunnel_info(skb);
46fa062a 2093 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2094 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2095
2096 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2097 flkeys = &_flkeys;
2098
23aebdac 2099 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2100 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2101 skb_dst_drop(skb);
b75cc8f9
DA
2102 skb_dst_set(skb,
2103 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2104}
2105
b75cc8f9
DA
2106static struct rt6_info *ip6_pol_route_output(struct net *net,
2107 struct fib6_table *table,
2108 struct flowi6 *fl6,
2109 const struct sk_buff *skb,
2110 int flags)
1da177e4 2111{
b75cc8f9 2112 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2113}
2114
6f21c96a
PA
2115struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2116 struct flowi6 *fl6, int flags)
c71099ac 2117{
d46a9d67 2118 bool any_src;
c71099ac 2119
3ede0bbc
RS
2120 if (ipv6_addr_type(&fl6->daddr) &
2121 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2122 struct dst_entry *dst;
2123
2124 dst = l3mdev_link_scope_lookup(net, fl6);
2125 if (dst)
2126 return dst;
2127 }
ca254490 2128
1fb9489b 2129 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2130
d46a9d67 2131 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2132 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2133 (fl6->flowi6_oif && any_src))
77d16f45 2134 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2135
d46a9d67 2136 if (!any_src)
adaa70bb 2137 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2138 else if (sk)
2139 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2140
b75cc8f9 2141 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2142}
6f21c96a 2143EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2144
2774c131 2145struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2146{
5c1e6aa3 2147 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2148 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2149 struct dst_entry *new = NULL;
2150
1dbe3252 2151 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2152 DST_OBSOLETE_DEAD, 0);
14e50e57 2153 if (rt) {
0a1f5962 2154 rt6_info_init(rt);
81eb8447 2155 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2156
0a1f5962 2157 new = &rt->dst;
14e50e57 2158 new->__use = 1;
352e512c 2159 new->input = dst_discard;
ede2059d 2160 new->output = dst_discard_out;
14e50e57 2161
0a1f5962 2162 dst_copy_metrics(new, &ort->dst);
14e50e57 2163
1dbe3252 2164 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2165 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2166 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2167
2168 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2169#ifdef CONFIG_IPV6_SUBTREES
2170 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2171#endif
14e50e57
DM
2172 }
2173
69ead7af
DM
2174 dst_release(dst_orig);
2175 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2176}
14e50e57 2177
1da177e4
LT
2178/*
2179 * Destination cache support functions
2180 */
2181
8d1c802b 2182static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2183{
93531c67
DA
2184 u32 rt_cookie = 0;
2185
8ae86971 2186 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2187 return false;
2188
2189 if (fib6_check_expired(f6i))
2190 return false;
2191
2192 return true;
4b32b5ad
MKL
2193}
2194
a68886a6
DA
2195static struct dst_entry *rt6_check(struct rt6_info *rt,
2196 struct fib6_info *from,
2197 u32 cookie)
3da59bd9 2198{
36143645 2199 u32 rt_cookie = 0;
c5cff856 2200
a68886a6 2201 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2202 rt_cookie != cookie)
3da59bd9
MKL
2203 return NULL;
2204
2205 if (rt6_check_expired(rt))
2206 return NULL;
2207
2208 return &rt->dst;
2209}
2210
a68886a6
DA
2211static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2212 struct fib6_info *from,
2213 u32 cookie)
3da59bd9 2214{
5973fb1e
MKL
2215 if (!__rt6_check_expired(rt) &&
2216 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2217 fib6_check(from, cookie))
3da59bd9
MKL
2218 return &rt->dst;
2219 else
2220 return NULL;
2221}
2222
1da177e4
LT
2223static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2224{
a87b7dc9 2225 struct dst_entry *dst_ret;
a68886a6 2226 struct fib6_info *from;
1da177e4
LT
2227 struct rt6_info *rt;
2228
a87b7dc9
DA
2229 rt = container_of(dst, struct rt6_info, dst);
2230
2231 rcu_read_lock();
1da177e4 2232
6f3118b5
ND
2233 /* All IPV6 dsts are created with ->obsolete set to the value
2234 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2235 * into this function always.
2236 */
e3bc10bd 2237
a68886a6 2238 from = rcu_dereference(rt->from);
4b32b5ad 2239
a68886a6
DA
2240 if (from && (rt->rt6i_flags & RTF_PCPU ||
2241 unlikely(!list_empty(&rt->rt6i_uncached))))
2242 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2243 else
a68886a6 2244 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2245
2246 rcu_read_unlock();
2247
2248 return dst_ret;
1da177e4
LT
2249}
2250
2251static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2252{
2253 struct rt6_info *rt = (struct rt6_info *) dst;
2254
2255 if (rt) {
54c1a859 2256 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2257 rcu_read_lock();
54c1a859 2258 if (rt6_check_expired(rt)) {
93531c67 2259 rt6_remove_exception_rt(rt);
54c1a859
YH
2260 dst = NULL;
2261 }
c3c14da0 2262 rcu_read_unlock();
54c1a859 2263 } else {
1da177e4 2264 dst_release(dst);
54c1a859
YH
2265 dst = NULL;
2266 }
1da177e4 2267 }
54c1a859 2268 return dst;
1da177e4
LT
2269}
2270
2271static void ip6_link_failure(struct sk_buff *skb)
2272{
2273 struct rt6_info *rt;
2274
3ffe533c 2275 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2276
adf30907 2277 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2278 if (rt) {
8a14e46f 2279 rcu_read_lock();
1eb4f758 2280 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2281 rt6_remove_exception_rt(rt);
c5cff856 2282 } else {
a68886a6 2283 struct fib6_info *from;
c5cff856
WW
2284 struct fib6_node *fn;
2285
a68886a6
DA
2286 from = rcu_dereference(rt->from);
2287 if (from) {
2288 fn = rcu_dereference(from->fib6_node);
2289 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2290 fn->fn_sernum = -1;
2291 }
1eb4f758 2292 }
8a14e46f 2293 rcu_read_unlock();
1da177e4
LT
2294 }
2295}
2296
6a3e030f
DA
2297static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2298{
a68886a6
DA
2299 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2300 struct fib6_info *from;
2301
2302 rcu_read_lock();
2303 from = rcu_dereference(rt0->from);
2304 if (from)
2305 rt0->dst.expires = from->expires;
2306 rcu_read_unlock();
2307 }
6a3e030f
DA
2308
2309 dst_set_expires(&rt0->dst, timeout);
2310 rt0->rt6i_flags |= RTF_EXPIRES;
2311}
2312
45e4fd26
MKL
2313static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2314{
2315 struct net *net = dev_net(rt->dst.dev);
2316
d4ead6b3 2317 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2318 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2319 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2320}
2321
0d3f6d29
MKL
2322static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2323{
2324 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2325 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2326}
2327
45e4fd26
MKL
2328static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2329 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2330{
0dec879f 2331 const struct in6_addr *daddr, *saddr;
67ba4152 2332 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2333
19bda36c
XL
2334 if (dst_metric_locked(dst, RTAX_MTU))
2335 return;
2336
0dec879f
JA
2337 if (iph) {
2338 daddr = &iph->daddr;
2339 saddr = &iph->saddr;
2340 } else if (sk) {
2341 daddr = &sk->sk_v6_daddr;
2342 saddr = &inet6_sk(sk)->saddr;
2343 } else {
2344 daddr = NULL;
2345 saddr = NULL;
2346 }
2347 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2348 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2349 if (mtu >= dst_mtu(dst))
2350 return;
9d289715 2351
0d3f6d29 2352 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2353 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2354 /* update rt6_ex->stamp for cache */
2355 if (rt6->rt6i_flags & RTF_CACHE)
2356 rt6_update_exception_stamp_rt(rt6);
0dec879f 2357 } else if (daddr) {
85bd05de 2358 struct fib6_result res = {};
45e4fd26
MKL
2359 struct rt6_info *nrt6;
2360
4d85cd0c 2361 rcu_read_lock();
85bd05de
DA
2362 res.f6i = rcu_dereference(rt6->from);
2363 if (!res.f6i) {
9c69a132
JL
2364 rcu_read_unlock();
2365 return;
2366 }
85bd05de
DA
2367 res.nh = &res.f6i->fib6_nh;
2368 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
45e4fd26
MKL
2369 if (nrt6) {
2370 rt6_do_update_pmtu(nrt6, mtu);
5012f0a5 2371 if (rt6_insert_exception(nrt6, &res))
2b760fcf 2372 dst_release_immediate(&nrt6->dst);
45e4fd26 2373 }
a68886a6 2374 rcu_read_unlock();
1da177e4
LT
2375 }
2376}
2377
45e4fd26
MKL
2378static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2379 struct sk_buff *skb, u32 mtu)
2380{
2381 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2382}
2383
42ae66c8 2384void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2385 int oif, u32 mark, kuid_t uid)
81aded24
DM
2386{
2387 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2388 struct dst_entry *dst;
dc92095d
2389 struct flowi6 fl6 = {
2390 .flowi6_oif = oif,
2391 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2392 .daddr = iph->daddr,
2393 .saddr = iph->saddr,
2394 .flowlabel = ip6_flowinfo(iph),
2395 .flowi6_uid = uid,
2396 };
81aded24
DM
2397
2398 dst = ip6_route_output(net, NULL, &fl6);
2399 if (!dst->error)
45e4fd26 2400 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2401 dst_release(dst);
2402}
2403EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2404
2405void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2406{
7ddacfa5 2407 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2408 struct dst_entry *dst;
2409
7ddacfa5
DA
2410 if (!oif && skb->dev)
2411 oif = l3mdev_master_ifindex(skb->dev);
2412
2413 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2414
2415 dst = __sk_dst_get(sk);
2416 if (!dst || !dst->obsolete ||
2417 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2418 return;
2419
2420 bh_lock_sock(sk);
2421 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2422 ip6_datagram_dst_update(sk, false);
2423 bh_unlock_sock(sk);
81aded24
DM
2424}
2425EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2426
7d6850f7
AK
2427void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2428 const struct flowi6 *fl6)
2429{
2430#ifdef CONFIG_IPV6_SUBTREES
2431 struct ipv6_pinfo *np = inet6_sk(sk);
2432#endif
2433
2434 ip6_dst_store(sk, dst,
2435 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2436 &sk->sk_v6_daddr : NULL,
2437#ifdef CONFIG_IPV6_SUBTREES
2438 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2439 &np->saddr :
2440#endif
2441 NULL);
2442}
2443
9b6b35ab 2444static bool ip6_redirect_nh_match(const struct fib6_result *res,
0b34eb00
DA
2445 struct flowi6 *fl6,
2446 const struct in6_addr *gw,
2447 struct rt6_info **ret)
2448{
9b6b35ab
DA
2449 const struct fib6_nh *nh = res->nh;
2450
0b34eb00
DA
2451 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2452 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2453 return false;
2454
2455 /* rt_cache's gateway might be different from its 'parent'
2456 * in the case of an ip redirect.
2457 * So we keep searching in the exception table if the gateway
2458 * is different.
2459 */
2460 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2461 struct rt6_info *rt_cache;
2462
9b6b35ab 2463 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
0b34eb00
DA
2464 if (rt_cache &&
2465 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2466 *ret = rt_cache;
2467 return true;
2468 }
2469 return false;
2470 }
2471 return true;
2472}
2473
b55b76b2
DJ
2474/* Handle redirects */
2475struct ip6rd_flowi {
2476 struct flowi6 fl6;
2477 struct in6_addr gateway;
2478};
2479
2480static struct rt6_info *__ip6_route_redirect(struct net *net,
2481 struct fib6_table *table,
2482 struct flowi6 *fl6,
b75cc8f9 2483 const struct sk_buff *skb,
b55b76b2
DJ
2484 int flags)
2485{
2486 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 2487 struct rt6_info *ret = NULL;
9b6b35ab 2488 struct fib6_result res = {};
8d1c802b 2489 struct fib6_info *rt;
b55b76b2
DJ
2490 struct fib6_node *fn;
2491
2492 /* Get the "current" route for this destination and
67c408cf 2493 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2494 *
2495 * RFC 4861 specifies that redirects should only be
2496 * accepted if they come from the nexthop to the target.
2497 * Due to the way the routes are chosen, this notion
2498 * is a bit fuzzy and one might need to check all possible
2499 * routes.
2500 */
2501
66f5d6ce 2502 rcu_read_lock();
6454743b 2503 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2504restart:
66f5d6ce 2505 for_each_fib6_node_rt_rcu(fn) {
9b6b35ab
DA
2506 res.f6i = rt;
2507 res.nh = &rt->fib6_nh;
2508
14895687 2509 if (fib6_check_expired(rt))
b55b76b2 2510 continue;
93c2fb25 2511 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2512 break;
9b6b35ab 2513 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
0b34eb00 2514 goto out;
b55b76b2
DJ
2515 }
2516
2517 if (!rt)
421842ed 2518 rt = net->ipv6.fib6_null_entry;
93c2fb25 2519 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2520 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2521 goto out;
2522 }
2523
421842ed 2524 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2525 fn = fib6_backtrack(fn, &fl6->saddr);
2526 if (fn)
2527 goto restart;
b55b76b2 2528 }
a3c00e46 2529
9b6b35ab
DA
2530 res.f6i = rt;
2531 res.nh = &rt->fib6_nh;
b0a1ba59 2532out:
23fb93a4 2533 if (ret)
10585b43 2534 ip6_hold_safe(net, &ret);
23fb93a4 2535 else
9b6b35ab 2536 ret = ip6_create_rt_rcu(&res);
b55b76b2 2537
66f5d6ce 2538 rcu_read_unlock();
b55b76b2 2539
8ff2e5b2 2540 trace_fib6_table_lookup(net, &res, table, fl6);
23fb93a4 2541 return ret;
b55b76b2
DJ
2542};
2543
2544static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2545 const struct flowi6 *fl6,
2546 const struct sk_buff *skb,
2547 const struct in6_addr *gateway)
b55b76b2
DJ
2548{
2549 int flags = RT6_LOOKUP_F_HAS_SADDR;
2550 struct ip6rd_flowi rdfl;
2551
2552 rdfl.fl6 = *fl6;
2553 rdfl.gateway = *gateway;
2554
b75cc8f9 2555 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2556 flags, __ip6_route_redirect);
2557}
2558
e2d118a1
LC
2559void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2560 kuid_t uid)
3a5ad2ee
DM
2561{
2562 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2563 struct dst_entry *dst;
1f7f10ac
2564 struct flowi6 fl6 = {
2565 .flowi6_iif = LOOPBACK_IFINDEX,
2566 .flowi6_oif = oif,
2567 .flowi6_mark = mark,
2568 .daddr = iph->daddr,
2569 .saddr = iph->saddr,
2570 .flowlabel = ip6_flowinfo(iph),
2571 .flowi6_uid = uid,
2572 };
3a5ad2ee 2573
b75cc8f9 2574 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2575 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2576 dst_release(dst);
2577}
2578EXPORT_SYMBOL_GPL(ip6_redirect);
2579
d456336d 2580void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2581{
2582 const struct ipv6hdr *iph = ipv6_hdr(skb);
2583 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2584 struct dst_entry *dst;
0b26fb17
2585 struct flowi6 fl6 = {
2586 .flowi6_iif = LOOPBACK_IFINDEX,
2587 .flowi6_oif = oif,
0b26fb17
2588 .daddr = msg->dest,
2589 .saddr = iph->daddr,
2590 .flowi6_uid = sock_net_uid(net, NULL),
2591 };
c92a59ec 2592
b75cc8f9 2593 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2594 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2595 dst_release(dst);
2596}
2597
3a5ad2ee
DM
2598void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2599{
e2d118a1
LC
2600 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2601 sk->sk_uid);
3a5ad2ee
DM
2602}
2603EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2604
0dbaee3b 2605static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2606{
0dbaee3b
DM
2607 struct net_device *dev = dst->dev;
2608 unsigned int mtu = dst_mtu(dst);
2609 struct net *net = dev_net(dev);
2610
1da177e4
LT
2611 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2612
5578689a
DL
2613 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2614 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2615
2616 /*
1ab1457c
YH
2617 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2618 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2619 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2620 * rely only on pmtu discovery"
2621 */
2622 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2623 mtu = IPV6_MAXPLEN;
2624 return mtu;
2625}
2626
ebb762f2 2627static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2628{
d33e4553 2629 struct inet6_dev *idev;
d4ead6b3 2630 unsigned int mtu;
4b32b5ad
MKL
2631
2632 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2633 if (mtu)
30f78d8e 2634 goto out;
618f9bc7
SK
2635
2636 mtu = IPV6_MIN_MTU;
d33e4553
DM
2637
2638 rcu_read_lock();
2639 idev = __in6_dev_get(dst->dev);
2640 if (idev)
2641 mtu = idev->cnf.mtu6;
2642 rcu_read_unlock();
2643
30f78d8e 2644out:
14972cbd
RP
2645 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2646
2647 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2648}
2649
901731b8
DA
2650/* MTU selection:
2651 * 1. mtu on route is locked - use it
2652 * 2. mtu from nexthop exception
2653 * 3. mtu from egress device
2654 *
2655 * based on ip6_dst_mtu_forward and exception logic of
2656 * rt6_find_cached_rt; called with rcu_read_lock
2657 */
b748f260
DA
2658u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2659 const struct in6_addr *daddr,
2660 const struct in6_addr *saddr)
901731b8
DA
2661{
2662 struct rt6_exception_bucket *bucket;
b748f260
DA
2663 const struct fib6_nh *nh = res->nh;
2664 struct fib6_info *f6i = res->f6i;
2665 const struct in6_addr *src_key;
901731b8 2666 struct rt6_exception *rt6_ex;
901731b8
DA
2667 struct inet6_dev *idev;
2668 u32 mtu = 0;
2669
2670 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2671 mtu = f6i->fib6_pmtu;
2672 if (mtu)
2673 goto out;
2674 }
2675
2676 src_key = NULL;
2677#ifdef CONFIG_IPV6_SUBTREES
2678 if (f6i->fib6_src.plen)
2679 src_key = saddr;
2680#endif
2681
2682 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2683 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2684 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2685 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2686
2687 if (likely(!mtu)) {
b748f260 2688 struct net_device *dev = nh->fib_nh_dev;
901731b8
DA
2689
2690 mtu = IPV6_MIN_MTU;
2691 idev = __in6_dev_get(dev);
2692 if (idev && idev->cnf.mtu6 > mtu)
2693 mtu = idev->cnf.mtu6;
2694 }
2695
2696 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2697out:
b748f260 2698 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
901731b8
DA
2699}
2700
3b00944c 2701struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2702 struct flowi6 *fl6)
1da177e4 2703{
87a11578 2704 struct dst_entry *dst;
1da177e4
LT
2705 struct rt6_info *rt;
2706 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2707 struct net *net = dev_net(dev);
1da177e4 2708
38308473 2709 if (unlikely(!idev))
122bdf67 2710 return ERR_PTR(-ENODEV);
1da177e4 2711
ad706862 2712 rt = ip6_dst_alloc(net, dev, 0);
38308473 2713 if (unlikely(!rt)) {
1da177e4 2714 in6_dev_put(idev);
87a11578 2715 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2716 goto out;
2717 }
2718
8e2ec639 2719 rt->dst.flags |= DST_HOST;
588753f1 2720 rt->dst.input = ip6_input;
8e2ec639 2721 rt->dst.output = ip6_output;
550bab42 2722 rt->rt6i_gateway = fl6->daddr;
87a11578 2723 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2724 rt->rt6i_dst.plen = 128;
2725 rt->rt6i_idev = idev;
14edd87d 2726 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2727
4c981e28 2728 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2729 * do proper release of the net_device
2730 */
2731 rt6_uncached_list_add(rt);
81eb8447 2732 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2733
87a11578
DM
2734 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2735
1da177e4 2736out:
87a11578 2737 return dst;
1da177e4
LT
2738}
2739
569d3645 2740static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2741{
86393e52 2742 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2743 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2744 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2745 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2746 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2747 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2748 int entries;
7019b78e 2749
fc66f95c 2750 entries = dst_entries_get_fast(ops);
49a18d86 2751 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2752 entries <= rt_max_size)
1da177e4
LT
2753 goto out;
2754
6891a346 2755 net->ipv6.ip6_rt_gc_expire++;
14956643 2756 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2757 entries = dst_entries_get_slow(ops);
2758 if (entries < ops->gc_thresh)
7019b78e 2759 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2760out:
7019b78e 2761 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2762 return entries > rt_max_size;
1da177e4
LT
2763}
2764
8c14586f
DA
2765static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2766 struct fib6_config *cfg,
f4797b33
DA
2767 const struct in6_addr *gw_addr,
2768 u32 tbid, int flags)
8c14586f
DA
2769{
2770 struct flowi6 fl6 = {
2771 .flowi6_oif = cfg->fc_ifindex,
2772 .daddr = *gw_addr,
2773 .saddr = cfg->fc_prefsrc,
2774 };
2775 struct fib6_table *table;
2776 struct rt6_info *rt;
8c14586f 2777
f4797b33 2778 table = fib6_get_table(net, tbid);
8c14586f
DA
2779 if (!table)
2780 return NULL;
2781
2782 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2783 flags |= RT6_LOOKUP_F_HAS_SADDR;
2784
f4797b33 2785 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2786 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2787
2788 /* if table lookup failed, fall back to full lookup */
2789 if (rt == net->ipv6.ip6_null_entry) {
2790 ip6_rt_put(rt);
2791 rt = NULL;
2792 }
2793
2794 return rt;
2795}
2796
fc1e64e1
DA
2797static int ip6_route_check_nh_onlink(struct net *net,
2798 struct fib6_config *cfg,
9fbb704c 2799 const struct net_device *dev,
fc1e64e1
DA
2800 struct netlink_ext_ack *extack)
2801{
44750f84 2802 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2803 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2804 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2805 struct fib6_info *from;
fc1e64e1
DA
2806 struct rt6_info *grt;
2807 int err;
2808
2809 err = 0;
2810 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2811 if (grt) {
bf1dc8ba
PA
2812 rcu_read_lock();
2813 from = rcu_dereference(grt->from);
58e354c0 2814 if (!grt->dst.error &&
4ed591c8 2815 /* ignore match if it is the default route */
bf1dc8ba 2816 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2817 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2818 NL_SET_ERR_MSG(extack,
2819 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2820 err = -EINVAL;
2821 }
bf1dc8ba 2822 rcu_read_unlock();
fc1e64e1
DA
2823
2824 ip6_rt_put(grt);
2825 }
2826
2827 return err;
2828}
2829
1edce99f
DA
2830static int ip6_route_check_nh(struct net *net,
2831 struct fib6_config *cfg,
2832 struct net_device **_dev,
2833 struct inet6_dev **idev)
2834{
2835 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2836 struct net_device *dev = _dev ? *_dev : NULL;
2837 struct rt6_info *grt = NULL;
2838 int err = -EHOSTUNREACH;
2839
2840 if (cfg->fc_table) {
f4797b33
DA
2841 int flags = RT6_LOOKUP_F_IFACE;
2842
2843 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2844 cfg->fc_table, flags);
1edce99f
DA
2845 if (grt) {
2846 if (grt->rt6i_flags & RTF_GATEWAY ||
2847 (dev && dev != grt->dst.dev)) {
2848 ip6_rt_put(grt);
2849 grt = NULL;
2850 }
2851 }
2852 }
2853
2854 if (!grt)
b75cc8f9 2855 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2856
2857 if (!grt)
2858 goto out;
2859
2860 if (dev) {
2861 if (dev != grt->dst.dev) {
2862 ip6_rt_put(grt);
2863 goto out;
2864 }
2865 } else {
2866 *_dev = dev = grt->dst.dev;
2867 *idev = grt->rt6i_idev;
2868 dev_hold(dev);
2869 in6_dev_hold(grt->rt6i_idev);
2870 }
2871
2872 if (!(grt->rt6i_flags & RTF_GATEWAY))
2873 err = 0;
2874
2875 ip6_rt_put(grt);
2876
2877out:
2878 return err;
2879}
2880
9fbb704c
DA
2881static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2882 struct net_device **_dev, struct inet6_dev **idev,
2883 struct netlink_ext_ack *extack)
2884{
2885 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2886 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2887 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2888 const struct net_device *dev = *_dev;
232378e8 2889 bool need_addr_check = !dev;
9fbb704c
DA
2890 int err = -EINVAL;
2891
2892 /* if gw_addr is local we will fail to detect this in case
2893 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2894 * will return already-added prefix route via interface that
2895 * prefix route was assigned to, which might be non-loopback.
2896 */
232378e8
DA
2897 if (dev &&
2898 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2899 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2900 goto out;
2901 }
2902
2903 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2904 /* IPv6 strictly inhibits using not link-local
2905 * addresses as nexthop address.
2906 * Otherwise, router will not able to send redirects.
2907 * It is very good, but in some (rare!) circumstances
2908 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2909 * some exceptions. --ANK
2910 * We allow IPv4-mapped nexthops to support RFC4798-type
2911 * addressing
2912 */
2913 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2914 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2915 goto out;
2916 }
2917
2918 if (cfg->fc_flags & RTNH_F_ONLINK)
2919 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2920 else
2921 err = ip6_route_check_nh(net, cfg, _dev, idev);
2922
2923 if (err)
2924 goto out;
2925 }
2926
2927 /* reload in case device was changed */
2928 dev = *_dev;
2929
2930 err = -EINVAL;
2931 if (!dev) {
2932 NL_SET_ERR_MSG(extack, "Egress device not specified");
2933 goto out;
2934 } else if (dev->flags & IFF_LOOPBACK) {
2935 NL_SET_ERR_MSG(extack,
2936 "Egress device can not be loopback device for this route");
2937 goto out;
2938 }
232378e8
DA
2939
2940 /* if we did not check gw_addr above, do so now that the
2941 * egress device has been resolved.
2942 */
2943 if (need_addr_check &&
2944 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2945 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2946 goto out;
2947 }
2948
9fbb704c
DA
2949 err = 0;
2950out:
2951 return err;
2952}
2953
83c44251
DA
2954static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2955{
2956 if ((flags & RTF_REJECT) ||
2957 (dev && (dev->flags & IFF_LOOPBACK) &&
2958 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2959 !(flags & RTF_LOCAL)))
2960 return true;
2961
2962 return false;
2963}
2964
2965int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2966 struct fib6_config *cfg, gfp_t gfp_flags,
2967 struct netlink_ext_ack *extack)
2968{
2969 struct net_device *dev = NULL;
2970 struct inet6_dev *idev = NULL;
2971 int addr_type;
2972 int err;
2973
f1741730
DA
2974 fib6_nh->fib_nh_family = AF_INET6;
2975
83c44251
DA
2976 err = -ENODEV;
2977 if (cfg->fc_ifindex) {
2978 dev = dev_get_by_index(net, cfg->fc_ifindex);
2979 if (!dev)
2980 goto out;
2981 idev = in6_dev_get(dev);
2982 if (!idev)
2983 goto out;
2984 }
2985
2986 if (cfg->fc_flags & RTNH_F_ONLINK) {
2987 if (!dev) {
2988 NL_SET_ERR_MSG(extack,
2989 "Nexthop device required for onlink");
2990 goto out;
2991 }
2992
2993 if (!(dev->flags & IFF_UP)) {
2994 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2995 err = -ENETDOWN;
2996 goto out;
2997 }
2998
ad1601ae 2999 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
3000 }
3001
ad1601ae 3002 fib6_nh->fib_nh_weight = 1;
83c44251
DA
3003
3004 /* We cannot add true routes via loopback here,
3005 * they would result in kernel looping; promote them to reject routes
3006 */
3007 addr_type = ipv6_addr_type(&cfg->fc_dst);
3008 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3009 /* hold loopback dev/idev if we haven't done so. */
3010 if (dev != net->loopback_dev) {
3011 if (dev) {
3012 dev_put(dev);
3013 in6_dev_put(idev);
3014 }
3015 dev = net->loopback_dev;
3016 dev_hold(dev);
3017 idev = in6_dev_get(dev);
3018 if (!idev) {
3019 err = -ENODEV;
3020 goto out;
3021 }
3022 }
3023 goto set_dev;
3024 }
3025
3026 if (cfg->fc_flags & RTF_GATEWAY) {
3027 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3028 if (err)
3029 goto out;
3030
ad1601ae 3031 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 3032 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
3033 }
3034
3035 err = -ENODEV;
3036 if (!dev)
3037 goto out;
3038
3039 if (idev->cnf.disable_ipv6) {
3040 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3041 err = -EACCES;
3042 goto out;
3043 }
3044
3045 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3046 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3047 err = -ENETDOWN;
3048 goto out;
3049 }
3050
3051 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3052 !netif_carrier_ok(dev))
ad1601ae 3053 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3054
979e276e
DA
3055 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3056 cfg->fc_encap_type, cfg, gfp_flags, extack);
3057 if (err)
3058 goto out;
83c44251 3059set_dev:
ad1601ae 3060 fib6_nh->fib_nh_dev = dev;
f1741730 3061 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3062 err = 0;
3063out:
3064 if (idev)
3065 in6_dev_put(idev);
3066
3067 if (err) {
ad1601ae
DA
3068 lwtstate_put(fib6_nh->fib_nh_lws);
3069 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3070 if (dev)
3071 dev_put(dev);
3072 }
3073
3074 return err;
3075}
3076
dac7d0f2
DA
3077void fib6_nh_release(struct fib6_nh *fib6_nh)
3078{
979e276e 3079 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3080}
3081
8d1c802b 3082static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3083 gfp_t gfp_flags,
333c4301 3084 struct netlink_ext_ack *extack)
1da177e4 3085{
5578689a 3086 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3087 struct fib6_info *rt = NULL;
c71099ac 3088 struct fib6_table *table;
8c5b83f0 3089 int err = -EINVAL;
83c44251 3090 int addr_type;
1da177e4 3091
557c44be 3092 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3093 if (cfg->fc_flags & RTF_PCPU) {
3094 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3095 goto out;
d5d531cb 3096 }
557c44be 3097
2ea2352e
WW
3098 /* RTF_CACHE is an internal flag; can not be set by userspace */
3099 if (cfg->fc_flags & RTF_CACHE) {
3100 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3101 goto out;
3102 }
3103
e8478e80
DA
3104 if (cfg->fc_type > RTN_MAX) {
3105 NL_SET_ERR_MSG(extack, "Invalid route type");
3106 goto out;
3107 }
3108
d5d531cb
DA
3109 if (cfg->fc_dst_len > 128) {
3110 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3111 goto out;
3112 }
3113 if (cfg->fc_src_len > 128) {
3114 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3115 goto out;
d5d531cb 3116 }
1da177e4 3117#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3118 if (cfg->fc_src_len) {
3119 NL_SET_ERR_MSG(extack,
3120 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3121 goto out;
d5d531cb 3122 }
1da177e4 3123#endif
fc1e64e1 3124
d71314b4 3125 err = -ENOBUFS;
38308473
DM
3126 if (cfg->fc_nlinfo.nlh &&
3127 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3128 table = fib6_get_table(net, cfg->fc_table);
38308473 3129 if (!table) {
f3213831 3130 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3131 table = fib6_new_table(net, cfg->fc_table);
3132 }
3133 } else {
3134 table = fib6_new_table(net, cfg->fc_table);
3135 }
38308473
DM
3136
3137 if (!table)
c71099ac 3138 goto out;
c71099ac 3139
93531c67
DA
3140 err = -ENOMEM;
3141 rt = fib6_info_alloc(gfp_flags);
3142 if (!rt)
1da177e4 3143 goto out;
93531c67 3144
d7e774f3
DA
3145 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3146 extack);
767a2217
DA
3147 if (IS_ERR(rt->fib6_metrics)) {
3148 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3149 /* Do not leave garbage there. */
3150 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3151 goto out;
3152 }
3153
93531c67
DA
3154 if (cfg->fc_flags & RTF_ADDRCONF)
3155 rt->dst_nocount = true;
1da177e4 3156
1716a961 3157 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3158 fib6_set_expires(rt, jiffies +
1716a961
G
3159 clock_t_to_jiffies(cfg->fc_expires));
3160 else
14895687 3161 fib6_clean_expires(rt);
1da177e4 3162
86872cb5
TG
3163 if (cfg->fc_protocol == RTPROT_UNSPEC)
3164 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3165 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3166
83c44251
DA
3167 rt->fib6_table = table;
3168 rt->fib6_metric = cfg->fc_metric;
3169 rt->fib6_type = cfg->fc_type;
2b2450ca 3170 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3171
93c2fb25
DA
3172 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3173 rt->fib6_dst.plen = cfg->fc_dst_len;
3174 if (rt->fib6_dst.plen == 128)
3b6761d1 3175 rt->dst_host = true;
e5fd387a 3176
1da177e4 3177#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3178 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3179 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3180#endif
83c44251
DA
3181 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3182 if (err)
3183 goto out;
1da177e4
LT
3184
3185 /* We cannot add true routes via loopback here,
83c44251 3186 * they would result in kernel looping; promote them to reject routes
1da177e4 3187 */
83c44251 3188 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3189 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3190 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3191
c3968a85 3192 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3193 struct net_device *dev = fib6_info_nh_dev(rt);
3194
c3968a85 3195 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3196 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3197 err = -EINVAL;
3198 goto out;
3199 }
93c2fb25
DA
3200 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3201 rt->fib6_prefsrc.plen = 128;
c3968a85 3202 } else
93c2fb25 3203 rt->fib6_prefsrc.plen = 0;
c3968a85 3204
8c5b83f0 3205 return rt;
6b9ea5a6 3206out:
93531c67 3207 fib6_info_release(rt);
8c5b83f0 3208 return ERR_PTR(err);
6b9ea5a6
RP
3209}
3210
acb54e3c 3211int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3212 struct netlink_ext_ack *extack)
6b9ea5a6 3213{
8d1c802b 3214 struct fib6_info *rt;
6b9ea5a6
RP
3215 int err;
3216
acb54e3c 3217 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3218 if (IS_ERR(rt))
3219 return PTR_ERR(rt);
6b9ea5a6 3220
d4ead6b3 3221 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3222 fib6_info_release(rt);
6b9ea5a6 3223
1da177e4
LT
3224 return err;
3225}
3226
8d1c802b 3227static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3228{
afb1d4b5 3229 struct net *net = info->nl_net;
c71099ac 3230 struct fib6_table *table;
afb1d4b5 3231 int err;
1da177e4 3232
421842ed 3233 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3234 err = -ENOENT;
3235 goto out;
3236 }
6c813a72 3237
93c2fb25 3238 table = rt->fib6_table;
66f5d6ce 3239 spin_lock_bh(&table->tb6_lock);
86872cb5 3240 err = fib6_del(rt, info);
66f5d6ce 3241 spin_unlock_bh(&table->tb6_lock);
1da177e4 3242
6825a26c 3243out:
93531c67 3244 fib6_info_release(rt);
1da177e4
LT
3245 return err;
3246}
3247
8d1c802b 3248int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3249{
afb1d4b5
DA
3250 struct nl_info info = { .nl_net = net };
3251
528c4ceb 3252 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3253}
3254
8d1c802b 3255static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3256{
3257 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3258 struct net *net = info->nl_net;
16a16cd3 3259 struct sk_buff *skb = NULL;
0ae81335 3260 struct fib6_table *table;
e3330039 3261 int err = -ENOENT;
0ae81335 3262
421842ed 3263 if (rt == net->ipv6.fib6_null_entry)
e3330039 3264 goto out_put;
93c2fb25 3265 table = rt->fib6_table;
66f5d6ce 3266 spin_lock_bh(&table->tb6_lock);
0ae81335 3267
93c2fb25 3268 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3269 struct fib6_info *sibling, *next_sibling;
0ae81335 3270
16a16cd3
DA
3271 /* prefer to send a single notification with all hops */
3272 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3273 if (skb) {
3274 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3275
d4ead6b3 3276 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3277 NULL, NULL, 0, RTM_DELROUTE,
3278 info->portid, seq, 0) < 0) {
3279 kfree_skb(skb);
3280 skb = NULL;
3281 } else
3282 info->skip_notify = 1;
3283 }
3284
0ae81335 3285 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3286 &rt->fib6_siblings,
3287 fib6_siblings) {
0ae81335
DA
3288 err = fib6_del(sibling, info);
3289 if (err)
e3330039 3290 goto out_unlock;
0ae81335
DA
3291 }
3292 }
3293
3294 err = fib6_del(rt, info);
e3330039 3295out_unlock:
66f5d6ce 3296 spin_unlock_bh(&table->tb6_lock);
e3330039 3297out_put:
93531c67 3298 fib6_info_release(rt);
16a16cd3
DA
3299
3300 if (skb) {
e3330039 3301 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3302 info->nlh, gfp_any());
3303 }
0ae81335
DA
3304 return err;
3305}
3306
23fb93a4
DA
3307static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3308{
3309 int rc = -ESRCH;
3310
3311 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3312 goto out;
3313
3314 if (cfg->fc_flags & RTF_GATEWAY &&
3315 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3316 goto out;
761f6026
XL
3317
3318 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3319out:
3320 return rc;
3321}
3322
333c4301
DA
3323static int ip6_route_del(struct fib6_config *cfg,
3324 struct netlink_ext_ack *extack)
1da177e4 3325{
8d1c802b 3326 struct rt6_info *rt_cache;
c71099ac 3327 struct fib6_table *table;
8d1c802b 3328 struct fib6_info *rt;
1da177e4 3329 struct fib6_node *fn;
1da177e4
LT
3330 int err = -ESRCH;
3331
5578689a 3332 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3333 if (!table) {
3334 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3335 return err;
d5d531cb 3336 }
c71099ac 3337
66f5d6ce 3338 rcu_read_lock();
1da177e4 3339
c71099ac 3340 fn = fib6_locate(&table->tb6_root,
86872cb5 3341 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3342 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3343 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3344
1da177e4 3345 if (fn) {
66f5d6ce 3346 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3347 struct fib6_nh *nh;
3348
2b760fcf 3349 if (cfg->fc_flags & RTF_CACHE) {
7e4b5128
DA
3350 struct fib6_result res = {
3351 .f6i = rt,
3352 };
23fb93a4
DA
3353 int rc;
3354
7e4b5128
DA
3355 rt_cache = rt6_find_cached_rt(&res,
3356 &cfg->fc_dst,
2b760fcf 3357 &cfg->fc_src);
23fb93a4
DA
3358 if (rt_cache) {
3359 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3360 if (rc != -ESRCH) {
3361 rcu_read_unlock();
23fb93a4 3362 return rc;
9e575010 3363 }
23fb93a4
DA
3364 }
3365 continue;
2b760fcf 3366 }
ad1601ae
DA
3367
3368 nh = &rt->fib6_nh;
86872cb5 3369 if (cfg->fc_ifindex &&
ad1601ae
DA
3370 (!nh->fib_nh_dev ||
3371 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3372 continue;
86872cb5 3373 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3374 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3375 continue;
93c2fb25 3376 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3377 continue;
93c2fb25 3378 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3379 continue;
e873e4b9
WW
3380 if (!fib6_info_hold_safe(rt))
3381 continue;
66f5d6ce 3382 rcu_read_unlock();
1da177e4 3383
0ae81335
DA
3384 /* if gateway was specified only delete the one hop */
3385 if (cfg->fc_flags & RTF_GATEWAY)
3386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3387
3388 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3389 }
3390 }
66f5d6ce 3391 rcu_read_unlock();
1da177e4
LT
3392
3393 return err;
3394}
3395
6700c270 3396static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3397{
a6279458 3398 struct netevent_redirect netevent;
e8599ff4 3399 struct rt6_info *rt, *nrt = NULL;
85bd05de 3400 struct fib6_result res = {};
e8599ff4
DM
3401 struct ndisc_options ndopts;
3402 struct inet6_dev *in6_dev;
3403 struct neighbour *neigh;
71bcdba0 3404 struct rd_msg *msg;
6e157b6a
DM
3405 int optlen, on_link;
3406 u8 *lladdr;
e8599ff4 3407
29a3cad5 3408 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3409 optlen -= sizeof(*msg);
e8599ff4
DM
3410
3411 if (optlen < 0) {
6e157b6a 3412 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3413 return;
3414 }
3415
71bcdba0 3416 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3417
71bcdba0 3418 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3419 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3420 return;
3421 }
3422
6e157b6a 3423 on_link = 0;
71bcdba0 3424 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3425 on_link = 1;
71bcdba0 3426 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3427 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3428 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3429 return;
3430 }
3431
3432 in6_dev = __in6_dev_get(skb->dev);
3433 if (!in6_dev)
3434 return;
3435 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3436 return;
3437
3438 /* RFC2461 8.1:
3439 * The IP source address of the Redirect MUST be the same as the current
3440 * first-hop router for the specified ICMP Destination Address.
3441 */
3442
f997c55c 3443 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3444 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3445 return;
3446 }
6e157b6a
DM
3447
3448 lladdr = NULL;
e8599ff4
DM
3449 if (ndopts.nd_opts_tgt_lladdr) {
3450 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3451 skb->dev);
3452 if (!lladdr) {
3453 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3454 return;
3455 }
3456 }
3457
6e157b6a 3458 rt = (struct rt6_info *) dst;
ec13ad1d 3459 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3460 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3461 return;
6e157b6a 3462 }
e8599ff4 3463
6e157b6a
DM
3464 /* Redirect received -> path was valid.
3465 * Look, redirects are sent only in response to data packets,
3466 * so that this nexthop apparently is reachable. --ANK
3467 */
0dec879f 3468 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3469
71bcdba0 3470 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3471 if (!neigh)
3472 return;
a6279458 3473
1da177e4
LT
3474 /*
3475 * We have finally decided to accept it.
3476 */
3477
f997c55c 3478 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3479 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3480 NEIGH_UPDATE_F_OVERRIDE|
3481 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3482 NEIGH_UPDATE_F_ISROUTER)),
3483 NDISC_REDIRECT, &ndopts);
1da177e4 3484
4d85cd0c 3485 rcu_read_lock();
85bd05de 3486 res.f6i = rcu_dereference(rt->from);
e873e4b9
WW
3487 /* This fib6_info_hold() is safe here because we hold reference to rt
3488 * and rt already holds reference to fib6_info.
3489 */
85bd05de 3490 fib6_info_hold(res.f6i);
4d85cd0c 3491 rcu_read_unlock();
8a14e46f 3492
85bd05de
DA
3493 res.nh = &res.f6i->fib6_nh;
3494 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
38308473 3495 if (!nrt)
1da177e4
LT
3496 goto out;
3497
3498 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3499 if (on_link)
3500 nrt->rt6i_flags &= ~RTF_GATEWAY;
3501
4e3fd7a0 3502 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3503
2b760fcf
WW
3504 /* No need to remove rt from the exception table if rt is
3505 * a cached route because rt6_insert_exception() will
3506 * takes care of it
3507 */
5012f0a5 3508 if (rt6_insert_exception(nrt, &res)) {
2b760fcf
WW
3509 dst_release_immediate(&nrt->dst);
3510 goto out;
3511 }
1da177e4 3512
d8d1f30b
CG
3513 netevent.old = &rt->dst;
3514 netevent.new = &nrt->dst;
71bcdba0 3515 netevent.daddr = &msg->dest;
60592833 3516 netevent.neigh = neigh;
8d71740c
TT
3517 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3518
1da177e4 3519out:
85bd05de 3520 fib6_info_release(res.f6i);
e8599ff4 3521 neigh_release(neigh);
6e157b6a
DM
3522}
3523
70ceb4f5 3524#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3525static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3526 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3527 const struct in6_addr *gwaddr,
3528 struct net_device *dev)
70ceb4f5 3529{
830218c1
DA
3530 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3531 int ifindex = dev->ifindex;
70ceb4f5 3532 struct fib6_node *fn;
8d1c802b 3533 struct fib6_info *rt = NULL;
c71099ac
TG
3534 struct fib6_table *table;
3535
830218c1 3536 table = fib6_get_table(net, tb_id);
38308473 3537 if (!table)
c71099ac 3538 return NULL;
70ceb4f5 3539
66f5d6ce 3540 rcu_read_lock();
38fbeeee 3541 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3542 if (!fn)
3543 goto out;
3544
66f5d6ce 3545 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3546 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3547 continue;
2b2450ca 3548 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
bdf00467 3549 !rt->fib6_nh.fib_nh_gw_family)
70ceb4f5 3550 continue;
ad1601ae 3551 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3552 continue;
e873e4b9
WW
3553 if (!fib6_info_hold_safe(rt))
3554 continue;
70ceb4f5
YH
3555 break;
3556 }
3557out:
66f5d6ce 3558 rcu_read_unlock();
70ceb4f5
YH
3559 return rt;
3560}
3561
8d1c802b 3562static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3563 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3564 const struct in6_addr *gwaddr,
3565 struct net_device *dev,
95c96174 3566 unsigned int pref)
70ceb4f5 3567{
86872cb5 3568 struct fib6_config cfg = {
238fc7ea 3569 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3570 .fc_ifindex = dev->ifindex,
86872cb5
TG
3571 .fc_dst_len = prefixlen,
3572 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3573 RTF_UP | RTF_PREF(pref),
b91d5329 3574 .fc_protocol = RTPROT_RA,
e8478e80 3575 .fc_type = RTN_UNICAST,
15e47304 3576 .fc_nlinfo.portid = 0,
efa2cea0
DL
3577 .fc_nlinfo.nlh = NULL,
3578 .fc_nlinfo.nl_net = net,
86872cb5
TG
3579 };
3580
830218c1 3581 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3582 cfg.fc_dst = *prefix;
3583 cfg.fc_gateway = *gwaddr;
70ceb4f5 3584
e317da96
YH
3585 /* We should treat it as a default route if prefix length is 0. */
3586 if (!prefixlen)
86872cb5 3587 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3588
acb54e3c 3589 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3590
830218c1 3591 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3592}
3593#endif
3594
8d1c802b 3595struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3596 const struct in6_addr *addr,
3597 struct net_device *dev)
1ab1457c 3598{
830218c1 3599 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3600 struct fib6_info *rt;
c71099ac 3601 struct fib6_table *table;
1da177e4 3602
afb1d4b5 3603 table = fib6_get_table(net, tb_id);
38308473 3604 if (!table)
c71099ac 3605 return NULL;
1da177e4 3606
66f5d6ce
WW
3607 rcu_read_lock();
3608 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3609 struct fib6_nh *nh = &rt->fib6_nh;
3610
3611 if (dev == nh->fib_nh_dev &&
93c2fb25 3612 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3613 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3614 break;
3615 }
e873e4b9
WW
3616 if (rt && !fib6_info_hold_safe(rt))
3617 rt = NULL;
66f5d6ce 3618 rcu_read_unlock();
1da177e4
LT
3619 return rt;
3620}
3621
8d1c802b 3622struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3623 const struct in6_addr *gwaddr,
ebacaaa0
YH
3624 struct net_device *dev,
3625 unsigned int pref)
1da177e4 3626{
86872cb5 3627 struct fib6_config cfg = {
ca254490 3628 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3629 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3630 .fc_ifindex = dev->ifindex,
3631 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3632 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3633 .fc_protocol = RTPROT_RA,
e8478e80 3634 .fc_type = RTN_UNICAST,
15e47304 3635 .fc_nlinfo.portid = 0,
5578689a 3636 .fc_nlinfo.nlh = NULL,
afb1d4b5 3637 .fc_nlinfo.nl_net = net,
86872cb5 3638 };
1da177e4 3639
4e3fd7a0 3640 cfg.fc_gateway = *gwaddr;
1da177e4 3641
acb54e3c 3642 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3643 struct fib6_table *table;
3644
3645 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3646 if (table)
3647 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3648 }
1da177e4 3649
afb1d4b5 3650 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3651}
3652
afb1d4b5
DA
3653static void __rt6_purge_dflt_routers(struct net *net,
3654 struct fib6_table *table)
1da177e4 3655{
8d1c802b 3656 struct fib6_info *rt;
1da177e4
LT
3657
3658restart:
66f5d6ce
WW
3659 rcu_read_lock();
3660 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3661 struct net_device *dev = fib6_info_nh_dev(rt);
3662 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3663
93c2fb25 3664 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3665 (!idev || idev->cnf.accept_ra != 2) &&
3666 fib6_info_hold_safe(rt)) {
93531c67
DA
3667 rcu_read_unlock();
3668 ip6_del_rt(net, rt);
1da177e4
LT
3669 goto restart;
3670 }
3671 }
66f5d6ce 3672 rcu_read_unlock();
830218c1
DA
3673
3674 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3675}
3676
3677void rt6_purge_dflt_routers(struct net *net)
3678{
3679 struct fib6_table *table;
3680 struct hlist_head *head;
3681 unsigned int h;
3682
3683 rcu_read_lock();
3684
3685 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3686 head = &net->ipv6.fib_table_hash[h];
3687 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3688 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3689 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3690 }
3691 }
3692
3693 rcu_read_unlock();
1da177e4
LT
3694}
3695
5578689a
DL
3696static void rtmsg_to_fib6_config(struct net *net,
3697 struct in6_rtmsg *rtmsg,
86872cb5
TG
3698 struct fib6_config *cfg)
3699{
8823a3ac
3700 *cfg = (struct fib6_config){
3701 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3702 : RT6_TABLE_MAIN,
3703 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3704 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3705 .fc_expires = rtmsg->rtmsg_info,
3706 .fc_dst_len = rtmsg->rtmsg_dst_len,
3707 .fc_src_len = rtmsg->rtmsg_src_len,
3708 .fc_flags = rtmsg->rtmsg_flags,
3709 .fc_type = rtmsg->rtmsg_type,
3710
3711 .fc_nlinfo.nl_net = net,
3712
3713 .fc_dst = rtmsg->rtmsg_dst,
3714 .fc_src = rtmsg->rtmsg_src,
3715 .fc_gateway = rtmsg->rtmsg_gateway,
3716 };
86872cb5
TG
3717}
3718
5578689a 3719int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3720{
86872cb5 3721 struct fib6_config cfg;
1da177e4
LT
3722 struct in6_rtmsg rtmsg;
3723 int err;
3724
67ba4152 3725 switch (cmd) {
1da177e4
LT
3726 case SIOCADDRT: /* Add a route */
3727 case SIOCDELRT: /* Delete a route */
af31f412 3728 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3729 return -EPERM;
3730 err = copy_from_user(&rtmsg, arg,
3731 sizeof(struct in6_rtmsg));
3732 if (err)
3733 return -EFAULT;
86872cb5 3734
5578689a 3735 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3736
1da177e4
LT
3737 rtnl_lock();
3738 switch (cmd) {
3739 case SIOCADDRT:
acb54e3c 3740 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3741 break;
3742 case SIOCDELRT:
333c4301 3743 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3744 break;
3745 default:
3746 err = -EINVAL;
3747 }
3748 rtnl_unlock();
3749
3750 return err;
3ff50b79 3751 }
1da177e4
LT
3752
3753 return -EINVAL;
3754}
3755
3756/*
3757 * Drop the packet on the floor
3758 */
3759
d5fdd6ba 3760static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3761{
612f09e8 3762 int type;
adf30907 3763 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3764 switch (ipstats_mib_noroutes) {
3765 case IPSTATS_MIB_INNOROUTES:
0660e03f 3766 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3767 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3768 IP6_INC_STATS(dev_net(dst->dev),
3769 __in6_dev_get_safely(skb->dev),
3bd653c8 3770 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3771 break;
3772 }
3773 /* FALLTHROUGH */
3774 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3775 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3776 ipstats_mib_noroutes);
612f09e8
YH
3777 break;
3778 }
3ffe533c 3779 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3780 kfree_skb(skb);
3781 return 0;
3782}
3783
9ce8ade0
TG
3784static int ip6_pkt_discard(struct sk_buff *skb)
3785{
612f09e8 3786 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3787}
3788
ede2059d 3789static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3790{
adf30907 3791 skb->dev = skb_dst(skb)->dev;
612f09e8 3792 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3793}
3794
9ce8ade0
TG
3795static int ip6_pkt_prohibit(struct sk_buff *skb)
3796{
612f09e8 3797 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3798}
3799
ede2059d 3800static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3801{
adf30907 3802 skb->dev = skb_dst(skb)->dev;
612f09e8 3803 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3804}
3805
1da177e4
LT
3806/*
3807 * Allocate a dst for local (unicast / anycast) address.
3808 */
3809
360a9887
DA
3810struct fib6_info *addrconf_f6i_alloc(struct net *net,
3811 struct inet6_dev *idev,
3812 const struct in6_addr *addr,
3813 bool anycast, gfp_t gfp_flags)
1da177e4 3814{
c7a1ce39
DA
3815 struct fib6_config cfg = {
3816 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3817 .fc_ifindex = idev->dev->ifindex,
3818 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3819 .fc_dst = *addr,
3820 .fc_dst_len = 128,
3821 .fc_protocol = RTPROT_KERNEL,
3822 .fc_nlinfo.nl_net = net,
3823 .fc_ignore_dev_down = true,
3824 };
1da177e4 3825
e8478e80 3826 if (anycast) {
c7a1ce39
DA
3827 cfg.fc_type = RTN_ANYCAST;
3828 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3829 } else {
c7a1ce39
DA
3830 cfg.fc_type = RTN_LOCAL;
3831 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3832 }
1da177e4 3833
c7a1ce39 3834 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3835}
3836
c3968a85
DW
3837/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to the prefsrc cleanup walker. */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the routes */
	struct in6_addr *addr;		/* address being removed */
};
3843
8d1c802b 3844static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3845{
3846 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3847 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3848 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3849
ad1601ae 3850 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3851 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3852 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3853 spin_lock_bh(&rt6_exception_lock);
c3968a85 3854 /* remove prefsrc entry */
93c2fb25 3855 rt->fib6_prefsrc.plen = 0;
60006a48 3856 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3857 }
3858 return 0;
3859}
3860
3861void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3862{
3863 struct net *net = dev_net(ifp->idev->dev);
3864 struct arg_dev_net_ip adni = {
3865 .dev = ifp->idev->dev,
3866 .net = net,
3867 .addr = &ifp->addr,
3868 };
0c3584d5 3869 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3870}
3871
2b2450ca 3872#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3873
3874/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3875static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3876{
3877 struct in6_addr *gateway = (struct in6_addr *)arg;
3878
93c2fb25 3879 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
bdf00467 3880 rt->fib6_nh.fib_nh_gw_family &&
ad1601ae 3881 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3882 return -1;
3883 }
b16cb459
WW
3884
3885 /* Further clean up cached routes in exception table.
3886 * This is needed because cached route may have a different
3887 * gateway than its 'parent' in the case of an ip redirect.
3888 */
3889 rt6_exceptions_clean_tohost(rt, gateway);
3890
be7a010d
DJ
3891 return 0;
3892}
3893
3894void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3895{
3896 fib6_clean_all(net, fib6_clean_tohost, gateway);
3897}
3898
2127d95a
IS
/* Argument bundle for FIB walkers reacting to a netdevice change. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the change applies to */
	union {
		unsigned int nh_flags;	/* nexthop flags to update */
		unsigned long event;	/* NOTE(review): presumably a netdev
					 * notifier event -- confirm at users
					 */
	};
};
3906
8d1c802b 3907static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3908{
8d1c802b 3909 struct fib6_info *iter;
d7dedee1
IS
3910 struct fib6_node *fn;
3911
93c2fb25
DA
3912 fn = rcu_dereference_protected(rt->fib6_node,
3913 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3914 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3915 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3916 while (iter) {
93c2fb25 3917 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3918 rt6_qualify_for_ecmp(iter))
d7dedee1 3919 return iter;
8fb11a9a 3920 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3921 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3922 }
3923
3924 return NULL;
3925}
3926
8d1c802b 3927static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3928{
ad1601ae
DA
3929 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3930 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3931 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3932 return true;
3933
3934 return false;
3935}
3936
8d1c802b 3937static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3938{
8d1c802b 3939 struct fib6_info *iter;
d7dedee1
IS
3940 int total = 0;
3941
3942 if (!rt6_is_dead(rt))
ad1601ae 3943 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3944
93c2fb25 3945 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3946 if (!rt6_is_dead(iter))
ad1601ae 3947 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3948 }
3949
3950 return total;
3951}
3952
8d1c802b 3953static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3954{
3955 int upper_bound = -1;
3956
3957 if (!rt6_is_dead(rt)) {
ad1601ae 3958 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3959 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3960 total) - 1;
3961 }
ad1601ae 3962 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3963}
3964
8d1c802b 3965static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3966{
8d1c802b 3967 struct fib6_info *iter;
d7dedee1
IS
3968 int weight = 0;
3969
3970 rt6_upper_bound_set(rt, &weight, total);
3971
93c2fb25 3972 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3973 rt6_upper_bound_set(iter, &weight, total);
3974}
3975
8d1c802b 3976void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3977{
8d1c802b 3978 struct fib6_info *first;
d7dedee1
IS
3979 int total;
3980
3981 /* In case the entire multipath route was marked for flushing,
3982 * then there is no need to rebalance upon the removal of every
3983 * sibling route.
3984 */
93c2fb25 3985 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3986 return;
3987
3988 /* During lookup routes are evaluated in order, so we need to
3989 * make sure upper bounds are assigned from the first sibling
3990 * onwards.
3991 */
3992 first = rt6_multipath_first_sibling(rt);
3993 if (WARN_ON_ONCE(!first))
3994 return;
3995
3996 total = rt6_multipath_total_weight(first);
3997 rt6_multipath_upper_bound_set(first, total);
3998}
3999
8d1c802b 4000static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
4001{
4002 const struct arg_netdev_event *arg = p_arg;
7aef6859 4003 struct net *net = dev_net(arg->dev);
2127d95a 4004
ad1601ae
DA
4005 if (rt != net->ipv6.fib6_null_entry &&
4006 rt->fib6_nh.fib_nh_dev == arg->dev) {
4007 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 4008 fib6_update_sernum_upto_root(net, rt);
d7dedee1 4009 rt6_multipath_rebalance(rt);
1de178ed 4010 }
2127d95a
IS
4011
4012 return 0;
4013}
4014
4015void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
4016{
4017 struct arg_netdev_event arg = {
4018 .dev = dev,
6802f3ad
IS
4019 {
4020 .nh_flags = nh_flags,
4021 },
2127d95a
IS
4022 };
4023
4024 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4025 arg.nh_flags |= RTNH_F_LINKDOWN;
4026
4027 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4028}
4029
8d1c802b 4030static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
4031 const struct net_device *dev)
4032{
8d1c802b 4033 struct fib6_info *iter;
1de178ed 4034
ad1601ae 4035 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 4036 return true;
93c2fb25 4037 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 4038 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
4039 return true;
4040
4041 return false;
4042}
4043
8d1c802b 4044static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4045{
8d1c802b 4046 struct fib6_info *iter;
1de178ed
IS
4047
4048 rt->should_flush = 1;
93c2fb25 4049 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4050 iter->should_flush = 1;
4051}
4052
8d1c802b 4053static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4054 const struct net_device *down_dev)
4055{
8d1c802b 4056 struct fib6_info *iter;
1de178ed
IS
4057 unsigned int dead = 0;
4058
ad1601ae
DA
4059 if (rt->fib6_nh.fib_nh_dev == down_dev ||
4060 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 4061 dead++;
93c2fb25 4062 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4063 if (iter->fib6_nh.fib_nh_dev == down_dev ||
4064 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4065 dead++;
4066
4067 return dead;
4068}
4069
8d1c802b 4070static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4071 const struct net_device *dev,
4072 unsigned int nh_flags)
4073{
8d1c802b 4074 struct fib6_info *iter;
1de178ed 4075
ad1601ae
DA
4076 if (rt->fib6_nh.fib_nh_dev == dev)
4077 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 4078 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4079 if (iter->fib6_nh.fib_nh_dev == dev)
4080 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4081}
4082
a1a22c12 4083/* called with write lock held for table with rt */
8d1c802b 4084static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4085{
4c981e28
IS
4086 const struct arg_netdev_event *arg = p_arg;
4087 const struct net_device *dev = arg->dev;
7aef6859 4088 struct net *net = dev_net(dev);
8ed67789 4089
421842ed 4090 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4091 return 0;
4092
4093 switch (arg->event) {
4094 case NETDEV_UNREGISTER:
ad1601ae 4095 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4096 case NETDEV_DOWN:
1de178ed 4097 if (rt->should_flush)
27c6fa73 4098 return -1;
93c2fb25 4099 if (!rt->fib6_nsiblings)
ad1601ae 4100 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4101 if (rt6_multipath_uses_dev(rt, dev)) {
4102 unsigned int count;
4103
4104 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4105 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4106 rt6_multipath_flush(rt);
4107 return -1;
4108 }
4109 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4110 RTNH_F_LINKDOWN);
7aef6859 4111 fib6_update_sernum(net, rt);
d7dedee1 4112 rt6_multipath_rebalance(rt);
1de178ed
IS
4113 }
4114 return -2;
27c6fa73 4115 case NETDEV_CHANGE:
ad1601ae 4116 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4117 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4118 break;
ad1601ae 4119 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4120 rt6_multipath_rebalance(rt);
27c6fa73 4121 break;
2b241361 4122 }
c159d30c 4123
1da177e4
LT
4124 return 0;
4125}
4126
27c6fa73 4127void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4128{
4c981e28 4129 struct arg_netdev_event arg = {
8ed67789 4130 .dev = dev,
6802f3ad
IS
4131 {
4132 .event = event,
4133 },
8ed67789 4134 };
7c6bb7d2 4135 struct net *net = dev_net(dev);
8ed67789 4136
7c6bb7d2
DA
4137 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4138 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4139 else
4140 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4141}
4142
4143void rt6_disable_ip(struct net_device *dev, unsigned long event)
4144{
4145 rt6_sync_down_dev(dev, event);
4146 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4147 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4148}
4149
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
4154
8d1c802b 4155static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4156{
4157 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4158 struct inet6_dev *idev;
4159
4160 /* In IPv6 pmtu discovery is not optional,
4161 so that RTAX_MTU lock cannot disable it.
4162 We still use this lock to block changes
4163 caused by addrconf/ndisc.
4164 */
4165
4166 idev = __in6_dev_get(arg->dev);
38308473 4167 if (!idev)
1da177e4
LT
4168 return 0;
4169
4170 /* For administrative MTU increase, there is no way to discover
4171 IPv6 PMTU increase, so PMTU increase should be updated here.
4172 Since RFC 1981 doesn't include administrative MTU increase
4173 update PMTU increase is a MUST. (i.e. jumbo frame)
4174 */
ad1601ae 4175 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4176 !fib6_metric_locked(rt, RTAX_MTU)) {
4177 u32 mtu = rt->fib6_pmtu;
4178
4179 if (mtu >= arg->mtu ||
4180 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4181 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4182
f5bbe7ee 4183 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4184 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4185 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4186 }
1da177e4
LT
4187 return 0;
4188}
4189
95c96174 4190void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4191{
c71099ac
TG
4192 struct rt6_mtu_change_arg arg = {
4193 .dev = dev,
4194 .mtu = mtu,
4195 };
1da177e4 4196
0c3584d5 4197 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4198}
4199
ef7c79ed 4200static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4201 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4202 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4203 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4204 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4205 [RTA_PRIORITY] = { .type = NLA_U32 },
4206 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4207 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4208 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4209 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4210 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4211 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4212 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4213 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4214 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4215 [RTA_IP_PROTO] = { .type = NLA_U8 },
4216 [RTA_SPORT] = { .type = NLA_U16 },
4217 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4218};
4219
4220static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4221 struct fib6_config *cfg,
4222 struct netlink_ext_ack *extack)
1da177e4 4223{
86872cb5
TG
4224 struct rtmsg *rtm;
4225 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4226 unsigned int pref;
86872cb5 4227 int err;
1da177e4 4228
fceb6435 4229 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4230 extack);
86872cb5
TG
4231 if (err < 0)
4232 goto errout;
1da177e4 4233
86872cb5
TG
4234 err = -EINVAL;
4235 rtm = nlmsg_data(nlh);
86872cb5 4236
84db8407
4237 *cfg = (struct fib6_config){
4238 .fc_table = rtm->rtm_table,
4239 .fc_dst_len = rtm->rtm_dst_len,
4240 .fc_src_len = rtm->rtm_src_len,
4241 .fc_flags = RTF_UP,
4242 .fc_protocol = rtm->rtm_protocol,
4243 .fc_type = rtm->rtm_type,
4244
4245 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4246 .fc_nlinfo.nlh = nlh,
4247 .fc_nlinfo.nl_net = sock_net(skb->sk),
4248 };
86872cb5 4249
ef2c7d7b
ND
4250 if (rtm->rtm_type == RTN_UNREACHABLE ||
4251 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4252 rtm->rtm_type == RTN_PROHIBIT ||
4253 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4254 cfg->fc_flags |= RTF_REJECT;
4255
ab79ad14
4256 if (rtm->rtm_type == RTN_LOCAL)
4257 cfg->fc_flags |= RTF_LOCAL;
4258
1f56a01f
MKL
4259 if (rtm->rtm_flags & RTM_F_CLONED)
4260 cfg->fc_flags |= RTF_CACHE;
4261
fc1e64e1
DA
4262 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4263
86872cb5 4264 if (tb[RTA_GATEWAY]) {
67b61f6c 4265 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4266 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4267 }
e3818541
DA
4268 if (tb[RTA_VIA]) {
4269 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4270 goto errout;
4271 }
86872cb5
TG
4272
4273 if (tb[RTA_DST]) {
4274 int plen = (rtm->rtm_dst_len + 7) >> 3;
4275
4276 if (nla_len(tb[RTA_DST]) < plen)
4277 goto errout;
4278
4279 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4280 }
86872cb5
TG
4281
4282 if (tb[RTA_SRC]) {
4283 int plen = (rtm->rtm_src_len + 7) >> 3;
4284
4285 if (nla_len(tb[RTA_SRC]) < plen)
4286 goto errout;
4287
4288 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4289 }
86872cb5 4290
c3968a85 4291 if (tb[RTA_PREFSRC])
67b61f6c 4292 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4293
86872cb5
TG
4294 if (tb[RTA_OIF])
4295 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4296
4297 if (tb[RTA_PRIORITY])
4298 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4299
4300 if (tb[RTA_METRICS]) {
4301 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4302 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4303 }
86872cb5
TG
4304
4305 if (tb[RTA_TABLE])
4306 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4307
51ebd318
ND
4308 if (tb[RTA_MULTIPATH]) {
4309 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4310 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4311
4312 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4313 cfg->fc_mp_len, extack);
9ed59592
DA
4314 if (err < 0)
4315 goto errout;
51ebd318
ND
4316 }
4317
c78ba6d6
LR
4318 if (tb[RTA_PREF]) {
4319 pref = nla_get_u8(tb[RTA_PREF]);
4320 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4321 pref != ICMPV6_ROUTER_PREF_HIGH)
4322 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4323 cfg->fc_flags |= RTF_PREF(pref);
4324 }
4325
19e42e45
RP
4326 if (tb[RTA_ENCAP])
4327 cfg->fc_encap = tb[RTA_ENCAP];
4328
9ed59592 4329 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4330 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4331
c255bd68 4332 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4333 if (err < 0)
4334 goto errout;
4335 }
4336
32bc201e
XL
4337 if (tb[RTA_EXPIRES]) {
4338 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4339
4340 if (addrconf_finite_timeout(timeout)) {
4341 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4342 cfg->fc_flags |= RTF_EXPIRES;
4343 }
4344 }
4345
86872cb5
TG
4346 err = 0;
4347errout:
4348 return err;
1da177e4
LT
4349}
4350
6b9ea5a6 4351struct rt6_nh {
8d1c802b 4352 struct fib6_info *fib6_info;
6b9ea5a6 4353 struct fib6_config r_cfg;
6b9ea5a6
RP
4354 struct list_head next;
4355};
4356
d4ead6b3
DA
4357static int ip6_route_info_append(struct net *net,
4358 struct list_head *rt6_nh_list,
8d1c802b
DA
4359 struct fib6_info *rt,
4360 struct fib6_config *r_cfg)
6b9ea5a6
RP
4361{
4362 struct rt6_nh *nh;
6b9ea5a6
RP
4363 int err = -EEXIST;
4364
4365 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4366 /* check if fib6_info already exists */
4367 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4368 return err;
4369 }
4370
4371 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4372 if (!nh)
4373 return -ENOMEM;
8d1c802b 4374 nh->fib6_info = rt;
6b9ea5a6
RP
4375 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4376 list_add_tail(&nh->next, rt6_nh_list);
4377
4378 return 0;
4379}
4380
8d1c802b
DA
4381static void ip6_route_mpath_notify(struct fib6_info *rt,
4382 struct fib6_info *rt_last,
3b1137fe
DA
4383 struct nl_info *info,
4384 __u16 nlflags)
4385{
4386 /* if this is an APPEND route, then rt points to the first route
4387 * inserted and rt_last points to last route inserted. Userspace
4388 * wants a consistent dump of the route which starts at the first
4389 * nexthop. Since sibling routes are always added at the end of
4390 * the list, find the first sibling of the last route appended
4391 */
93c2fb25
DA
4392 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4393 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4394 struct fib6_info,
93c2fb25 4395 fib6_siblings);
3b1137fe
DA
4396 }
4397
4398 if (rt)
4399 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4400}
4401
333c4301
DA
4402static int ip6_route_multipath_add(struct fib6_config *cfg,
4403 struct netlink_ext_ack *extack)
51ebd318 4404{
8d1c802b 4405 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4406 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4407 struct fib6_config r_cfg;
4408 struct rtnexthop *rtnh;
8d1c802b 4409 struct fib6_info *rt;
6b9ea5a6
RP
4410 struct rt6_nh *err_nh;
4411 struct rt6_nh *nh, *nh_safe;
3b1137fe 4412 __u16 nlflags;
51ebd318
ND
4413 int remaining;
4414 int attrlen;
6b9ea5a6
RP
4415 int err = 1;
4416 int nhn = 0;
4417 int replace = (cfg->fc_nlinfo.nlh &&
4418 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4419 LIST_HEAD(rt6_nh_list);
51ebd318 4420
3b1137fe
DA
4421 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4422 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4423 nlflags |= NLM_F_APPEND;
4424
35f1b4e9 4425 remaining = cfg->fc_mp_len;
51ebd318 4426 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4427
6b9ea5a6 4428 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4429 * fib6_info structs per nexthop
6b9ea5a6 4430 */
51ebd318
ND
4431 while (rtnh_ok(rtnh, remaining)) {
4432 memcpy(&r_cfg, cfg, sizeof(*cfg));
4433 if (rtnh->rtnh_ifindex)
4434 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4435
4436 attrlen = rtnh_attrlen(rtnh);
4437 if (attrlen > 0) {
4438 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4439
4440 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4441 if (nla) {
67b61f6c 4442 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4443 r_cfg.fc_flags |= RTF_GATEWAY;
4444 }
19e42e45
RP
4445 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4446 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4447 if (nla)
4448 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4449 }
6b9ea5a6 4450
68e2ffde 4451 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4452 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4453 if (IS_ERR(rt)) {
4454 err = PTR_ERR(rt);
4455 rt = NULL;
6b9ea5a6 4456 goto cleanup;
8c5b83f0 4457 }
b5d2d75e
DA
4458 if (!rt6_qualify_for_ecmp(rt)) {
4459 err = -EINVAL;
4460 NL_SET_ERR_MSG(extack,
4461 "Device only routes can not be added for IPv6 using the multipath API.");
4462 fib6_info_release(rt);
4463 goto cleanup;
4464 }
6b9ea5a6 4465
ad1601ae 4466 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4467
d4ead6b3
DA
4468 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4469 rt, &r_cfg);
51ebd318 4470 if (err) {
93531c67 4471 fib6_info_release(rt);
6b9ea5a6
RP
4472 goto cleanup;
4473 }
4474
4475 rtnh = rtnh_next(rtnh, &remaining);
4476 }
4477
3b1137fe
DA
4478 /* for add and replace send one notification with all nexthops.
4479 * Skip the notification in fib6_add_rt2node and send one with
4480 * the full route when done
4481 */
4482 info->skip_notify = 1;
4483
6b9ea5a6
RP
4484 err_nh = NULL;
4485 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4486 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4487 fib6_info_release(nh->fib6_info);
93531c67 4488
f7225172
DA
4489 if (!err) {
4490 /* save reference to last route successfully inserted */
4491 rt_last = nh->fib6_info;
4492
4493 /* save reference to first route for notification */
4494 if (!rt_notif)
4495 rt_notif = nh->fib6_info;
4496 }
3b1137fe 4497
8d1c802b
DA
4498 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4499 nh->fib6_info = NULL;
6b9ea5a6
RP
4500 if (err) {
4501 if (replace && nhn)
a5a82d84
JK
4502 NL_SET_ERR_MSG_MOD(extack,
4503 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4504 err_nh = nh;
4505 goto add_errout;
51ebd318 4506 }
6b9ea5a6 4507
1a72418b 4508 /* Because each route is added like a single route we remove
27596472
MK
4509 * these flags after the first nexthop: if there is a collision,
4510 * we have already failed to add the first nexthop:
4511 * fib6_add_rt2node() has rejected it; when replacing, old
4512 * nexthops have been replaced by first new, the rest should
4513 * be added to it.
1a72418b 4514 */
27596472
MK
4515 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4516 NLM_F_REPLACE);
6b9ea5a6
RP
4517 nhn++;
4518 }
4519
3b1137fe
DA
4520 /* success ... tell user about new route */
4521 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4522 goto cleanup;
4523
4524add_errout:
3b1137fe
DA
4525 /* send notification for routes that were added so that
4526 * the delete notifications sent by ip6_route_del are
4527 * coherent
4528 */
4529 if (rt_notif)
4530 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4531
6b9ea5a6
RP
4532 /* Delete routes that were already added */
4533 list_for_each_entry(nh, &rt6_nh_list, next) {
4534 if (err_nh == nh)
4535 break;
333c4301 4536 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4537 }
4538
4539cleanup:
4540 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4541 if (nh->fib6_info)
4542 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4543 list_del(&nh->next);
4544 kfree(nh);
4545 }
4546
4547 return err;
4548}
4549
333c4301
DA
4550static int ip6_route_multipath_del(struct fib6_config *cfg,
4551 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4552{
4553 struct fib6_config r_cfg;
4554 struct rtnexthop *rtnh;
4555 int remaining;
4556 int attrlen;
4557 int err = 1, last_err = 0;
4558
4559 remaining = cfg->fc_mp_len;
4560 rtnh = (struct rtnexthop *)cfg->fc_mp;
4561
4562 /* Parse a Multipath Entry */
4563 while (rtnh_ok(rtnh, remaining)) {
4564 memcpy(&r_cfg, cfg, sizeof(*cfg));
4565 if (rtnh->rtnh_ifindex)
4566 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4567
4568 attrlen = rtnh_attrlen(rtnh);
4569 if (attrlen > 0) {
4570 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4571
4572 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4573 if (nla) {
4574 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4575 r_cfg.fc_flags |= RTF_GATEWAY;
4576 }
4577 }
333c4301 4578 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4579 if (err)
4580 last_err = err;
4581
51ebd318
ND
4582 rtnh = rtnh_next(rtnh, &remaining);
4583 }
4584
4585 return last_err;
4586}
4587
c21ef3e3
DA
4588static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4589 struct netlink_ext_ack *extack)
1da177e4 4590{
86872cb5
TG
4591 struct fib6_config cfg;
4592 int err;
1da177e4 4593
333c4301 4594 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4595 if (err < 0)
4596 return err;
4597
51ebd318 4598 if (cfg.fc_mp)
333c4301 4599 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4600 else {
4601 cfg.fc_delete_all_nh = 1;
333c4301 4602 return ip6_route_del(&cfg, extack);
0ae81335 4603 }
1da177e4
LT
4604}
4605
c21ef3e3
DA
4606static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4607 struct netlink_ext_ack *extack)
1da177e4 4608{
86872cb5
TG
4609 struct fib6_config cfg;
4610 int err;
1da177e4 4611
333c4301 4612 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4613 if (err < 0)
4614 return err;
4615
67f69513
DA
4616 if (cfg.fc_metric == 0)
4617 cfg.fc_metric = IP6_RT_PRIO_USER;
4618
51ebd318 4619 if (cfg.fc_mp)
333c4301 4620 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4621 else
acb54e3c 4622 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4623}
4624
8d1c802b 4625static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4626{
beb1afac
DA
4627 int nexthop_len = 0;
4628
93c2fb25 4629 if (rt->fib6_nsiblings) {
beb1afac
DA
4630 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4631 + NLA_ALIGN(sizeof(struct rtnexthop))
4632 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4633 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4634
93c2fb25 4635 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4636 }
4637
339bf98f
TG
4638 return NLMSG_ALIGN(sizeof(struct rtmsg))
4639 + nla_total_size(16) /* RTA_SRC */
4640 + nla_total_size(16) /* RTA_DST */
4641 + nla_total_size(16) /* RTA_GATEWAY */
4642 + nla_total_size(16) /* RTA_PREFSRC */
4643 + nla_total_size(4) /* RTA_TABLE */
4644 + nla_total_size(4) /* RTA_IIF */
4645 + nla_total_size(4) /* RTA_OIF */
4646 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4647 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4648 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4649 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4650 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4651 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4652 + nexthop_len;
4653}
4654
d4ead6b3 4655static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4656 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4657 struct in6_addr *dest, struct in6_addr *src,
15e47304 4658 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4659 unsigned int flags)
1da177e4 4660{
22d0bd82
XL
4661 struct rt6_info *rt6 = (struct rt6_info *)dst;
4662 struct rt6key *rt6_dst, *rt6_src;
4663 u32 *pmetrics, table, rt6_flags;
2d7202bf 4664 struct nlmsghdr *nlh;
22d0bd82 4665 struct rtmsg *rtm;
d4ead6b3 4666 long expires = 0;
1da177e4 4667
15e47304 4668 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4669 if (!nlh)
26932566 4670 return -EMSGSIZE;
2d7202bf 4671
22d0bd82
XL
4672 if (rt6) {
4673 rt6_dst = &rt6->rt6i_dst;
4674 rt6_src = &rt6->rt6i_src;
4675 rt6_flags = rt6->rt6i_flags;
4676 } else {
4677 rt6_dst = &rt->fib6_dst;
4678 rt6_src = &rt->fib6_src;
4679 rt6_flags = rt->fib6_flags;
4680 }
4681
2d7202bf 4682 rtm = nlmsg_data(nlh);
1da177e4 4683 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4684 rtm->rtm_dst_len = rt6_dst->plen;
4685 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4686 rtm->rtm_tos = 0;
93c2fb25
DA
4687 if (rt->fib6_table)
4688 table = rt->fib6_table->tb6_id;
c71099ac 4689 else
9e762a4a 4690 table = RT6_TABLE_UNSPEC;
97f0082a 4691 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4692 if (nla_put_u32(skb, RTA_TABLE, table))
4693 goto nla_put_failure;
e8478e80
DA
4694
4695 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4696 rtm->rtm_flags = 0;
4697 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4698 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4699
22d0bd82 4700 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4701 rtm->rtm_flags |= RTM_F_CLONED;
4702
d4ead6b3
DA
4703 if (dest) {
4704 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4705 goto nla_put_failure;
1ab1457c 4706 rtm->rtm_dst_len = 128;
1da177e4 4707 } else if (rtm->rtm_dst_len)
22d0bd82 4708 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4709 goto nla_put_failure;
1da177e4
LT
4710#ifdef CONFIG_IPV6_SUBTREES
4711 if (src) {
930345ea 4712 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4713 goto nla_put_failure;
1ab1457c 4714 rtm->rtm_src_len = 128;
c78679e8 4715 } else if (rtm->rtm_src_len &&
22d0bd82 4716 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4717 goto nla_put_failure;
1da177e4 4718#endif
7bc570c8
YH
4719 if (iif) {
4720#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4721 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4722 int err = ip6mr_get_route(net, skb, rtm, portid);
4723
4724 if (err == 0)
4725 return 0;
4726 if (err < 0)
4727 goto nla_put_failure;
7bc570c8
YH
4728 } else
4729#endif
c78679e8
DM
4730 if (nla_put_u32(skb, RTA_IIF, iif))
4731 goto nla_put_failure;
d4ead6b3 4732 } else if (dest) {
1da177e4 4733 struct in6_addr saddr_buf;
d4ead6b3 4734 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4735 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4736 goto nla_put_failure;
1da177e4 4737 }
2d7202bf 4738
93c2fb25 4739 if (rt->fib6_prefsrc.plen) {
c3968a85 4740 struct in6_addr saddr_buf;
93c2fb25 4741 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4742 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4743 goto nla_put_failure;
c3968a85
DW
4744 }
4745
d4ead6b3
DA
4746 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4747 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4748 goto nla_put_failure;
4749
93c2fb25 4750 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4751 goto nla_put_failure;
8253947e 4752
beb1afac
DA
4753 /* For multipath routes, walk the siblings list and add
4754 * each as a nexthop within RTA_MULTIPATH.
4755 */
22d0bd82
XL
4756 if (rt6) {
4757 if (rt6_flags & RTF_GATEWAY &&
4758 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4759 goto nla_put_failure;
4760
4761 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4762 goto nla_put_failure;
4763 } else if (rt->fib6_nsiblings) {
8d1c802b 4764 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4765 struct nlattr *mp;
4766
4767 mp = nla_nest_start(skb, RTA_MULTIPATH);
4768 if (!mp)
4769 goto nla_put_failure;
4770
c0a72077
DA
4771 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4772 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4773 goto nla_put_failure;
4774
4775 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4776 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4777 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4778 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4779 goto nla_put_failure;
4780 }
4781
4782 nla_nest_end(skb, mp);
4783 } else {
c0a72077
DA
4784 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4785 &rtm->rtm_flags, false) < 0)
beb1afac
DA
4786 goto nla_put_failure;
4787 }
4788
22d0bd82 4789 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4790 expires = dst ? dst->expires : rt->expires;
4791 expires -= jiffies;
4792 }
69cdf8f9 4793
d4ead6b3 4794 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4795 goto nla_put_failure;
2d7202bf 4796
22d0bd82 4797 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4798 goto nla_put_failure;
4799
19e42e45 4800
053c095a
JB
4801 nlmsg_end(skb, nlh);
4802 return 0;
2d7202bf
TG
4803
4804nla_put_failure:
26932566
PM
4805 nlmsg_cancel(skb, nlh);
4806 return -EMSGSIZE;
1da177e4
LT
4807}
4808
13e38901
DA
4809static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4810 const struct net_device *dev)
4811{
ad1601ae 4812 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4813 return true;
4814
4815 if (f6i->fib6_nsiblings) {
4816 struct fib6_info *sibling, *next_sibling;
4817
4818 list_for_each_entry_safe(sibling, next_sibling,
4819 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4820 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4821 return true;
4822 }
4823 }
4824
4825 return false;
4826}
4827
8d1c802b 4828int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4829{
4830 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4831 struct fib_dump_filter *filter = &arg->filter;
4832 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4833 struct net *net = arg->net;
4834
421842ed 4835 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4836 return 0;
1da177e4 4837
13e38901
DA
4838 if ((filter->flags & RTM_F_PREFIX) &&
4839 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4840 /* success since this is not a prefix route */
4841 return 1;
4842 }
4843 if (filter->filter_set) {
4844 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4845 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4846 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4847 return 1;
4848 }
13e38901 4849 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4850 }
1da177e4 4851
d4ead6b3
DA
4852 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4853 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4854 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4855}
4856
0eff0a27
JK
4857static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4858 const struct nlmsghdr *nlh,
4859 struct nlattr **tb,
4860 struct netlink_ext_ack *extack)
4861{
4862 struct rtmsg *rtm;
4863 int i, err;
4864
4865 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4866 NL_SET_ERR_MSG_MOD(extack,
4867 "Invalid header for get route request");
4868 return -EINVAL;
4869 }
4870
4871 if (!netlink_strict_get_check(skb))
4872 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4873 rtm_ipv6_policy, extack);
4874
4875 rtm = nlmsg_data(nlh);
4876 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4877 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4878 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4879 rtm->rtm_type) {
4880 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4881 return -EINVAL;
4882 }
4883 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4884 NL_SET_ERR_MSG_MOD(extack,
4885 "Invalid flags for get route request");
4886 return -EINVAL;
4887 }
4888
4889 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4890 rtm_ipv6_policy, extack);
4891 if (err)
4892 return err;
4893
4894 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4895 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4896 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4897 return -EINVAL;
4898 }
4899
4900 for (i = 0; i <= RTA_MAX; i++) {
4901 if (!tb[i])
4902 continue;
4903
4904 switch (i) {
4905 case RTA_SRC:
4906 case RTA_DST:
4907 case RTA_IIF:
4908 case RTA_OIF:
4909 case RTA_MARK:
4910 case RTA_UID:
4911 case RTA_SPORT:
4912 case RTA_DPORT:
4913 case RTA_IP_PROTO:
4914 break;
4915 default:
4916 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4917 return -EINVAL;
4918 }
4919 }
4920
4921 return 0;
4922}
4923
c21ef3e3
DA
4924static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4925 struct netlink_ext_ack *extack)
1da177e4 4926{
3b1e0a65 4927 struct net *net = sock_net(in_skb->sk);
ab364a6f 4928 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4929 int err, iif = 0, oif = 0;
a68886a6 4930 struct fib6_info *from;
18c3a61c 4931 struct dst_entry *dst;
ab364a6f 4932 struct rt6_info *rt;
1da177e4 4933 struct sk_buff *skb;
ab364a6f 4934 struct rtmsg *rtm;
744486d4 4935 struct flowi6 fl6 = {};
18c3a61c 4936 bool fibmatch;
1da177e4 4937
0eff0a27 4938 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4939 if (err < 0)
4940 goto errout;
1da177e4 4941
ab364a6f 4942 err = -EINVAL;
38b7097b
HFS
4943 rtm = nlmsg_data(nlh);
4944 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4945 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4946
ab364a6f
TG
4947 if (tb[RTA_SRC]) {
4948 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4949 goto errout;
4950
4e3fd7a0 4951 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4952 }
4953
4954 if (tb[RTA_DST]) {
4955 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4956 goto errout;
4957
4e3fd7a0 4958 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4959 }
4960
4961 if (tb[RTA_IIF])
4962 iif = nla_get_u32(tb[RTA_IIF]);
4963
4964 if (tb[RTA_OIF])
72331bc0 4965 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4966
2e47b291
LC
4967 if (tb[RTA_MARK])
4968 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4969
622ec2c9
LC
4970 if (tb[RTA_UID])
4971 fl6.flowi6_uid = make_kuid(current_user_ns(),
4972 nla_get_u32(tb[RTA_UID]));
4973 else
4974 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4975
eacb9384
RP
4976 if (tb[RTA_SPORT])
4977 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4978
4979 if (tb[RTA_DPORT])
4980 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4981
4982 if (tb[RTA_IP_PROTO]) {
4983 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
4984 &fl6.flowi6_proto, AF_INET6,
4985 extack);
eacb9384
RP
4986 if (err)
4987 goto errout;
4988 }
4989
1da177e4
LT
4990 if (iif) {
4991 struct net_device *dev;
72331bc0
SL
4992 int flags = 0;
4993
121622db
FW
4994 rcu_read_lock();
4995
4996 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4997 if (!dev) {
121622db 4998 rcu_read_unlock();
1da177e4 4999 err = -ENODEV;
ab364a6f 5000 goto errout;
1da177e4 5001 }
72331bc0
SL
5002
5003 fl6.flowi6_iif = iif;
5004
5005 if (!ipv6_addr_any(&fl6.saddr))
5006 flags |= RT6_LOOKUP_F_HAS_SADDR;
5007
b75cc8f9 5008 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
5009
5010 rcu_read_unlock();
72331bc0
SL
5011 } else {
5012 fl6.flowi6_oif = oif;
5013
58acfd71 5014 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
5015 }
5016
18c3a61c
RP
5017
5018 rt = container_of(dst, struct rt6_info, dst);
5019 if (rt->dst.error) {
5020 err = rt->dst.error;
5021 ip6_rt_put(rt);
5022 goto errout;
1da177e4
LT
5023 }
5024
9d6acb3b
WC
5025 if (rt == net->ipv6.ip6_null_entry) {
5026 err = rt->dst.error;
5027 ip6_rt_put(rt);
5028 goto errout;
5029 }
5030
ab364a6f 5031 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 5032 if (!skb) {
94e187c0 5033 ip6_rt_put(rt);
ab364a6f
TG
5034 err = -ENOBUFS;
5035 goto errout;
5036 }
1da177e4 5037
d8d1f30b 5038 skb_dst_set(skb, &rt->dst);
a68886a6
DA
5039
5040 rcu_read_lock();
5041 from = rcu_dereference(rt->from);
5042
18c3a61c 5043 if (fibmatch)
a68886a6 5044 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
5045 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5046 nlh->nlmsg_seq, 0);
5047 else
a68886a6
DA
5048 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5049 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
5050 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5051 0);
a68886a6
DA
5052 rcu_read_unlock();
5053
1da177e4 5054 if (err < 0) {
ab364a6f
TG
5055 kfree_skb(skb);
5056 goto errout;
1da177e4
LT
5057 }
5058
15e47304 5059 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5060errout:
1da177e4 5061 return err;
1da177e4
LT
5062}
5063
8d1c802b 5064void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5065 unsigned int nlm_flags)
1da177e4
LT
5066{
5067 struct sk_buff *skb;
5578689a 5068 struct net *net = info->nl_net;
528c4ceb
DL
5069 u32 seq;
5070 int err;
5071
5072 err = -ENOBUFS;
38308473 5073 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5074
19e42e45 5075 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5076 if (!skb)
21713ebc
TG
5077 goto errout;
5078
d4ead6b3
DA
5079 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5080 event, info->portid, seq, nlm_flags);
26932566
PM
5081 if (err < 0) {
5082 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5083 WARN_ON(err == -EMSGSIZE);
5084 kfree_skb(skb);
5085 goto errout;
5086 }
15e47304 5087 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5088 info->nlh, gfp_any());
5089 return;
21713ebc
TG
5090errout:
5091 if (err < 0)
5578689a 5092 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5093}
5094
8ed67789 5095static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5096 unsigned long event, void *ptr)
8ed67789 5097{
351638e7 5098 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5099 struct net *net = dev_net(dev);
8ed67789 5100
242d3a49
WC
5101 if (!(dev->flags & IFF_LOOPBACK))
5102 return NOTIFY_OK;
5103
5104 if (event == NETDEV_REGISTER) {
ad1601ae 5105 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5106 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5107 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5108#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5109 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5110 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5111 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5112 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5113#endif
76da0704
WC
5114 } else if (event == NETDEV_UNREGISTER &&
5115 dev->reg_state != NETREG_UNREGISTERED) {
5116 /* NETDEV_UNREGISTER could be fired for multiple times by
5117 * netdev_wait_allrefs(). Make sure we only call this once.
5118 */
12d94a80 5119 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5120#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5121 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5122 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5123#endif
5124 }
5125
5126 return NOTIFY_OK;
5127}
5128
1da177e4
LT
5129/*
5130 * /proc
5131 */
5132
5133#ifdef CONFIG_PROC_FS
1da177e4
LT
5134static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5135{
69ddb805 5136 struct net *net = (struct net *)seq->private;
1da177e4 5137 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5138 net->ipv6.rt6_stats->fib_nodes,
5139 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5140 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5141 net->ipv6.rt6_stats->fib_rt_entries,
5142 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5143 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5144 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5145
5146 return 0;
5147}
1da177e4
LT
5148#endif /* CONFIG_PROC_FS */
5149
5150#ifdef CONFIG_SYSCTL
5151
1da177e4 5152static
fe2c6338 5153int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5154 void __user *buffer, size_t *lenp, loff_t *ppos)
5155{
c486da34
LAG
5156 struct net *net;
5157 int delay;
f0fb9b28 5158 int ret;
c486da34 5159 if (!write)
1da177e4 5160 return -EINVAL;
c486da34
LAG
5161
5162 net = (struct net *)ctl->extra1;
5163 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5164 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5165 if (ret)
5166 return ret;
5167
2ac3ac8f 5168 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5169 return 0;
1da177e4
LT
5170}
5171
7c6bb7d2
DA
5172static int zero;
5173static int one = 1;
5174
ed792e28 5175static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5176 {
1da177e4 5177 .procname = "flush",
4990509f 5178 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5179 .maxlen = sizeof(int),
89c8b3a1 5180 .mode = 0200,
6d9f239a 5181 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5182 },
5183 {
1da177e4 5184 .procname = "gc_thresh",
9a7ec3a9 5185 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5186 .maxlen = sizeof(int),
5187 .mode = 0644,
6d9f239a 5188 .proc_handler = proc_dointvec,
1da177e4
LT
5189 },
5190 {
1da177e4 5191 .procname = "max_size",
4990509f 5192 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5193 .maxlen = sizeof(int),
5194 .mode = 0644,
6d9f239a 5195 .proc_handler = proc_dointvec,
1da177e4
LT
5196 },
5197 {
1da177e4 5198 .procname = "gc_min_interval",
4990509f 5199 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5200 .maxlen = sizeof(int),
5201 .mode = 0644,
6d9f239a 5202 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5203 },
5204 {
1da177e4 5205 .procname = "gc_timeout",
4990509f 5206 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5207 .maxlen = sizeof(int),
5208 .mode = 0644,
6d9f239a 5209 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5210 },
5211 {
1da177e4 5212 .procname = "gc_interval",
4990509f 5213 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5214 .maxlen = sizeof(int),
5215 .mode = 0644,
6d9f239a 5216 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5217 },
5218 {
1da177e4 5219 .procname = "gc_elasticity",
4990509f 5220 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5221 .maxlen = sizeof(int),
5222 .mode = 0644,
f3d3f616 5223 .proc_handler = proc_dointvec,
1da177e4
LT
5224 },
5225 {
1da177e4 5226 .procname = "mtu_expires",
4990509f 5227 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5228 .maxlen = sizeof(int),
5229 .mode = 0644,
6d9f239a 5230 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5231 },
5232 {
1da177e4 5233 .procname = "min_adv_mss",
4990509f 5234 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5235 .maxlen = sizeof(int),
5236 .mode = 0644,
f3d3f616 5237 .proc_handler = proc_dointvec,
1da177e4
LT
5238 },
5239 {
1da177e4 5240 .procname = "gc_min_interval_ms",
4990509f 5241 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5242 .maxlen = sizeof(int),
5243 .mode = 0644,
6d9f239a 5244 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5245 },
7c6bb7d2
DA
5246 {
5247 .procname = "skip_notify_on_dev_down",
5248 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5249 .maxlen = sizeof(int),
5250 .mode = 0644,
5251 .proc_handler = proc_dointvec,
5252 .extra1 = &zero,
5253 .extra2 = &one,
5254 },
f8572d8f 5255 { }
1da177e4
LT
5256};
5257
2c8c1e72 5258struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5259{
5260 struct ctl_table *table;
5261
5262 table = kmemdup(ipv6_route_table_template,
5263 sizeof(ipv6_route_table_template),
5264 GFP_KERNEL);
5ee09105
YH
5265
5266 if (table) {
5267 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5268 table[0].extra1 = net;
86393e52 5269 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5270 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5271 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5272 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5273 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5274 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5275 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5276 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5277 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5278 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5279
5280 /* Don't export sysctls to unprivileged users */
5281 if (net->user_ns != &init_user_ns)
5282 table[0].procname = NULL;
5ee09105
YH
5283 }
5284
760f2d01
DL
5285 return table;
5286}
1da177e4
LT
5287#endif
5288
2c8c1e72 5289static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5290{
633d424b 5291 int ret = -ENOMEM;
8ed67789 5292
86393e52
AD
5293 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5294 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5295
fc66f95c
ED
5296 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5297 goto out_ip6_dst_ops;
5298
421842ed
DA
5299 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5300 sizeof(*net->ipv6.fib6_null_entry),
5301 GFP_KERNEL);
5302 if (!net->ipv6.fib6_null_entry)
5303 goto out_ip6_dst_entries;
5304
8ed67789
DL
5305 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5306 sizeof(*net->ipv6.ip6_null_entry),
5307 GFP_KERNEL);
5308 if (!net->ipv6.ip6_null_entry)
421842ed 5309 goto out_fib6_null_entry;
d8d1f30b 5310 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5311 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5312 ip6_template_metrics, true);
8ed67789
DL
5313
5314#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5315 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5316 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5317 sizeof(*net->ipv6.ip6_prohibit_entry),
5318 GFP_KERNEL);
68fffc67
PZ
5319 if (!net->ipv6.ip6_prohibit_entry)
5320 goto out_ip6_null_entry;
d8d1f30b 5321 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5322 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5323 ip6_template_metrics, true);
8ed67789
DL
5324
5325 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5326 sizeof(*net->ipv6.ip6_blk_hole_entry),
5327 GFP_KERNEL);
68fffc67
PZ
5328 if (!net->ipv6.ip6_blk_hole_entry)
5329 goto out_ip6_prohibit_entry;
d8d1f30b 5330 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5331 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5332 ip6_template_metrics, true);
8ed67789
DL
5333#endif
5334
b339a47c
PZ
5335 net->ipv6.sysctl.flush_delay = 0;
5336 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5337 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5338 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5339 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5340 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5341 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5342 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5343 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5344
6891a346
BT
5345 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5346
8ed67789
DL
5347 ret = 0;
5348out:
5349 return ret;
f2fc6a54 5350
68fffc67
PZ
5351#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5352out_ip6_prohibit_entry:
5353 kfree(net->ipv6.ip6_prohibit_entry);
5354out_ip6_null_entry:
5355 kfree(net->ipv6.ip6_null_entry);
5356#endif
421842ed
DA
5357out_fib6_null_entry:
5358 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5359out_ip6_dst_entries:
5360 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5361out_ip6_dst_ops:
f2fc6a54 5362 goto out;
cdb18761
DL
5363}
5364
2c8c1e72 5365static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5366{
421842ed 5367 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5368 kfree(net->ipv6.ip6_null_entry);
5369#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5370 kfree(net->ipv6.ip6_prohibit_entry);
5371 kfree(net->ipv6.ip6_blk_hole_entry);
5372#endif
41bb78b4 5373 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5374}
5375
d189634e
TG
5376static int __net_init ip6_route_net_init_late(struct net *net)
5377{
5378#ifdef CONFIG_PROC_FS
c3506372
CH
5379 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5380 sizeof(struct ipv6_route_iter));
3617d949
CH
5381 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5382 rt6_stats_seq_show, NULL);
d189634e
TG
5383#endif
5384 return 0;
5385}
5386
5387static void __net_exit ip6_route_net_exit_late(struct net *net)
5388{
5389#ifdef CONFIG_PROC_FS
ece31ffd
G
5390 remove_proc_entry("ipv6_route", net->proc_net);
5391 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5392#endif
5393}
5394
cdb18761
DL
5395static struct pernet_operations ip6_route_net_ops = {
5396 .init = ip6_route_net_init,
5397 .exit = ip6_route_net_exit,
5398};
5399
c3426b47
DM
5400static int __net_init ipv6_inetpeer_init(struct net *net)
5401{
5402 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5403
5404 if (!bp)
5405 return -ENOMEM;
5406 inet_peer_base_init(bp);
5407 net->ipv6.peers = bp;
5408 return 0;
5409}
5410
5411static void __net_exit ipv6_inetpeer_exit(struct net *net)
5412{
5413 struct inet_peer_base *bp = net->ipv6.peers;
5414
5415 net->ipv6.peers = NULL;
56a6b248 5416 inetpeer_invalidate_tree(bp);
c3426b47
DM
5417 kfree(bp);
5418}
5419
2b823f72 5420static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5421 .init = ipv6_inetpeer_init,
5422 .exit = ipv6_inetpeer_exit,
5423};
5424
d189634e
TG
5425static struct pernet_operations ip6_route_net_late_ops = {
5426 .init = ip6_route_net_init_late,
5427 .exit = ip6_route_net_exit_late,
5428};
5429
8ed67789
DL
5430static struct notifier_block ip6_route_dev_notifier = {
5431 .notifier_call = ip6_route_dev_notify,
242d3a49 5432 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5433};
5434
2f460933
WC
5435void __init ip6_route_init_special_entries(void)
5436{
5437 /* Registering of the loopback is done before this portion of code,
5438 * the loopback reference in rt6_info will not be taken, do it
5439 * manually for init_net */
ad1601ae 5440 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5441 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5442 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5444 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5445 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5446 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5447 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5448 #endif
5449}
5450
433d49c3 5451int __init ip6_route_init(void)
1da177e4 5452{
433d49c3 5453 int ret;
8d0b94af 5454 int cpu;
433d49c3 5455
9a7ec3a9
DL
5456 ret = -ENOMEM;
5457 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5458 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5459 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5460 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5461 goto out;
14e50e57 5462
fc66f95c 5463 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5464 if (ret)
bdb3289f 5465 goto out_kmem_cache;
bdb3289f 5466
c3426b47
DM
5467 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5468 if (ret)
e8803b6c 5469 goto out_dst_entries;
2a0c451a 5470
7e52b33b
DM
5471 ret = register_pernet_subsys(&ip6_route_net_ops);
5472 if (ret)
5473 goto out_register_inetpeer;
c3426b47 5474
5dc121e9
AE
5475 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5476
e8803b6c 5477 ret = fib6_init();
433d49c3 5478 if (ret)
8ed67789 5479 goto out_register_subsys;
433d49c3 5480
433d49c3
DL
5481 ret = xfrm6_init();
5482 if (ret)
e8803b6c 5483 goto out_fib6_init;
c35b7e72 5484
433d49c3
DL
5485 ret = fib6_rules_init();
5486 if (ret)
5487 goto xfrm6_init;
7e5449c2 5488
d189634e
TG
5489 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5490 if (ret)
5491 goto fib6_rules_init;
5492
16feebcf
FW
5493 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5494 inet6_rtm_newroute, NULL, 0);
5495 if (ret < 0)
5496 goto out_register_late_subsys;
5497
5498 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5499 inet6_rtm_delroute, NULL, 0);
5500 if (ret < 0)
5501 goto out_register_late_subsys;
5502
5503 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5504 inet6_rtm_getroute, NULL,
5505 RTNL_FLAG_DOIT_UNLOCKED);
5506 if (ret < 0)
d189634e 5507 goto out_register_late_subsys;
c127ea2c 5508
8ed67789 5509 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5510 if (ret)
d189634e 5511 goto out_register_late_subsys;
8ed67789 5512
8d0b94af
MKL
5513 for_each_possible_cpu(cpu) {
5514 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5515
5516 INIT_LIST_HEAD(&ul->head);
5517 spin_lock_init(&ul->lock);
5518 }
5519
433d49c3
DL
5520out:
5521 return ret;
5522
d189634e 5523out_register_late_subsys:
16feebcf 5524 rtnl_unregister_all(PF_INET6);
d189634e 5525 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5526fib6_rules_init:
433d49c3
DL
5527 fib6_rules_cleanup();
5528xfrm6_init:
433d49c3 5529 xfrm6_fini();
2a0c451a
TG
5530out_fib6_init:
5531 fib6_gc_cleanup();
8ed67789
DL
5532out_register_subsys:
5533 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5534out_register_inetpeer:
5535 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5536out_dst_entries:
5537 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5538out_kmem_cache:
f2fc6a54 5539 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5540 goto out;
1da177e4
LT
5541}
5542
5543void ip6_route_cleanup(void)
5544{
8ed67789 5545 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5546 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5547 fib6_rules_cleanup();
1da177e4 5548 xfrm6_fini();
1da177e4 5549 fib6_gc_cleanup();
c3426b47 5550 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5551 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5552 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5553 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5554}