ipv6: Rename fib6_multipath_select and pass fib6_result
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result codes produced by rt6_check_neigh() and rt6_score_route().
 * Negative values are failures; non-negative values returned by
 * rt6_score_route() are route scores.
 */
enum rt6_nud_state {
	/* rt6_score_route(): interface mismatch under RT6_LOOKUP_F_IFACE;
	 * also the initial value in rt6_check_neigh().  find_match()
	 * rejects the nexthop outright.
	 */
	RT6_NUD_FAIL_HARD = -3,
	/* neighbour entry is in NUD_FAILED (CONFIG_IPV6_ROUTER_PREF only) */
	RT6_NUD_FAIL_PROBE = -2,
	/* no neighbour entry and CONFIG_IPV6_ROUTER_PREF is disabled;
	 * find_match() scores the nexthop 0 and requests round-robin
	 */
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
702cea56
DA
105static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106 int strict);
8d1c802b 107static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 108static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 109 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 110 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
111 int iif, int type, u32 portid, u32 seq,
112 unsigned int flags);
8d1c802b 113static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
114 struct in6_addr *daddr,
115 struct in6_addr *saddr);
1da177e4 116
70ceb4f5 117#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 118static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev,
95c96174 122 unsigned int pref);
8d1c802b 123static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 124 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
125 const struct in6_addr *gwaddr,
126 struct net_device *dev);
70ceb4f5
YH
127#endif
128
8d0b94af
MKL
/* Per-CPU list of rt6_info entries linked through rt6i_uncached. */
struct uncached_list {
	spinlock_t lock;	/* taken with _bh around every access to head */
	struct list_head head;
};
133
134static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
135
510c321b 136void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
137{
138 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
139
8d0b94af
MKL
140 rt->rt6i_uncached_list = ul;
141
142 spin_lock_bh(&ul->lock);
143 list_add_tail(&rt->rt6i_uncached, &ul->head);
144 spin_unlock_bh(&ul->lock);
145}
146
510c321b 147void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
148{
149 if (!list_empty(&rt->rt6i_uncached)) {
150 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 151 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
152
153 spin_lock_bh(&ul->lock);
154 list_del(&rt->rt6i_uncached);
81eb8447 155 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
156 spin_unlock_bh(&ul->lock);
157 }
158}
159
160static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
161{
162 struct net_device *loopback_dev = net->loopback_dev;
163 int cpu;
164
e332bc67
EB
165 if (dev == loopback_dev)
166 return;
167
8d0b94af
MKL
168 for_each_possible_cpu(cpu) {
169 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
170 struct rt6_info *rt;
171
172 spin_lock_bh(&ul->lock);
173 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
174 struct inet6_dev *rt_idev = rt->rt6i_idev;
175 struct net_device *rt_dev = rt->dst.dev;
176
e332bc67 177 if (rt_idev->dev == dev) {
8d0b94af
MKL
178 rt->rt6i_idev = in6_dev_get(loopback_dev);
179 in6_dev_put(rt_idev);
180 }
181
e332bc67 182 if (rt_dev == dev) {
8d0b94af
MKL
183 rt->dst.dev = loopback_dev;
184 dev_hold(rt->dst.dev);
185 dev_put(rt_dev);
186 }
187 }
188 spin_unlock_bh(&ul->lock);
189 }
190}
191
f8a1b43b 192static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
193 struct sk_buff *skb,
194 const void *daddr)
39232973 195{
a7563f34 196 if (!ipv6_addr_any(p))
39232973 197 return (const void *) p;
f894cbf8
DM
198 else if (skb)
199 return &ipv6_hdr(skb)->daddr;
39232973
DM
200 return daddr;
201}
202
f8a1b43b
DA
203struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204 struct net_device *dev,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct neighbour *n;
209
f8a1b43b
DA
210 daddr = choose_neigh_daddr(gw, skb, daddr);
211 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
212 if (n)
213 return n;
7adf3246
SB
214
215 n = neigh_create(&nd_tbl, daddr, dev);
216 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
217}
218
219static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220 struct sk_buff *skb,
221 const void *daddr)
222{
223 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224
225 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
226}
227
63fca65d
JA
228static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229{
230 struct net_device *dev = dst->dev;
231 struct rt6_info *rt = (struct rt6_info *)dst;
232
f8a1b43b 233 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
234 if (!daddr)
235 return;
236 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237 return;
238 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239 return;
240 __ipv6_confirm_neigh(dev, daddr);
241}
242
/*
 * dst operations for regular IPv6 routes.
 * NOTE(review): the name suggests this is copied into each namespace's
 * net->ipv6.ip6_dst_ops; the copy is not visible here - confirm.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
261
ebb762f2 262static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 263{
618f9bc7
SK
264 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265
266 return mtu ? : dst->dev->mtu;
ec831ea7
RD
267}
268
6700c270
DM
/* dst_ops->update_pmtu for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
273
6700c270
DM
/* dst_ops->redirect for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
278
14e50e57
DM
/*
 * dst operations for blackhole dst entries: PMTU updates and redirects
 * are ignored, and the MTU comes from ip6_blackhole_mtu().
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};
290
/* Read-only metrics array; every slot, including hop limit, is zero. */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
294
/*
 * Template for the FIB "null" entry: an unreachable reject route with no
 * nexthop, the largest possible metric and the default dst metrics.
 */
static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol  = RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
303
/*
 * Template for the "null" dst entry: packets are handed to
 * ip6_pkt_discard()/ip6_pkt_discard_out() and the dst reports
 * -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
315
101367c2
TG
316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
317
/*
 * Template for the "prohibit" dst entry: packets are handed to
 * ip6_pkt_prohibit()/ip6_pkt_prohibit_out() and the dst reports -EACCES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
329
/*
 * Template for the "blackhole" dst entry: packets are handed to
 * dst_discard()/dst_discard_out() and the dst reports -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
341
342#endif
343
ebfa45f0
MKL
344static void rt6_info_init(struct rt6_info *rt)
345{
346 struct dst_entry *dst = &rt->dst;
347
348 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
349 INIT_LIST_HEAD(&rt->rt6i_uncached);
350}
351
1da177e4 352/* allocate dst with ip6_dst_ops */
93531c67
DA
353struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354 int flags)
1da177e4 355{
97bab73f 356 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 357 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 358
81eb8447 359 if (rt) {
ebfa45f0 360 rt6_info_init(rt);
81eb8447
WW
361 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
362 }
8104891b 363
cf911662 364 return rt;
1da177e4 365}
9ab179d8 366EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 367
1da177e4
LT
/*
 * dst_ops->destroy: release everything an rt6_info holds - its metrics,
 * its slot on the uncached list, its inet6_dev reference and the
 * fib6_info it was created from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct fib6_info *from;
	struct inet6_dev *idev;

	ip_dst_metrics_put(dst);
	rt6_uncached_list_del(rt);

	/* clear the pointer before dropping the reference */
	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* detach from the originating fib6_info and drop our reference */
	rcu_read_lock();
	from = rcu_dereference(rt->from);
	rcu_assign_pointer(rt->from, NULL);
	fib6_info_release(from);
	rcu_read_unlock();
}
389
1da177e4
LT
390static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
391 int how)
392{
393 struct rt6_info *rt = (struct rt6_info *)dst;
394 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 395 struct net_device *loopback_dev =
c346dca1 396 dev_net(dev)->loopback_dev;
1da177e4 397
e5645f51
WW
398 if (idev && idev->dev != loopback_dev) {
399 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
97cac082 403 }
1da177e4
LT
404 }
405}
406
5973fb1e
MKL
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
a50feda5 415static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 416{
a68886a6
DA
417 struct fib6_info *from;
418
419 from = rcu_dereference(rt->from);
420
1716a961
G
421 if (rt->rt6i_flags & RTF_EXPIRES) {
422 if (time_after(jiffies, rt->dst.expires))
a50feda5 423 return true;
a68886a6 424 } else if (from) {
1e2ea8ad 425 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 426 fib6_check_expired(from);
1716a961 427 }
a50feda5 428 return false;
1da177e4
LT
429}
430
b1d40991
DA
431void fib6_select_path(const struct net *net, struct fib6_result *res,
432 struct flowi6 *fl6, int oif, bool have_oif_match,
433 const struct sk_buff *skb, int strict)
51ebd318 434{
8d1c802b 435 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
436 struct fib6_info *match = res->f6i;
437
438 if (!match->fib6_nsiblings || have_oif_match)
439 goto out;
51ebd318 440
b673d6cc
JS
441 /* We might have already computed the hash for ICMPv6 errors. In such
442 * case it will always be non-zero. Otherwise now is the time to do it.
443 */
444 if (!fl6->mp_hash)
b4bac172 445 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 446
ad1601ae 447 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
b1d40991 448 goto out;
3d709f69 449
93c2fb25
DA
450 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
451 fib6_siblings) {
702cea56 452 const struct fib6_nh *nh = &sibling->fib6_nh;
5e670d84
DA
453 int nh_upper_bound;
454
702cea56 455 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 456 if (fl6->mp_hash > nh_upper_bound)
3d709f69 457 continue;
702cea56 458 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
459 break;
460 match = sibling;
461 break;
462 }
463
b1d40991
DA
464out:
465 res->f6i = match;
466 res->nh = &match->fib6_nh;
51ebd318
ND
467}
468
1da177e4 469/*
66f5d6ce 470 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
471 */
472
0c59d006
DA
473static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
474 const struct in6_addr *saddr, int oif, int flags)
475{
476 const struct net_device *dev;
477
478 if (nh->fib_nh_flags & RTNH_F_DEAD)
479 return false;
480
481 dev = nh->fib_nh_dev;
482 if (oif) {
483 if (dev->ifindex == oif)
484 return true;
485 } else {
486 if (ipv6_chk_addr(net, saddr, dev,
487 flags & RT6_LOOKUP_F_IFACE))
488 return true;
489 }
490
491 return false;
492}
493
8d1c802b
DA
494static inline struct fib6_info *rt6_device_match(struct net *net,
495 struct fib6_info *rt,
b71d1d42 496 const struct in6_addr *saddr,
1da177e4 497 int oif,
d420895e 498 int flags)
1da177e4 499{
0c59d006 500 const struct fib6_nh *nh;
8d1c802b 501 struct fib6_info *sprt;
1da177e4 502
5e670d84 503 if (!oif && ipv6_addr_any(saddr) &&
ad1601ae 504 !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
8067bb8c 505 return rt;
dd3abc4e 506
8fb11a9a 507 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
0c59d006
DA
508 nh = &sprt->fib6_nh;
509 if (__rt6_device_match(net, nh, saddr, oif, flags))
510 return sprt;
dd3abc4e 511 }
1da177e4 512
eea68cd3
DA
513 if (oif && flags & RT6_LOOKUP_F_IFACE)
514 return net->ipv6.fib6_null_entry;
8067bb8c 515
ad1601ae 516 return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
517}
518
27097255 519#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
/* Deferred router probe, queued by rt6_probe(), run by rt6_probe_deferred(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held until the work has run */
};
525
526static void rt6_probe_deferred(struct work_struct *w)
527{
528 struct in6_addr mcaddr;
529 struct __rt6_probe_work *work =
530 container_of(w, struct __rt6_probe_work, work);
531
532 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 533 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 534 dev_put(work->dev);
662f5533 535 kfree(work);
c2f17e82
HFS
536}
537
cc3a86c8 538static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 539{
f547fac6 540 struct __rt6_probe_work *work = NULL;
5e670d84 541 const struct in6_addr *nh_gw;
f2c31e32 542 struct neighbour *neigh;
5e670d84 543 struct net_device *dev;
f547fac6 544 struct inet6_dev *idev;
5e670d84 545
27097255
YH
546 /*
547 * Okay, this does not seem to be appropriate
548 * for now, however, we need to check if it
549 * is really so; aka Router Reachability Probing.
550 *
551 * Router Reachability Probe MUST be rate-limited
552 * to no more than one per minute.
553 */
cc3a86c8 554 if (fib6_nh->fib_nh_gw_family)
7ff74a59 555 return;
5e670d84 556
cc3a86c8
DA
557 nh_gw = &fib6_nh->fib_nh_gw6;
558 dev = fib6_nh->fib_nh_dev;
2152caea 559 rcu_read_lock_bh();
f547fac6 560 idev = __in6_dev_get(dev);
5e670d84 561 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 562 if (neigh) {
8d6c31bf
MKL
563 if (neigh->nud_state & NUD_VALID)
564 goto out;
565
2152caea 566 write_lock(&neigh->lock);
990edb42
MKL
567 if (!(neigh->nud_state & NUD_VALID) &&
568 time_after(jiffies,
dcd1f572 569 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
570 work = kmalloc(sizeof(*work), GFP_ATOMIC);
571 if (work)
572 __neigh_set_probe_once(neigh);
c2f17e82 573 }
2152caea 574 write_unlock(&neigh->lock);
cc3a86c8 575 } else if (time_after(jiffies, fib6_nh->last_probe +
f547fac6 576 idev->cnf.rtr_probe_interval)) {
990edb42 577 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 578 }
990edb42
MKL
579
580 if (work) {
cc3a86c8 581 fib6_nh->last_probe = jiffies;
990edb42 582 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
583 work->target = *nh_gw;
584 dev_hold(dev);
585 work->dev = dev;
990edb42
MKL
586 schedule_work(&work->work);
587 }
588
8d6c31bf 589out:
2152caea 590 rcu_read_unlock_bh();
27097255
YH
591}
592#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
596#endif
597
1da177e4 598/*
554cfb7e 599 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 600 */
1ba9a895 601static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 602{
afc154e9 603 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 604 struct neighbour *neigh;
f2c31e32 605
145a3621 606 rcu_read_lock_bh();
1ba9a895
DA
607 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
608 &fib6_nh->fib_nh_gw6);
145a3621
YH
609 if (neigh) {
610 read_lock(&neigh->lock);
554cfb7e 611 if (neigh->nud_state & NUD_VALID)
afc154e9 612 ret = RT6_NUD_SUCCEED;
398bcbeb 613#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 614 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 615 ret = RT6_NUD_SUCCEED;
7e980569
JB
616 else
617 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 618#endif
145a3621 619 read_unlock(&neigh->lock);
afc154e9
HFS
620 } else {
621 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 622 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 623 }
145a3621
YH
624 rcu_read_unlock_bh();
625
a5a81f0b 626 return ret;
1da177e4
LT
627}
628
702cea56
DA
629static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
630 int strict)
1da177e4 631{
6e1809a5
DA
632 int m = 0;
633
634 if (!oif || nh->fib_nh_dev->ifindex == oif)
635 m = 2;
1ab1457c 636
77d16f45 637 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 638 return RT6_NUD_FAIL_HARD;
ebacaaa0 639#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 640 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 641#endif
1ba9a895 642 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 643 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 644 int n = rt6_check_neigh(nh);
afc154e9
HFS
645 if (n < 0)
646 return n;
647 }
554cfb7e
YH
648 return m;
649}
650
28679ed1
DA
651static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
652 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 653{
afc154e9 654 bool match_do_rr = false;
28679ed1
DA
655 bool rc = false;
656 int m;
35103d11 657
28679ed1 658 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
659 goto out;
660
28679ed1
DA
661 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
662 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 663 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 664 goto out;
f11e6659 665
28679ed1 666 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 667 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
668 match_do_rr = true;
669 m = 0; /* lowest valid score */
7e980569 670 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 671 goto out;
afc154e9
HFS
672 }
673
674 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 675 rt6_probe(nh);
f11e6659 676
7e980569 677 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 678 if (m > *mpri) {
afc154e9 679 *do_rr = match_do_rr;
f11e6659 680 *mpri = m;
28679ed1 681 rc = true;
f11e6659 682 }
f11e6659 683out:
28679ed1 684 return rc;
f11e6659
DM
685}
686
30c15f03
DA
687static void __find_rr_leaf(struct fib6_info *rt_start,
688 struct fib6_info *nomatch, u32 metric,
689 struct fib6_info **match, struct fib6_info **cont,
690 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 691{
30c15f03 692 struct fib6_info *rt;
1da177e4 693
30c15f03
DA
694 for (rt = rt_start;
695 rt && rt != nomatch;
696 rt = rcu_dereference(rt->fib6_next)) {
697 struct fib6_nh *nh;
698
699 if (cont && rt->fib6_metric != metric) {
700 *cont = rt;
701 return;
9fbdcfaf
SK
702 }
703
28679ed1
DA
704 if (fib6_check_expired(rt))
705 continue;
706
707 nh = &rt->fib6_nh;
30c15f03
DA
708 if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
709 *match = rt;
9fbdcfaf 710 }
30c15f03 711}
9fbdcfaf 712
30c15f03
DA
713static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
714 struct fib6_info *leaf,
715 struct fib6_info *rr_head,
716 u32 metric, int oif, int strict,
717 bool *do_rr)
718{
719 struct fib6_info *match = NULL, *cont = NULL;
720 int mpri = -1;
9fbdcfaf 721
30c15f03
DA
722 __find_rr_leaf(rr_head, NULL, metric, &match, &cont,
723 oif, strict, do_rr, &mpri);
28679ed1 724
30c15f03
DA
725 __find_rr_leaf(leaf, rr_head, metric, &match, &cont,
726 oif, strict, do_rr, &mpri);
9fbdcfaf
SK
727
728 if (match || !cont)
729 return match;
730
30c15f03
DA
731 __find_rr_leaf(cont, NULL, metric, &match, NULL,
732 oif, strict, do_rr, &mpri);
1da177e4 733
f11e6659
DM
734 return match;
735}
1da177e4 736
8d1c802b 737static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 738 int oif, int strict)
f11e6659 739{
8d1c802b
DA
740 struct fib6_info *leaf = rcu_dereference(fn->leaf);
741 struct fib6_info *match, *rt0;
afc154e9 742 bool do_rr = false;
17ecf590 743 int key_plen;
1da177e4 744
421842ed
DA
745 if (!leaf || leaf == net->ipv6.fib6_null_entry)
746 return net->ipv6.fib6_null_entry;
8d1040e8 747
66f5d6ce 748 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 749 if (!rt0)
66f5d6ce 750 rt0 = leaf;
1da177e4 751
17ecf590
WW
752 /* Double check to make sure fn is not an intermediate node
753 * and fn->leaf does not points to its child's leaf
754 * (This might happen if all routes under fn are deleted from
755 * the tree and fib6_repair_tree() is called on the node.)
756 */
93c2fb25 757 key_plen = rt0->fib6_dst.plen;
17ecf590 758#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
759 if (rt0->fib6_src.plen)
760 key_plen = rt0->fib6_src.plen;
17ecf590
WW
761#endif
762 if (fn->fn_bit != key_plen)
421842ed 763 return net->ipv6.fib6_null_entry;
17ecf590 764
93c2fb25 765 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 766 &do_rr);
1da177e4 767
afc154e9 768 if (do_rr) {
8fb11a9a 769 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 770
554cfb7e 771 /* no entries matched; do round-robin */
93c2fb25 772 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 773 next = leaf;
f11e6659 774
66f5d6ce 775 if (next != rt0) {
93c2fb25 776 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 /* make sure next is not being deleted from the tree */
93c2fb25 778 if (next->fib6_node)
66f5d6ce 779 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 780 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 781 }
1da177e4 782 }
1da177e4 783
421842ed 784 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
785}
786
8d1c802b 787static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 788{
bdf00467 789 return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
8b9df265
MKL
790}
791
70ceb4f5
YH
792#ifdef CONFIG_IPV6_ROUTE_INFO
793int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 794 const struct in6_addr *gwaddr)
70ceb4f5 795{
c346dca1 796 struct net *net = dev_net(dev);
70ceb4f5
YH
797 struct route_info *rinfo = (struct route_info *) opt;
798 struct in6_addr prefix_buf, *prefix;
799 unsigned int pref;
4bed72e4 800 unsigned long lifetime;
8d1c802b 801 struct fib6_info *rt;
70ceb4f5
YH
802
803 if (len < sizeof(struct route_info)) {
804 return -EINVAL;
805 }
806
807 /* Sanity check for prefix_len and length */
808 if (rinfo->length > 3) {
809 return -EINVAL;
810 } else if (rinfo->prefix_len > 128) {
811 return -EINVAL;
812 } else if (rinfo->prefix_len > 64) {
813 if (rinfo->length < 2) {
814 return -EINVAL;
815 }
816 } else if (rinfo->prefix_len > 0) {
817 if (rinfo->length < 1) {
818 return -EINVAL;
819 }
820 }
821
822 pref = rinfo->route_pref;
823 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 824 return -EINVAL;
70ceb4f5 825
4bed72e4 826 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
827
828 if (rinfo->length == 3)
829 prefix = (struct in6_addr *)rinfo->prefix;
830 else {
831 /* this function is safe */
832 ipv6_addr_prefix(&prefix_buf,
833 (struct in6_addr *)rinfo->prefix,
834 rinfo->prefix_len);
835 prefix = &prefix_buf;
836 }
837
f104a567 838 if (rinfo->prefix_len == 0)
afb1d4b5 839 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
840 else
841 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 842 gwaddr, dev);
70ceb4f5
YH
843
844 if (rt && !lifetime) {
afb1d4b5 845 ip6_del_rt(net, rt);
70ceb4f5
YH
846 rt = NULL;
847 }
848
849 if (!rt && lifetime)
830218c1
DA
850 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
851 dev, pref);
70ceb4f5 852 else if (rt)
93c2fb25
DA
853 rt->fib6_flags = RTF_ROUTEINFO |
854 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
855
856 if (rt) {
1716a961 857 if (!addrconf_finite_timeout(lifetime))
14895687 858 fib6_clean_expires(rt);
1716a961 859 else
14895687 860 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 861
93531c67 862 fib6_info_release(rt);
70ceb4f5
YH
863 }
864 return 0;
865}
866#endif
867
ae90d867
DA
868/*
869 * Misc support functions
870 */
871
872/* called with rcu_lock held */
8d1c802b 873static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 874{
ad1601ae 875 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
ae90d867 876
93c2fb25 877 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
878 /* for copies of local routes, dst->dev needs to be the
879 * device if it is a master device, the master device if
880 * device is enslaved, and the loopback as the default
881 */
882 if (netif_is_l3_slave(dev) &&
93c2fb25 883 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
884 dev = l3mdev_master_dev_rcu(dev);
885 else if (!netif_is_l3_master(dev))
886 dev = dev_net(dev)->loopback_dev;
887 /* last case is netif_is_l3_master(dev) is true in which
888 * case we want dev returned to be dev
889 */
890 }
891
892 return dev;
893}
894
6edb3c96
DA
/* dst error code for each route type, indexed by RTN_* value. */
static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};
909
/*
 * Map a route type to its dst error code via fib6_prop[].
 * NOTE(review): there is no bounds check here; this assumes callers only
 * pass values <= RTN_MAX - confirm against the route-add validation.
 */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}
914
8d1c802b 915static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
916{
917 unsigned short flags = 0;
918
919 if (rt->dst_nocount)
920 flags |= DST_NOCOUNT;
921 if (rt->dst_nopolicy)
922 flags |= DST_NOPOLICY;
923 if (rt->dst_host)
924 flags |= DST_HOST;
925
926 return flags;
927}
928
8d1c802b 929static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
930{
931 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
932
933 switch (ort->fib6_type) {
934 case RTN_BLACKHOLE:
935 rt->dst.output = dst_discard_out;
936 rt->dst.input = dst_discard;
937 break;
938 case RTN_PROHIBIT:
939 rt->dst.output = ip6_pkt_prohibit_out;
940 rt->dst.input = ip6_pkt_prohibit;
941 break;
942 case RTN_THROW:
943 case RTN_UNREACHABLE:
944 default:
945 rt->dst.output = ip6_pkt_discard_out;
946 rt->dst.input = ip6_pkt_discard;
947 break;
948 }
949}
950
8d1c802b 951static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 952{
93c2fb25 953 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
954 ip6_rt_init_dst_reject(rt, ort);
955 return;
956 }
957
958 rt->dst.error = 0;
959 rt->dst.output = ip6_output;
960
d23c4b63 961 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 962 rt->dst.input = ip6_input;
93c2fb25 963 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
964 rt->dst.input = ip6_mc_input;
965 } else {
966 rt->dst.input = ip6_forward;
967 }
968
ad1601ae
DA
969 if (ort->fib6_nh.fib_nh_lws) {
970 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
6edb3c96
DA
971 lwtunnel_set_redirect(&rt->dst);
972 }
973
974 rt->dst.lastuse = jiffies;
975}
976
/*
 * Bind @rt to the FIB entry @from it was created from: clear
 * RTF_EXPIRES on @rt, publish the back-pointer and initialise the dst
 * metrics from @from's metrics.
 *
 * Caller must already hold reference to @from
 */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
984
e873e4b9 985/* Caller must already hold reference to @ort */
8d1c802b 986static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 987{
dcd1f572
DA
988 struct net_device *dev = fib6_info_nh_dev(ort);
989
6edb3c96
DA
990 ip6_rt_init_dst(rt, ort);
991
93c2fb25 992 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 993 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
93c2fb25 994 rt->rt6i_flags = ort->fib6_flags;
bdf00467 995 if (ort->fib6_nh.fib_nh_gw_family) {
ad1601ae 996 rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
2b2450ca
DA
997 rt->rt6i_flags |= RTF_GATEWAY;
998 }
ae90d867 999 rt6_set_from(rt, ort);
ae90d867 1000#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 1001 rt->rt6i_src = ort->fib6_src;
ae90d867 1002#endif
ae90d867
DA
1003}
1004
a3c00e46
MKL
1005static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1006 struct in6_addr *saddr)
1007{
66f5d6ce 1008 struct fib6_node *pn, *sn;
a3c00e46
MKL
1009 while (1) {
1010 if (fn->fn_flags & RTN_TL_ROOT)
1011 return NULL;
66f5d6ce
WW
1012 pn = rcu_dereference(fn->parent);
1013 sn = FIB6_SUBTREE(pn);
1014 if (sn && sn != fn)
6454743b 1015 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1016 else
1017 fn = pn;
1018 if (fn->fn_flags & RTN_RTINFO)
1019 return fn;
1020 }
1021}
c71099ac 1022
10585b43 1023static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1024{
1025 struct rt6_info *rt = *prt;
1026
1027 if (dst_hold_safe(&rt->dst))
1028 return true;
10585b43 1029 if (net) {
d3843fe5
WW
1030 rt = net->ipv6.ip6_null_entry;
1031 dst_hold(&rt->dst);
1032 } else {
1033 rt = NULL;
1034 }
1035 *prt = rt;
1036 return false;
1037}
1038
dec9b0e2 1039/* called with rcu_lock held */
8d1c802b 1040static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1041{
3b6761d1 1042 unsigned short flags = fib6_info_dst_flags(rt);
ad1601ae 1043 struct net_device *dev = rt->fib6_nh.fib_nh_dev;
dec9b0e2
DA
1044 struct rt6_info *nrt;
1045
e873e4b9 1046 if (!fib6_info_hold_safe(rt))
1c87e79a 1047 goto fallback;
e873e4b9 1048
93531c67 1049 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1050 if (!nrt) {
e873e4b9 1051 fib6_info_release(rt);
1c87e79a
XL
1052 goto fallback;
1053 }
dec9b0e2 1054
1c87e79a
XL
1055 ip6_rt_copy_init(nrt, rt);
1056 return nrt;
1057
1058fallback:
1059 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1060 dst_hold(&nrt->dst);
dec9b0e2
DA
1061 return nrt;
1062}
1063
8ed67789
DL
1064static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1065 struct fib6_table *table,
b75cc8f9
DA
1066 struct flowi6 *fl6,
1067 const struct sk_buff *skb,
1068 int flags)
1da177e4 1069{
b1d40991 1070 struct fib6_result res = {};
1da177e4 1071 struct fib6_node *fn;
23fb93a4 1072 struct rt6_info *rt;
1da177e4 1073
b6cdbc85
DA
1074 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1075 flags &= ~RT6_LOOKUP_F_IFACE;
1076
66f5d6ce 1077 rcu_read_lock();
6454743b 1078 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1079restart:
b1d40991
DA
1080 res.f6i = rcu_dereference(fn->leaf);
1081 if (!res.f6i)
1082 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1083 else
b1d40991
DA
1084 res.f6i = rt6_device_match(net, res.f6i, &fl6->saddr,
1085 fl6->flowi6_oif, flags);
af52a52c 1086
b1d40991 1087 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1088 fn = fib6_backtrack(fn, &fl6->saddr);
1089 if (fn)
1090 goto restart;
2b760fcf 1091
af52a52c
DA
1092 rt = net->ipv6.ip6_null_entry;
1093 dst_hold(&rt->dst);
1094 goto out;
1095 }
d3843fe5 1096
b1d40991
DA
1097 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1098 fl6->flowi6_oif != 0, skb, flags);
1099
2b760fcf 1100 /* Search through exception table */
b1d40991 1101 rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
23fb93a4 1102 if (rt) {
10585b43 1103 if (ip6_hold_safe(net, &rt))
dec9b0e2 1104 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1105 } else {
b1d40991 1106 rt = ip6_create_rt_rcu(res.f6i);
dec9b0e2 1107 }
b811580d 1108
af52a52c 1109out:
b1d40991 1110 trace_fib6_table_lookup(net, res.f6i, table, fl6);
af52a52c 1111
66f5d6ce 1112 rcu_read_unlock();
b811580d 1113
c71099ac 1114 return rt;
c71099ac
TG
1115}
1116
67ba4152 1117struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1118 const struct sk_buff *skb, int flags)
ea6e574e 1119{
b75cc8f9 1120 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1121}
1122EXPORT_SYMBOL_GPL(ip6_route_lookup);
1123
9acd9f3a 1124struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1125 const struct in6_addr *saddr, int oif,
1126 const struct sk_buff *skb, int strict)
c71099ac 1127{
4c9483b2
DM
1128 struct flowi6 fl6 = {
1129 .flowi6_oif = oif,
1130 .daddr = *daddr,
c71099ac
TG
1131 };
1132 struct dst_entry *dst;
77d16f45 1133 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1134
adaa70bb 1135 if (saddr) {
4c9483b2 1136 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1137 flags |= RT6_LOOKUP_F_HAS_SADDR;
1138 }
1139
b75cc8f9 1140 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1141 if (dst->error == 0)
1142 return (struct rt6_info *) dst;
1143
1144 dst_release(dst);
1145
1da177e4
LT
1146 return NULL;
1147}
7159039a
YH
1148EXPORT_SYMBOL(rt6_lookup);
1149
c71099ac 1150/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1151 * It takes new route entry, the addition fails by any reason the
1152 * route is released.
1153 * Caller must hold dst before calling it.
1da177e4
LT
1154 */
1155
8d1c802b 1156static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1157 struct netlink_ext_ack *extack)
1da177e4
LT
1158{
1159 int err;
c71099ac 1160 struct fib6_table *table;
1da177e4 1161
93c2fb25 1162 table = rt->fib6_table;
66f5d6ce 1163 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1164 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1165 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1166
1167 return err;
1168}
1169
8d1c802b 1170int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1171{
afb1d4b5 1172 struct nl_info info = { .nl_net = net, };
e715b6d3 1173
d4ead6b3 1174 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1175}
1176
8d1c802b 1177static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1178 const struct in6_addr *daddr,
1179 const struct in6_addr *saddr)
1da177e4 1180{
4832c30d 1181 struct net_device *dev;
1da177e4
LT
1182 struct rt6_info *rt;
1183
1184 /*
1185 * Clone the route.
1186 */
1187
e873e4b9
WW
1188 if (!fib6_info_hold_safe(ort))
1189 return NULL;
1190
4832c30d 1191 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1192 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1193 if (!rt) {
1194 fib6_info_release(ort);
83a09abd 1195 return NULL;
e873e4b9 1196 }
83a09abd
MKL
1197
1198 ip6_rt_copy_init(rt, ort);
1199 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1200 rt->dst.flags |= DST_HOST;
1201 rt->rt6i_dst.addr = *daddr;
1202 rt->rt6i_dst.plen = 128;
1da177e4 1203
83a09abd 1204 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1205 if (ort->fib6_dst.plen != 128 &&
1206 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1207 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1208#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1209 if (rt->rt6i_src.plen && saddr) {
1210 rt->rt6i_src.addr = *saddr;
1211 rt->rt6i_src.plen = 128;
8b9df265 1212 }
83a09abd 1213#endif
95a9a5ba 1214 }
1da177e4 1215
95a9a5ba
YH
1216 return rt;
1217}
1da177e4 1218
8d1c802b 1219static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1220{
3b6761d1 1221 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1222 struct net_device *dev;
d52d3997
MKL
1223 struct rt6_info *pcpu_rt;
1224
e873e4b9
WW
1225 if (!fib6_info_hold_safe(rt))
1226 return NULL;
1227
4832c30d
DA
1228 rcu_read_lock();
1229 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1230 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1231 rcu_read_unlock();
e873e4b9
WW
1232 if (!pcpu_rt) {
1233 fib6_info_release(rt);
d52d3997 1234 return NULL;
e873e4b9 1235 }
d52d3997 1236 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1237 pcpu_rt->rt6i_flags |= RTF_PCPU;
1238 return pcpu_rt;
1239}
1240
66f5d6ce 1241/* It should be called with rcu_read_lock() acquired */
8d1c802b 1242static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1243{
a73e4195 1244 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1245
1246 p = this_cpu_ptr(rt->rt6i_pcpu);
1247 pcpu_rt = *p;
1248
d4ead6b3 1249 if (pcpu_rt)
10585b43 1250 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1251
a73e4195
MKL
1252 return pcpu_rt;
1253}
1254
afb1d4b5 1255static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1256 struct fib6_info *rt)
a73e4195
MKL
1257{
1258 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1259
1260 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1261 if (!pcpu_rt) {
9c7370a1
MKL
1262 dst_hold(&net->ipv6.ip6_null_entry->dst);
1263 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1264 }
1265
a94b9367
WW
1266 dst_hold(&pcpu_rt->dst);
1267 p = this_cpu_ptr(rt->rt6i_pcpu);
1268 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1269 BUG_ON(prev);
a94b9367 1270
d52d3997
MKL
1271 return pcpu_rt;
1272}
1273
35732d01
WW
1274/* exception hash table implementation
1275 */
1276static DEFINE_SPINLOCK(rt6_exception_lock);
1277
1278/* Remove rt6_ex from hash table and free the memory
1279 * Caller must hold rt6_exception_lock
1280 */
1281static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1282 struct rt6_exception *rt6_ex)
1283{
f5b51fe8 1284 struct fib6_info *from;
b2427e67 1285 struct net *net;
81eb8447 1286
35732d01
WW
1287 if (!bucket || !rt6_ex)
1288 return;
b2427e67
CIK
1289
1290 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1291 net->ipv6.rt6_stats->fib_rt_cache--;
1292
1293 /* purge completely the exception to allow releasing the held resources:
1294 * some [sk] cache may keep the dst around for unlimited time
1295 */
1296 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1297 lockdep_is_held(&rt6_exception_lock));
1298 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1299 fib6_info_release(from);
1300 dst_dev_put(&rt6_ex->rt6i->dst);
1301
35732d01 1302 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1303 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1304 kfree_rcu(rt6_ex, rcu);
1305 WARN_ON_ONCE(!bucket->depth);
1306 bucket->depth--;
1307}
1308
1309/* Remove oldest rt6_ex in bucket and free the memory
1310 * Caller must hold rt6_exception_lock
1311 */
1312static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1313{
1314 struct rt6_exception *rt6_ex, *oldest = NULL;
1315
1316 if (!bucket)
1317 return;
1318
1319 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1320 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1321 oldest = rt6_ex;
1322 }
1323 rt6_remove_exception(bucket, oldest);
1324}
1325
1326static u32 rt6_exception_hash(const struct in6_addr *dst,
1327 const struct in6_addr *src)
1328{
1329 static u32 seed __read_mostly;
1330 u32 val;
1331
1332 net_get_random_once(&seed, sizeof(seed));
1333 val = jhash(dst, sizeof(*dst), seed);
1334
1335#ifdef CONFIG_IPV6_SUBTREES
1336 if (src)
1337 val = jhash(src, sizeof(*src), val);
1338#endif
1339 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1340}
1341
1342/* Helper function to find the cached rt in the hash table
1343 * and update bucket pointer to point to the bucket for this
1344 * (daddr, saddr) pair
1345 * Caller must hold rt6_exception_lock
1346 */
1347static struct rt6_exception *
1348__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1349 const struct in6_addr *daddr,
1350 const struct in6_addr *saddr)
1351{
1352 struct rt6_exception *rt6_ex;
1353 u32 hval;
1354
1355 if (!(*bucket) || !daddr)
1356 return NULL;
1357
1358 hval = rt6_exception_hash(daddr, saddr);
1359 *bucket += hval;
1360
1361 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1362 struct rt6_info *rt6 = rt6_ex->rt6i;
1363 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1364
1365#ifdef CONFIG_IPV6_SUBTREES
1366 if (matched && saddr)
1367 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1368#endif
1369 if (matched)
1370 return rt6_ex;
1371 }
1372 return NULL;
1373}
1374
1375/* Helper function to find the cached rt in the hash table
1376 * and update bucket pointer to point to the bucket for this
1377 * (daddr, saddr) pair
1378 * Caller must hold rcu_read_lock()
1379 */
1380static struct rt6_exception *
1381__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1382 const struct in6_addr *daddr,
1383 const struct in6_addr *saddr)
1384{
1385 struct rt6_exception *rt6_ex;
1386 u32 hval;
1387
1388 WARN_ON_ONCE(!rcu_read_lock_held());
1389
1390 if (!(*bucket) || !daddr)
1391 return NULL;
1392
1393 hval = rt6_exception_hash(daddr, saddr);
1394 *bucket += hval;
1395
1396 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1397 struct rt6_info *rt6 = rt6_ex->rt6i;
1398 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1399
1400#ifdef CONFIG_IPV6_SUBTREES
1401 if (matched && saddr)
1402 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1403#endif
1404 if (matched)
1405 return rt6_ex;
1406 }
1407 return NULL;
1408}
1409
8d1c802b 1410static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1411{
1412 unsigned int mtu;
1413
dcd1f572
DA
1414 if (rt->fib6_pmtu) {
1415 mtu = rt->fib6_pmtu;
1416 } else {
1417 struct net_device *dev = fib6_info_nh_dev(rt);
1418 struct inet6_dev *idev;
1419
1420 rcu_read_lock();
1421 idev = __in6_dev_get(dev);
1422 mtu = idev->cnf.mtu6;
1423 rcu_read_unlock();
1424 }
1425
d4ead6b3
DA
1426 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1427
ad1601ae 1428 return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
d4ead6b3
DA
1429}
1430
35732d01 1431static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1432 struct fib6_info *ort)
35732d01 1433{
5e670d84 1434 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1435 struct rt6_exception_bucket *bucket;
1436 struct in6_addr *src_key = NULL;
1437 struct rt6_exception *rt6_ex;
1438 int err = 0;
1439
35732d01
WW
1440 spin_lock_bh(&rt6_exception_lock);
1441
1442 if (ort->exception_bucket_flushed) {
1443 err = -EINVAL;
1444 goto out;
1445 }
1446
1447 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1448 lockdep_is_held(&rt6_exception_lock));
1449 if (!bucket) {
1450 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1451 GFP_ATOMIC);
1452 if (!bucket) {
1453 err = -ENOMEM;
1454 goto out;
1455 }
1456 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1457 }
1458
1459#ifdef CONFIG_IPV6_SUBTREES
1460 /* rt6i_src.plen != 0 indicates ort is in subtree
1461 * and exception table is indexed by a hash of
1462 * both rt6i_dst and rt6i_src.
1463 * Otherwise, the exception table is indexed by
1464 * a hash of only rt6i_dst.
1465 */
93c2fb25 1466 if (ort->fib6_src.plen)
35732d01
WW
1467 src_key = &nrt->rt6i_src.addr;
1468#endif
f5bbe7ee
WW
1469 /* rt6_mtu_change() might lower mtu on ort.
1470 * Only insert this exception route if its mtu
1471 * is less than ort's mtu value.
1472 */
d4ead6b3 1473 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1474 err = -EINVAL;
1475 goto out;
1476 }
60006a48 1477
35732d01
WW
1478 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1479 src_key);
1480 if (rt6_ex)
1481 rt6_remove_exception(bucket, rt6_ex);
1482
1483 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1484 if (!rt6_ex) {
1485 err = -ENOMEM;
1486 goto out;
1487 }
1488 rt6_ex->rt6i = nrt;
1489 rt6_ex->stamp = jiffies;
35732d01
WW
1490 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1491 bucket->depth++;
81eb8447 1492 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1493
1494 if (bucket->depth > FIB6_MAX_DEPTH)
1495 rt6_exception_remove_oldest(bucket);
1496
1497out:
1498 spin_unlock_bh(&rt6_exception_lock);
1499
1500 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1501 if (!err) {
93c2fb25 1502 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1503 fib6_update_sernum(net, ort);
93c2fb25 1504 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1505 fib6_force_start_gc(net);
1506 }
35732d01
WW
1507
1508 return err;
1509}
1510
8d1c802b 1511void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1512{
1513 struct rt6_exception_bucket *bucket;
1514 struct rt6_exception *rt6_ex;
1515 struct hlist_node *tmp;
1516 int i;
1517
1518 spin_lock_bh(&rt6_exception_lock);
1519 /* Prevent rt6_insert_exception() to recreate the bucket list */
1520 rt->exception_bucket_flushed = 1;
1521
1522 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1523 lockdep_is_held(&rt6_exception_lock));
1524 if (!bucket)
1525 goto out;
1526
1527 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1528 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1529 rt6_remove_exception(bucket, rt6_ex);
1530 WARN_ON_ONCE(bucket->depth);
1531 bucket++;
1532 }
1533
1534out:
1535 spin_unlock_bh(&rt6_exception_lock);
1536}
1537
1538/* Find cached rt in the hash table inside passed in rt
1539 * Caller has to hold rcu_read_lock()
1540 */
8d1c802b 1541static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1542 struct in6_addr *daddr,
1543 struct in6_addr *saddr)
1544{
1545 struct rt6_exception_bucket *bucket;
1546 struct in6_addr *src_key = NULL;
1547 struct rt6_exception *rt6_ex;
1548 struct rt6_info *res = NULL;
1549
1550 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1551
1552#ifdef CONFIG_IPV6_SUBTREES
1553 /* rt6i_src.plen != 0 indicates rt is in subtree
1554 * and exception table is indexed by a hash of
1555 * both rt6i_dst and rt6i_src.
1556 * Otherwise, the exception table is indexed by
1557 * a hash of only rt6i_dst.
1558 */
93c2fb25 1559 if (rt->fib6_src.plen)
35732d01
WW
1560 src_key = saddr;
1561#endif
1562 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1563
1564 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1565 res = rt6_ex->rt6i;
1566
1567 return res;
1568}
1569
1570/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1571static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1572{
35732d01
WW
1573 struct rt6_exception_bucket *bucket;
1574 struct in6_addr *src_key = NULL;
1575 struct rt6_exception *rt6_ex;
8a14e46f 1576 struct fib6_info *from;
35732d01
WW
1577 int err;
1578
091311de 1579 from = rcu_dereference(rt->from);
35732d01 1580 if (!from ||
442d713b 1581 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1582 return -EINVAL;
1583
1584 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1585 return -ENOENT;
1586
1587 spin_lock_bh(&rt6_exception_lock);
1588 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1589 lockdep_is_held(&rt6_exception_lock));
1590#ifdef CONFIG_IPV6_SUBTREES
1591 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1592 * and exception table is indexed by a hash of
1593 * both rt6i_dst and rt6i_src.
1594 * Otherwise, the exception table is indexed by
1595 * a hash of only rt6i_dst.
1596 */
93c2fb25 1597 if (from->fib6_src.plen)
35732d01
WW
1598 src_key = &rt->rt6i_src.addr;
1599#endif
1600 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1601 &rt->rt6i_dst.addr,
1602 src_key);
1603 if (rt6_ex) {
1604 rt6_remove_exception(bucket, rt6_ex);
1605 err = 0;
1606 } else {
1607 err = -ENOENT;
1608 }
1609
1610 spin_unlock_bh(&rt6_exception_lock);
1611 return err;
1612}
1613
1614/* Find rt6_ex which contains the passed in rt cache and
1615 * refresh its stamp
1616 */
1617static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1618{
35732d01
WW
1619 struct rt6_exception_bucket *bucket;
1620 struct in6_addr *src_key = NULL;
1621 struct rt6_exception *rt6_ex;
193f3685 1622 struct fib6_info *from;
35732d01
WW
1623
1624 rcu_read_lock();
193f3685
PA
1625 from = rcu_dereference(rt->from);
1626 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1627 goto unlock;
1628
35732d01
WW
1629 bucket = rcu_dereference(from->rt6i_exception_bucket);
1630
1631#ifdef CONFIG_IPV6_SUBTREES
1632 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1633 * and exception table is indexed by a hash of
1634 * both rt6i_dst and rt6i_src.
1635 * Otherwise, the exception table is indexed by
1636 * a hash of only rt6i_dst.
1637 */
93c2fb25 1638 if (from->fib6_src.plen)
35732d01
WW
1639 src_key = &rt->rt6i_src.addr;
1640#endif
1641 rt6_ex = __rt6_find_exception_rcu(&bucket,
1642 &rt->rt6i_dst.addr,
1643 src_key);
1644 if (rt6_ex)
1645 rt6_ex->stamp = jiffies;
1646
193f3685 1647unlock:
35732d01
WW
1648 rcu_read_unlock();
1649}
1650
e9fa1495
SB
1651static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1652 struct rt6_info *rt, int mtu)
1653{
1654 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1655 * lowest MTU in the path: always allow updating the route PMTU to
1656 * reflect PMTU decreases.
1657 *
1658 * If the new MTU is higher, and the route PMTU is equal to the local
1659 * MTU, this means the old MTU is the lowest in the path, so allow
1660 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1661 * handle this.
1662 */
1663
1664 if (dst_mtu(&rt->dst) >= mtu)
1665 return true;
1666
1667 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1668 return true;
1669
1670 return false;
1671}
1672
1673static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1674 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1675{
1676 struct rt6_exception_bucket *bucket;
1677 struct rt6_exception *rt6_ex;
1678 int i;
1679
1680 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1681 lockdep_is_held(&rt6_exception_lock));
1682
e9fa1495
SB
1683 if (!bucket)
1684 return;
1685
1686 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1687 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1688 struct rt6_info *entry = rt6_ex->rt6i;
1689
1690 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1691 * route), the metrics of its rt->from have already
e9fa1495
SB
1692 * been updated.
1693 */
d4ead6b3 1694 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1695 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1696 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1697 }
e9fa1495 1698 bucket++;
f5bbe7ee
WW
1699 }
1700}
1701
b16cb459
WW
1702#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1703
8d1c802b 1704static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1705 struct in6_addr *gateway)
1706{
1707 struct rt6_exception_bucket *bucket;
1708 struct rt6_exception *rt6_ex;
1709 struct hlist_node *tmp;
1710 int i;
1711
1712 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1713 return;
1714
1715 spin_lock_bh(&rt6_exception_lock);
1716 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1717 lockdep_is_held(&rt6_exception_lock));
1718
1719 if (bucket) {
1720 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1721 hlist_for_each_entry_safe(rt6_ex, tmp,
1722 &bucket->chain, hlist) {
1723 struct rt6_info *entry = rt6_ex->rt6i;
1724
1725 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1726 RTF_CACHE_GATEWAY &&
1727 ipv6_addr_equal(gateway,
1728 &entry->rt6i_gateway)) {
1729 rt6_remove_exception(bucket, rt6_ex);
1730 }
1731 }
1732 bucket++;
1733 }
1734 }
1735
1736 spin_unlock_bh(&rt6_exception_lock);
1737}
1738
c757faa8
WW
1739static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1740 struct rt6_exception *rt6_ex,
1741 struct fib6_gc_args *gc_args,
1742 unsigned long now)
1743{
1744 struct rt6_info *rt = rt6_ex->rt6i;
1745
1859bac0
PA
1746 /* we are pruning and obsoleting aged-out and non gateway exceptions
1747 * even if others have still references to them, so that on next
1748 * dst_check() such references can be dropped.
1749 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1750 * expired, independently from their aging, as per RFC 8201 section 4
1751 */
31afeb42
WW
1752 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1753 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1754 RT6_TRACE("aging clone %p\n", rt);
1755 rt6_remove_exception(bucket, rt6_ex);
1756 return;
1757 }
1758 } else if (time_after(jiffies, rt->dst.expires)) {
1759 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1760 rt6_remove_exception(bucket, rt6_ex);
1761 return;
31afeb42
WW
1762 }
1763
1764 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1765 struct neighbour *neigh;
1766 __u8 neigh_flags = 0;
1767
1bfa26ff
ED
1768 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1769 if (neigh)
c757faa8 1770 neigh_flags = neigh->flags;
1bfa26ff 1771
c757faa8
WW
1772 if (!(neigh_flags & NTF_ROUTER)) {
1773 RT6_TRACE("purging route %p via non-router but gateway\n",
1774 rt);
1775 rt6_remove_exception(bucket, rt6_ex);
1776 return;
1777 }
1778 }
31afeb42 1779
c757faa8
WW
1780 gc_args->more++;
1781}
1782
8d1c802b 1783void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1784 struct fib6_gc_args *gc_args,
1785 unsigned long now)
1786{
1787 struct rt6_exception_bucket *bucket;
1788 struct rt6_exception *rt6_ex;
1789 struct hlist_node *tmp;
1790 int i;
1791
1792 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1793 return;
1794
1bfa26ff
ED
1795 rcu_read_lock_bh();
1796 spin_lock(&rt6_exception_lock);
c757faa8
WW
1797 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1798 lockdep_is_held(&rt6_exception_lock));
1799
1800 if (bucket) {
1801 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1802 hlist_for_each_entry_safe(rt6_ex, tmp,
1803 &bucket->chain, hlist) {
1804 rt6_age_examine_exception(bucket, rt6_ex,
1805 gc_args, now);
1806 }
1807 bucket++;
1808 }
1809 }
1bfa26ff
ED
1810 spin_unlock(&rt6_exception_lock);
1811 rcu_read_unlock_bh();
c757faa8
WW
1812}
1813
1d053da9
DA
1814/* must be called with rcu lock held */
1815struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1816 int oif, struct flowi6 *fl6, int strict)
1da177e4 1817{
367efcb9 1818 struct fib6_node *fn, *saved_fn;
8d1c802b 1819 struct fib6_info *f6i;
1da177e4 1820
6454743b 1821 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1822 saved_fn = fn;
1da177e4 1823
ca254490
DA
1824 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1825 oif = 0;
1826
a3c00e46 1827redo_rt6_select:
23fb93a4 1828 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1829 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1830 fn = fib6_backtrack(fn, &fl6->saddr);
1831 if (fn)
1832 goto redo_rt6_select;
367efcb9
MKL
1833 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1834 /* also consider unreachable route */
1835 strict &= ~RT6_LOOKUP_F_REACHABLE;
1836 fn = saved_fn;
1837 goto redo_rt6_select;
367efcb9 1838 }
a3c00e46
MKL
1839 }
1840
d4bea421 1841 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1842
1d053da9
DA
1843 return f6i;
1844}
1845
1846struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1847 int oif, struct flowi6 *fl6,
1848 const struct sk_buff *skb, int flags)
1849{
b1d40991 1850 struct fib6_result res = {};
1d053da9
DA
1851 struct rt6_info *rt;
1852 int strict = 0;
1853
1854 strict |= flags & RT6_LOOKUP_F_IFACE;
1855 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1856 if (net->ipv6.devconf_all->forwarding == 0)
1857 strict |= RT6_LOOKUP_F_REACHABLE;
1858
1859 rcu_read_lock();
1860
b1d40991
DA
1861 res.f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1862 if (res.f6i == net->ipv6.fib6_null_entry) {
421842ed 1863 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1864 rcu_read_unlock();
d3843fe5 1865 dst_hold(&rt->dst);
d3843fe5 1866 return rt;
23fb93a4
DA
1867 }
1868
b1d40991 1869 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 1870
23fb93a4 1871 /*Search through exception table */
b1d40991 1872 rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
23fb93a4 1873 if (rt) {
10585b43 1874 if (ip6_hold_safe(net, &rt))
d3843fe5 1875 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1876
66f5d6ce 1877 rcu_read_unlock();
d52d3997 1878 return rt;
3da59bd9 1879 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 1880 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
1881 /* Create a RTF_CACHE clone which will not be
1882 * owned by the fib6 tree. It is for the special case where
1883 * the daddr in the skb during the neighbor look-up is different
1884 * from the fl6->daddr used to look-up route here.
1885 */
3da59bd9
MKL
1886 struct rt6_info *uncached_rt;
1887
b1d40991 1888 uncached_rt = ip6_rt_cache_alloc(res.f6i, &fl6->daddr, NULL);
d52d3997 1889
4d85cd0c 1890 rcu_read_unlock();
c71099ac 1891
1cfb71ee
WW
1892 if (uncached_rt) {
1893 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1894 * No need for another dst_hold()
1895 */
8d0b94af 1896 rt6_uncached_list_add(uncached_rt);
81eb8447 1897 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1898 } else {
3da59bd9 1899 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1900 dst_hold(&uncached_rt->dst);
1901 }
b811580d 1902
3da59bd9 1903 return uncached_rt;
d52d3997
MKL
1904 } else {
1905 /* Get a percpu copy */
1906
1907 struct rt6_info *pcpu_rt;
1908
951f788a 1909 local_bh_disable();
b1d40991 1910 pcpu_rt = rt6_get_pcpu_route(res.f6i);
d52d3997 1911
93531c67 1912 if (!pcpu_rt)
b1d40991 1913 pcpu_rt = rt6_make_pcpu_route(net, res.f6i);
93531c67 1914
951f788a
ED
1915 local_bh_enable();
1916 rcu_read_unlock();
d4bea421 1917
d52d3997
MKL
1918 return pcpu_rt;
1919 }
1da177e4 1920}
9ff74384 1921EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1922
b75cc8f9
DA
1923static struct rt6_info *ip6_pol_route_input(struct net *net,
1924 struct fib6_table *table,
1925 struct flowi6 *fl6,
1926 const struct sk_buff *skb,
1927 int flags)
4acad72d 1928{
b75cc8f9 1929 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1930}
1931
d409b847
MB
1932struct dst_entry *ip6_route_input_lookup(struct net *net,
1933 struct net_device *dev,
b75cc8f9
DA
1934 struct flowi6 *fl6,
1935 const struct sk_buff *skb,
1936 int flags)
72331bc0
SL
1937{
1938 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1939 flags |= RT6_LOOKUP_F_IFACE;
1940
b75cc8f9 1941 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1942}
d409b847 1943EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1944
23aebdac 1945static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1946 struct flow_keys *keys,
1947 struct flow_keys *flkeys)
23aebdac
JS
1948{
1949 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1950 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1951 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1952 const struct ipv6hdr *inner_iph;
1953 const struct icmp6hdr *icmph;
1954 struct ipv6hdr _inner_iph;
cea67a2d 1955 struct icmp6hdr _icmph;
23aebdac
JS
1956
1957 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1958 goto out;
1959
cea67a2d
ED
1960 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1961 sizeof(_icmph), &_icmph);
1962 if (!icmph)
1963 goto out;
1964
23aebdac
JS
1965 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1966 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1967 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1968 icmph->icmp6_type != ICMPV6_PARAMPROB)
1969 goto out;
1970
1971 inner_iph = skb_header_pointer(skb,
1972 skb_transport_offset(skb) + sizeof(*icmph),
1973 sizeof(_inner_iph), &_inner_iph);
1974 if (!inner_iph)
1975 goto out;
1976
1977 key_iph = inner_iph;
5e5d6fed 1978 _flkeys = NULL;
23aebdac 1979out:
5e5d6fed
RP
1980 if (_flkeys) {
1981 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1982 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1983 keys->tags.flow_label = _flkeys->tags.flow_label;
1984 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1985 } else {
1986 keys->addrs.v6addrs.src = key_iph->saddr;
1987 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1988 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1989 keys->basic.ip_proto = key_iph->nexthdr;
1990 }
23aebdac
JS
1991}
1992
1993/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1994u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1995 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1996{
1997 struct flow_keys hash_keys;
9a2a537a 1998 u32 mhash;
23aebdac 1999
bbfa047a 2000 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2001 case 0:
2002 memset(&hash_keys, 0, sizeof(hash_keys));
2003 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2004 if (skb) {
2005 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2006 } else {
2007 hash_keys.addrs.v6addrs.src = fl6->saddr;
2008 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2009 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2010 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2011 }
2012 break;
2013 case 1:
2014 if (skb) {
2015 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2016 struct flow_keys keys;
2017
2018 /* short-circuit if we already have L4 hash present */
2019 if (skb->l4_hash)
2020 return skb_get_hash_raw(skb) >> 1;
2021
2022 memset(&hash_keys, 0, sizeof(hash_keys));
2023
2024 if (!flkeys) {
2025 skb_flow_dissect_flow_keys(skb, &keys, flag);
2026 flkeys = &keys;
2027 }
2028 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2030 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2031 hash_keys.ports.src = flkeys->ports.src;
2032 hash_keys.ports.dst = flkeys->ports.dst;
2033 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2034 } else {
2035 memset(&hash_keys, 0, sizeof(hash_keys));
2036 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2037 hash_keys.addrs.v6addrs.src = fl6->saddr;
2038 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2039 hash_keys.ports.src = fl6->fl6_sport;
2040 hash_keys.ports.dst = fl6->fl6_dport;
2041 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2042 }
2043 break;
23aebdac 2044 }
9a2a537a 2045 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2046
9a2a537a 2047 return mhash >> 1;
23aebdac
JS
2048}
2049
c71099ac
TG
2050void ip6_route_input(struct sk_buff *skb)
2051{
b71d1d42 2052 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2053 struct net *net = dev_net(skb->dev);
adaa70bb 2054 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2055 struct ip_tunnel_info *tun_info;
4c9483b2 2056 struct flowi6 fl6 = {
e0d56fdd 2057 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2058 .daddr = iph->daddr,
2059 .saddr = iph->saddr,
6502ca52 2060 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2061 .flowi6_mark = skb->mark,
2062 .flowi6_proto = iph->nexthdr,
c71099ac 2063 };
5e5d6fed 2064 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2065
904af04d 2066 tun_info = skb_tunnel_info(skb);
46fa062a 2067 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2068 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2069
2070 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2071 flkeys = &_flkeys;
2072
23aebdac 2073 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2074 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2075 skb_dst_drop(skb);
b75cc8f9
DA
2076 skb_dst_set(skb,
2077 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2078}
2079
b75cc8f9
DA
2080static struct rt6_info *ip6_pol_route_output(struct net *net,
2081 struct fib6_table *table,
2082 struct flowi6 *fl6,
2083 const struct sk_buff *skb,
2084 int flags)
1da177e4 2085{
b75cc8f9 2086 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2087}
2088
6f21c96a
PA
2089struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2090 struct flowi6 *fl6, int flags)
c71099ac 2091{
d46a9d67 2092 bool any_src;
c71099ac 2093
3ede0bbc
RS
2094 if (ipv6_addr_type(&fl6->daddr) &
2095 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2096 struct dst_entry *dst;
2097
2098 dst = l3mdev_link_scope_lookup(net, fl6);
2099 if (dst)
2100 return dst;
2101 }
ca254490 2102
1fb9489b 2103 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2104
d46a9d67 2105 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2106 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2107 (fl6->flowi6_oif && any_src))
77d16f45 2108 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2109
d46a9d67 2110 if (!any_src)
adaa70bb 2111 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2112 else if (sk)
2113 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2114
b75cc8f9 2115 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2116}
6f21c96a 2117EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2118
2774c131 2119struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2120{
5c1e6aa3 2121 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2122 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2123 struct dst_entry *new = NULL;
2124
1dbe3252 2125 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2126 DST_OBSOLETE_DEAD, 0);
14e50e57 2127 if (rt) {
0a1f5962 2128 rt6_info_init(rt);
81eb8447 2129 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2130
0a1f5962 2131 new = &rt->dst;
14e50e57 2132 new->__use = 1;
352e512c 2133 new->input = dst_discard;
ede2059d 2134 new->output = dst_discard_out;
14e50e57 2135
0a1f5962 2136 dst_copy_metrics(new, &ort->dst);
14e50e57 2137
1dbe3252 2138 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2139 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2140 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2141
2142 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2143#ifdef CONFIG_IPV6_SUBTREES
2144 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2145#endif
14e50e57
DM
2146 }
2147
69ead7af
DM
2148 dst_release(dst_orig);
2149 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2150}
14e50e57 2151
1da177e4
LT
2152/*
2153 * Destination cache support functions
2154 */
2155
8d1c802b 2156static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2157{
93531c67
DA
2158 u32 rt_cookie = 0;
2159
8ae86971 2160 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2161 return false;
2162
2163 if (fib6_check_expired(f6i))
2164 return false;
2165
2166 return true;
4b32b5ad
MKL
2167}
2168
a68886a6
DA
2169static struct dst_entry *rt6_check(struct rt6_info *rt,
2170 struct fib6_info *from,
2171 u32 cookie)
3da59bd9 2172{
36143645 2173 u32 rt_cookie = 0;
c5cff856 2174
a68886a6 2175 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2176 rt_cookie != cookie)
3da59bd9
MKL
2177 return NULL;
2178
2179 if (rt6_check_expired(rt))
2180 return NULL;
2181
2182 return &rt->dst;
2183}
2184
a68886a6
DA
2185static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2186 struct fib6_info *from,
2187 u32 cookie)
3da59bd9 2188{
5973fb1e
MKL
2189 if (!__rt6_check_expired(rt) &&
2190 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2191 fib6_check(from, cookie))
3da59bd9
MKL
2192 return &rt->dst;
2193 else
2194 return NULL;
2195}
2196
1da177e4
LT
2197static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2198{
a87b7dc9 2199 struct dst_entry *dst_ret;
a68886a6 2200 struct fib6_info *from;
1da177e4
LT
2201 struct rt6_info *rt;
2202
a87b7dc9
DA
2203 rt = container_of(dst, struct rt6_info, dst);
2204
2205 rcu_read_lock();
1da177e4 2206
6f3118b5
ND
2207 /* All IPV6 dsts are created with ->obsolete set to the value
2208 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2209 * into this function always.
2210 */
e3bc10bd 2211
a68886a6 2212 from = rcu_dereference(rt->from);
4b32b5ad 2213
a68886a6
DA
2214 if (from && (rt->rt6i_flags & RTF_PCPU ||
2215 unlikely(!list_empty(&rt->rt6i_uncached))))
2216 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2217 else
a68886a6 2218 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2219
2220 rcu_read_unlock();
2221
2222 return dst_ret;
1da177e4
LT
2223}
2224
2225static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2226{
2227 struct rt6_info *rt = (struct rt6_info *) dst;
2228
2229 if (rt) {
54c1a859 2230 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2231 rcu_read_lock();
54c1a859 2232 if (rt6_check_expired(rt)) {
93531c67 2233 rt6_remove_exception_rt(rt);
54c1a859
YH
2234 dst = NULL;
2235 }
c3c14da0 2236 rcu_read_unlock();
54c1a859 2237 } else {
1da177e4 2238 dst_release(dst);
54c1a859
YH
2239 dst = NULL;
2240 }
1da177e4 2241 }
54c1a859 2242 return dst;
1da177e4
LT
2243}
2244
2245static void ip6_link_failure(struct sk_buff *skb)
2246{
2247 struct rt6_info *rt;
2248
3ffe533c 2249 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2250
adf30907 2251 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2252 if (rt) {
8a14e46f 2253 rcu_read_lock();
1eb4f758 2254 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2255 rt6_remove_exception_rt(rt);
c5cff856 2256 } else {
a68886a6 2257 struct fib6_info *from;
c5cff856
WW
2258 struct fib6_node *fn;
2259
a68886a6
DA
2260 from = rcu_dereference(rt->from);
2261 if (from) {
2262 fn = rcu_dereference(from->fib6_node);
2263 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2264 fn->fn_sernum = -1;
2265 }
1eb4f758 2266 }
8a14e46f 2267 rcu_read_unlock();
1da177e4
LT
2268 }
2269}
2270
6a3e030f
DA
2271static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2272{
a68886a6
DA
2273 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2274 struct fib6_info *from;
2275
2276 rcu_read_lock();
2277 from = rcu_dereference(rt0->from);
2278 if (from)
2279 rt0->dst.expires = from->expires;
2280 rcu_read_unlock();
2281 }
6a3e030f
DA
2282
2283 dst_set_expires(&rt0->dst, timeout);
2284 rt0->rt6i_flags |= RTF_EXPIRES;
2285}
2286
45e4fd26
MKL
2287static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2288{
2289 struct net *net = dev_net(rt->dst.dev);
2290
d4ead6b3 2291 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2292 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2293 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2294}
2295
0d3f6d29
MKL
2296static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2297{
2298 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2299 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2300}
2301
45e4fd26
MKL
2302static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2303 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2304{
0dec879f 2305 const struct in6_addr *daddr, *saddr;
67ba4152 2306 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2307
19bda36c
XL
2308 if (dst_metric_locked(dst, RTAX_MTU))
2309 return;
2310
0dec879f
JA
2311 if (iph) {
2312 daddr = &iph->daddr;
2313 saddr = &iph->saddr;
2314 } else if (sk) {
2315 daddr = &sk->sk_v6_daddr;
2316 saddr = &inet6_sk(sk)->saddr;
2317 } else {
2318 daddr = NULL;
2319 saddr = NULL;
2320 }
2321 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2322 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2323 if (mtu >= dst_mtu(dst))
2324 return;
9d289715 2325
0d3f6d29 2326 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2327 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2328 /* update rt6_ex->stamp for cache */
2329 if (rt6->rt6i_flags & RTF_CACHE)
2330 rt6_update_exception_stamp_rt(rt6);
0dec879f 2331 } else if (daddr) {
a68886a6 2332 struct fib6_info *from;
45e4fd26
MKL
2333 struct rt6_info *nrt6;
2334
4d85cd0c 2335 rcu_read_lock();
a68886a6 2336 from = rcu_dereference(rt6->from);
9c69a132
JL
2337 if (!from) {
2338 rcu_read_unlock();
2339 return;
2340 }
a68886a6 2341 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2342 if (nrt6) {
2343 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2344 if (rt6_insert_exception(nrt6, from))
2b760fcf 2345 dst_release_immediate(&nrt6->dst);
45e4fd26 2346 }
a68886a6 2347 rcu_read_unlock();
1da177e4
LT
2348 }
2349}
2350
45e4fd26
MKL
2351static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2352 struct sk_buff *skb, u32 mtu)
2353{
2354 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2355}
2356
42ae66c8 2357void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2358 int oif, u32 mark, kuid_t uid)
81aded24
DM
2359{
2360 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2361 struct dst_entry *dst;
dc92095d
2362 struct flowi6 fl6 = {
2363 .flowi6_oif = oif,
2364 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2365 .daddr = iph->daddr,
2366 .saddr = iph->saddr,
2367 .flowlabel = ip6_flowinfo(iph),
2368 .flowi6_uid = uid,
2369 };
81aded24
DM
2370
2371 dst = ip6_route_output(net, NULL, &fl6);
2372 if (!dst->error)
45e4fd26 2373 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2374 dst_release(dst);
2375}
2376EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2377
2378void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2379{
7ddacfa5 2380 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2381 struct dst_entry *dst;
2382
7ddacfa5
DA
2383 if (!oif && skb->dev)
2384 oif = l3mdev_master_ifindex(skb->dev);
2385
2386 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2387
2388 dst = __sk_dst_get(sk);
2389 if (!dst || !dst->obsolete ||
2390 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2391 return;
2392
2393 bh_lock_sock(sk);
2394 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2395 ip6_datagram_dst_update(sk, false);
2396 bh_unlock_sock(sk);
81aded24
DM
2397}
2398EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2399
7d6850f7
AK
2400void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2401 const struct flowi6 *fl6)
2402{
2403#ifdef CONFIG_IPV6_SUBTREES
2404 struct ipv6_pinfo *np = inet6_sk(sk);
2405#endif
2406
2407 ip6_dst_store(sk, dst,
2408 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2409 &sk->sk_v6_daddr : NULL,
2410#ifdef CONFIG_IPV6_SUBTREES
2411 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2412 &np->saddr :
2413#endif
2414 NULL);
2415}
2416
0b34eb00
DA
2417static bool ip6_redirect_nh_match(struct fib6_info *f6i,
2418 struct fib6_nh *nh,
2419 struct flowi6 *fl6,
2420 const struct in6_addr *gw,
2421 struct rt6_info **ret)
2422{
2423 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2424 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2425 return false;
2426
2427 /* rt_cache's gateway might be different from its 'parent'
2428 * in the case of an ip redirect.
2429 * So we keep searching in the exception table if the gateway
2430 * is different.
2431 */
2432 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2433 struct rt6_info *rt_cache;
2434
2435 rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
2436 if (rt_cache &&
2437 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2438 *ret = rt_cache;
2439 return true;
2440 }
2441 return false;
2442 }
2443 return true;
2444}
2445
b55b76b2
DJ
2446/* Handle redirects */
2447struct ip6rd_flowi {
2448 struct flowi6 fl6;
2449 struct in6_addr gateway;
2450};
2451
2452static struct rt6_info *__ip6_route_redirect(struct net *net,
2453 struct fib6_table *table,
2454 struct flowi6 *fl6,
b75cc8f9 2455 const struct sk_buff *skb,
b55b76b2
DJ
2456 int flags)
2457{
2458 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 2459 struct rt6_info *ret = NULL;
8d1c802b 2460 struct fib6_info *rt;
b55b76b2
DJ
2461 struct fib6_node *fn;
2462
2463 /* Get the "current" route for this destination and
67c408cf 2464 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2465 *
2466 * RFC 4861 specifies that redirects should only be
2467 * accepted if they come from the nexthop to the target.
2468 * Due to the way the routes are chosen, this notion
2469 * is a bit fuzzy and one might need to check all possible
2470 * routes.
2471 */
2472
66f5d6ce 2473 rcu_read_lock();
6454743b 2474 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2475restart:
66f5d6ce 2476 for_each_fib6_node_rt_rcu(fn) {
14895687 2477 if (fib6_check_expired(rt))
b55b76b2 2478 continue;
93c2fb25 2479 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2480 break;
0b34eb00
DA
2481 if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
2482 &rdfl->gateway, &ret))
2483 goto out;
b55b76b2
DJ
2484 }
2485
2486 if (!rt)
421842ed 2487 rt = net->ipv6.fib6_null_entry;
93c2fb25 2488 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2489 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2490 goto out;
2491 }
2492
421842ed 2493 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2494 fn = fib6_backtrack(fn, &fl6->saddr);
2495 if (fn)
2496 goto restart;
b55b76b2 2497 }
a3c00e46 2498
b0a1ba59 2499out:
23fb93a4 2500 if (ret)
10585b43 2501 ip6_hold_safe(net, &ret);
23fb93a4
DA
2502 else
2503 ret = ip6_create_rt_rcu(rt);
b55b76b2 2504
66f5d6ce 2505 rcu_read_unlock();
b55b76b2 2506
b65f164d 2507 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2508 return ret;
b55b76b2
DJ
2509};
2510
2511static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2512 const struct flowi6 *fl6,
2513 const struct sk_buff *skb,
2514 const struct in6_addr *gateway)
b55b76b2
DJ
2515{
2516 int flags = RT6_LOOKUP_F_HAS_SADDR;
2517 struct ip6rd_flowi rdfl;
2518
2519 rdfl.fl6 = *fl6;
2520 rdfl.gateway = *gateway;
2521
b75cc8f9 2522 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2523 flags, __ip6_route_redirect);
2524}
2525
e2d118a1
LC
2526void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2527 kuid_t uid)
3a5ad2ee
DM
2528{
2529 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2530 struct dst_entry *dst;
1f7f10ac
2531 struct flowi6 fl6 = {
2532 .flowi6_iif = LOOPBACK_IFINDEX,
2533 .flowi6_oif = oif,
2534 .flowi6_mark = mark,
2535 .daddr = iph->daddr,
2536 .saddr = iph->saddr,
2537 .flowlabel = ip6_flowinfo(iph),
2538 .flowi6_uid = uid,
2539 };
3a5ad2ee 2540
b75cc8f9 2541 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2542 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2543 dst_release(dst);
2544}
2545EXPORT_SYMBOL_GPL(ip6_redirect);
2546
d456336d 2547void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2548{
2549 const struct ipv6hdr *iph = ipv6_hdr(skb);
2550 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2551 struct dst_entry *dst;
0b26fb17
2552 struct flowi6 fl6 = {
2553 .flowi6_iif = LOOPBACK_IFINDEX,
2554 .flowi6_oif = oif,
0b26fb17
2555 .daddr = msg->dest,
2556 .saddr = iph->daddr,
2557 .flowi6_uid = sock_net_uid(net, NULL),
2558 };
c92a59ec 2559
b75cc8f9 2560 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2561 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2562 dst_release(dst);
2563}
2564
3a5ad2ee
DM
2565void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2566{
e2d118a1
LC
2567 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2568 sk->sk_uid);
3a5ad2ee
DM
2569}
2570EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2571
0dbaee3b 2572static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2573{
0dbaee3b
DM
2574 struct net_device *dev = dst->dev;
2575 unsigned int mtu = dst_mtu(dst);
2576 struct net *net = dev_net(dev);
2577
1da177e4
LT
2578 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2579
5578689a
DL
2580 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2581 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2582
2583 /*
1ab1457c
YH
2584 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2585 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2586 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2587 * rely only on pmtu discovery"
2588 */
2589 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2590 mtu = IPV6_MAXPLEN;
2591 return mtu;
2592}
2593
ebb762f2 2594static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2595{
d33e4553 2596 struct inet6_dev *idev;
d4ead6b3 2597 unsigned int mtu;
4b32b5ad
MKL
2598
2599 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2600 if (mtu)
30f78d8e 2601 goto out;
618f9bc7
SK
2602
2603 mtu = IPV6_MIN_MTU;
d33e4553
DM
2604
2605 rcu_read_lock();
2606 idev = __in6_dev_get(dst->dev);
2607 if (idev)
2608 mtu = idev->cnf.mtu6;
2609 rcu_read_unlock();
2610
30f78d8e 2611out:
14972cbd
RP
2612 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2613
2614 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2615}
2616
901731b8
DA
2617/* MTU selection:
2618 * 1. mtu on route is locked - use it
2619 * 2. mtu from nexthop exception
2620 * 3. mtu from egress device
2621 *
2622 * based on ip6_dst_mtu_forward and exception logic of
2623 * rt6_find_cached_rt; called with rcu_read_lock
2624 */
2625u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2626 struct in6_addr *saddr)
2627{
2628 struct rt6_exception_bucket *bucket;
2629 struct rt6_exception *rt6_ex;
2630 struct in6_addr *src_key;
2631 struct inet6_dev *idev;
2632 u32 mtu = 0;
2633
2634 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2635 mtu = f6i->fib6_pmtu;
2636 if (mtu)
2637 goto out;
2638 }
2639
2640 src_key = NULL;
2641#ifdef CONFIG_IPV6_SUBTREES
2642 if (f6i->fib6_src.plen)
2643 src_key = saddr;
2644#endif
2645
2646 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2647 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2648 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2649 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2650
2651 if (likely(!mtu)) {
2652 struct net_device *dev = fib6_info_nh_dev(f6i);
2653
2654 mtu = IPV6_MIN_MTU;
2655 idev = __in6_dev_get(dev);
2656 if (idev && idev->cnf.mtu6 > mtu)
2657 mtu = idev->cnf.mtu6;
2658 }
2659
2660 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2661out:
2662 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2663}
2664
3b00944c 2665struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2666 struct flowi6 *fl6)
1da177e4 2667{
87a11578 2668 struct dst_entry *dst;
1da177e4
LT
2669 struct rt6_info *rt;
2670 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2671 struct net *net = dev_net(dev);
1da177e4 2672
38308473 2673 if (unlikely(!idev))
122bdf67 2674 return ERR_PTR(-ENODEV);
1da177e4 2675
ad706862 2676 rt = ip6_dst_alloc(net, dev, 0);
38308473 2677 if (unlikely(!rt)) {
1da177e4 2678 in6_dev_put(idev);
87a11578 2679 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2680 goto out;
2681 }
2682
8e2ec639 2683 rt->dst.flags |= DST_HOST;
588753f1 2684 rt->dst.input = ip6_input;
8e2ec639 2685 rt->dst.output = ip6_output;
550bab42 2686 rt->rt6i_gateway = fl6->daddr;
87a11578 2687 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2688 rt->rt6i_dst.plen = 128;
2689 rt->rt6i_idev = idev;
14edd87d 2690 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2691
4c981e28 2692 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2693 * do proper release of the net_device
2694 */
2695 rt6_uncached_list_add(rt);
81eb8447 2696 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2697
87a11578
DM
2698 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2699
1da177e4 2700out:
87a11578 2701 return dst;
1da177e4
LT
2702}
2703
569d3645 2704static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2705{
86393e52 2706 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2707 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2708 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2709 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2710 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2711 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2712 int entries;
7019b78e 2713
fc66f95c 2714 entries = dst_entries_get_fast(ops);
49a18d86 2715 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2716 entries <= rt_max_size)
1da177e4
LT
2717 goto out;
2718
6891a346 2719 net->ipv6.ip6_rt_gc_expire++;
14956643 2720 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2721 entries = dst_entries_get_slow(ops);
2722 if (entries < ops->gc_thresh)
7019b78e 2723 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2724out:
7019b78e 2725 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2726 return entries > rt_max_size;
1da177e4
LT
2727}
2728
8c14586f
DA
2729static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2730 struct fib6_config *cfg,
f4797b33
DA
2731 const struct in6_addr *gw_addr,
2732 u32 tbid, int flags)
8c14586f
DA
2733{
2734 struct flowi6 fl6 = {
2735 .flowi6_oif = cfg->fc_ifindex,
2736 .daddr = *gw_addr,
2737 .saddr = cfg->fc_prefsrc,
2738 };
2739 struct fib6_table *table;
2740 struct rt6_info *rt;
8c14586f 2741
f4797b33 2742 table = fib6_get_table(net, tbid);
8c14586f
DA
2743 if (!table)
2744 return NULL;
2745
2746 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2747 flags |= RT6_LOOKUP_F_HAS_SADDR;
2748
f4797b33 2749 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2750 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2751
2752 /* if table lookup failed, fall back to full lookup */
2753 if (rt == net->ipv6.ip6_null_entry) {
2754 ip6_rt_put(rt);
2755 rt = NULL;
2756 }
2757
2758 return rt;
2759}
2760
fc1e64e1
DA
2761static int ip6_route_check_nh_onlink(struct net *net,
2762 struct fib6_config *cfg,
9fbb704c 2763 const struct net_device *dev,
fc1e64e1
DA
2764 struct netlink_ext_ack *extack)
2765{
44750f84 2766 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2767 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2768 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2769 struct fib6_info *from;
fc1e64e1
DA
2770 struct rt6_info *grt;
2771 int err;
2772
2773 err = 0;
2774 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2775 if (grt) {
bf1dc8ba
PA
2776 rcu_read_lock();
2777 from = rcu_dereference(grt->from);
58e354c0 2778 if (!grt->dst.error &&
4ed591c8 2779 /* ignore match if it is the default route */
bf1dc8ba 2780 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2781 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2782 NL_SET_ERR_MSG(extack,
2783 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2784 err = -EINVAL;
2785 }
bf1dc8ba 2786 rcu_read_unlock();
fc1e64e1
DA
2787
2788 ip6_rt_put(grt);
2789 }
2790
2791 return err;
2792}
2793
1edce99f
DA
2794static int ip6_route_check_nh(struct net *net,
2795 struct fib6_config *cfg,
2796 struct net_device **_dev,
2797 struct inet6_dev **idev)
2798{
2799 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2800 struct net_device *dev = _dev ? *_dev : NULL;
2801 struct rt6_info *grt = NULL;
2802 int err = -EHOSTUNREACH;
2803
2804 if (cfg->fc_table) {
f4797b33
DA
2805 int flags = RT6_LOOKUP_F_IFACE;
2806
2807 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2808 cfg->fc_table, flags);
1edce99f
DA
2809 if (grt) {
2810 if (grt->rt6i_flags & RTF_GATEWAY ||
2811 (dev && dev != grt->dst.dev)) {
2812 ip6_rt_put(grt);
2813 grt = NULL;
2814 }
2815 }
2816 }
2817
2818 if (!grt)
b75cc8f9 2819 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2820
2821 if (!grt)
2822 goto out;
2823
2824 if (dev) {
2825 if (dev != grt->dst.dev) {
2826 ip6_rt_put(grt);
2827 goto out;
2828 }
2829 } else {
2830 *_dev = dev = grt->dst.dev;
2831 *idev = grt->rt6i_idev;
2832 dev_hold(dev);
2833 in6_dev_hold(grt->rt6i_idev);
2834 }
2835
2836 if (!(grt->rt6i_flags & RTF_GATEWAY))
2837 err = 0;
2838
2839 ip6_rt_put(grt);
2840
2841out:
2842 return err;
2843}
2844
9fbb704c
DA
2845static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2846 struct net_device **_dev, struct inet6_dev **idev,
2847 struct netlink_ext_ack *extack)
2848{
2849 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2850 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2851 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2852 const struct net_device *dev = *_dev;
232378e8 2853 bool need_addr_check = !dev;
9fbb704c
DA
2854 int err = -EINVAL;
2855
2856 /* if gw_addr is local we will fail to detect this in case
2857 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2858 * will return already-added prefix route via interface that
2859 * prefix route was assigned to, which might be non-loopback.
2860 */
232378e8
DA
2861 if (dev &&
2862 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2863 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2864 goto out;
2865 }
2866
2867 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2868 /* IPv6 strictly inhibits using not link-local
2869 * addresses as nexthop address.
2870 * Otherwise, router will not able to send redirects.
2871 * It is very good, but in some (rare!) circumstances
2872 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2873 * some exceptions. --ANK
2874 * We allow IPv4-mapped nexthops to support RFC4798-type
2875 * addressing
2876 */
2877 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2878 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2879 goto out;
2880 }
2881
2882 if (cfg->fc_flags & RTNH_F_ONLINK)
2883 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2884 else
2885 err = ip6_route_check_nh(net, cfg, _dev, idev);
2886
2887 if (err)
2888 goto out;
2889 }
2890
2891 /* reload in case device was changed */
2892 dev = *_dev;
2893
2894 err = -EINVAL;
2895 if (!dev) {
2896 NL_SET_ERR_MSG(extack, "Egress device not specified");
2897 goto out;
2898 } else if (dev->flags & IFF_LOOPBACK) {
2899 NL_SET_ERR_MSG(extack,
2900 "Egress device can not be loopback device for this route");
2901 goto out;
2902 }
232378e8
DA
2903
2904 /* if we did not check gw_addr above, do so now that the
2905 * egress device has been resolved.
2906 */
2907 if (need_addr_check &&
2908 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2909 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2910 goto out;
2911 }
2912
9fbb704c
DA
2913 err = 0;
2914out:
2915 return err;
2916}
2917
83c44251
DA
2918static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2919{
2920 if ((flags & RTF_REJECT) ||
2921 (dev && (dev->flags & IFF_LOOPBACK) &&
2922 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2923 !(flags & RTF_LOCAL)))
2924 return true;
2925
2926 return false;
2927}
2928
2929int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2930 struct fib6_config *cfg, gfp_t gfp_flags,
2931 struct netlink_ext_ack *extack)
2932{
2933 struct net_device *dev = NULL;
2934 struct inet6_dev *idev = NULL;
2935 int addr_type;
2936 int err;
2937
f1741730
DA
2938 fib6_nh->fib_nh_family = AF_INET6;
2939
83c44251
DA
2940 err = -ENODEV;
2941 if (cfg->fc_ifindex) {
2942 dev = dev_get_by_index(net, cfg->fc_ifindex);
2943 if (!dev)
2944 goto out;
2945 idev = in6_dev_get(dev);
2946 if (!idev)
2947 goto out;
2948 }
2949
2950 if (cfg->fc_flags & RTNH_F_ONLINK) {
2951 if (!dev) {
2952 NL_SET_ERR_MSG(extack,
2953 "Nexthop device required for onlink");
2954 goto out;
2955 }
2956
2957 if (!(dev->flags & IFF_UP)) {
2958 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2959 err = -ENETDOWN;
2960 goto out;
2961 }
2962
ad1601ae 2963 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
2964 }
2965
ad1601ae 2966 fib6_nh->fib_nh_weight = 1;
83c44251
DA
2967
2968 /* We cannot add true routes via loopback here,
2969 * they would result in kernel looping; promote them to reject routes
2970 */
2971 addr_type = ipv6_addr_type(&cfg->fc_dst);
2972 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2973 /* hold loopback dev/idev if we haven't done so. */
2974 if (dev != net->loopback_dev) {
2975 if (dev) {
2976 dev_put(dev);
2977 in6_dev_put(idev);
2978 }
2979 dev = net->loopback_dev;
2980 dev_hold(dev);
2981 idev = in6_dev_get(dev);
2982 if (!idev) {
2983 err = -ENODEV;
2984 goto out;
2985 }
2986 }
2987 goto set_dev;
2988 }
2989
2990 if (cfg->fc_flags & RTF_GATEWAY) {
2991 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2992 if (err)
2993 goto out;
2994
ad1601ae 2995 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 2996 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
2997 }
2998
2999 err = -ENODEV;
3000 if (!dev)
3001 goto out;
3002
3003 if (idev->cnf.disable_ipv6) {
3004 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3005 err = -EACCES;
3006 goto out;
3007 }
3008
3009 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3010 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3011 err = -ENETDOWN;
3012 goto out;
3013 }
3014
3015 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3016 !netif_carrier_ok(dev))
ad1601ae 3017 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3018
979e276e
DA
3019 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3020 cfg->fc_encap_type, cfg, gfp_flags, extack);
3021 if (err)
3022 goto out;
83c44251 3023set_dev:
ad1601ae 3024 fib6_nh->fib_nh_dev = dev;
f1741730 3025 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3026 err = 0;
3027out:
3028 if (idev)
3029 in6_dev_put(idev);
3030
3031 if (err) {
ad1601ae
DA
3032 lwtstate_put(fib6_nh->fib_nh_lws);
3033 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3034 if (dev)
3035 dev_put(dev);
3036 }
3037
3038 return err;
3039}
3040
dac7d0f2
DA
3041void fib6_nh_release(struct fib6_nh *fib6_nh)
3042{
979e276e 3043 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3044}
3045
8d1c802b 3046static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3047 gfp_t gfp_flags,
333c4301 3048 struct netlink_ext_ack *extack)
1da177e4 3049{
5578689a 3050 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3051 struct fib6_info *rt = NULL;
c71099ac 3052 struct fib6_table *table;
8c5b83f0 3053 int err = -EINVAL;
83c44251 3054 int addr_type;
1da177e4 3055
557c44be 3056 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3057 if (cfg->fc_flags & RTF_PCPU) {
3058 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3059 goto out;
d5d531cb 3060 }
557c44be 3061
2ea2352e
WW
3062 /* RTF_CACHE is an internal flag; can not be set by userspace */
3063 if (cfg->fc_flags & RTF_CACHE) {
3064 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3065 goto out;
3066 }
3067
e8478e80
DA
3068 if (cfg->fc_type > RTN_MAX) {
3069 NL_SET_ERR_MSG(extack, "Invalid route type");
3070 goto out;
3071 }
3072
d5d531cb
DA
3073 if (cfg->fc_dst_len > 128) {
3074 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3075 goto out;
3076 }
3077 if (cfg->fc_src_len > 128) {
3078 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3079 goto out;
d5d531cb 3080 }
1da177e4 3081#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3082 if (cfg->fc_src_len) {
3083 NL_SET_ERR_MSG(extack,
3084 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3085 goto out;
d5d531cb 3086 }
1da177e4 3087#endif
fc1e64e1 3088
d71314b4 3089 err = -ENOBUFS;
38308473
DM
3090 if (cfg->fc_nlinfo.nlh &&
3091 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3092 table = fib6_get_table(net, cfg->fc_table);
38308473 3093 if (!table) {
f3213831 3094 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3095 table = fib6_new_table(net, cfg->fc_table);
3096 }
3097 } else {
3098 table = fib6_new_table(net, cfg->fc_table);
3099 }
38308473
DM
3100
3101 if (!table)
c71099ac 3102 goto out;
c71099ac 3103
93531c67
DA
3104 err = -ENOMEM;
3105 rt = fib6_info_alloc(gfp_flags);
3106 if (!rt)
1da177e4 3107 goto out;
93531c67 3108
d7e774f3
DA
3109 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3110 extack);
767a2217
DA
3111 if (IS_ERR(rt->fib6_metrics)) {
3112 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3113 /* Do not leave garbage there. */
3114 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3115 goto out;
3116 }
3117
93531c67
DA
3118 if (cfg->fc_flags & RTF_ADDRCONF)
3119 rt->dst_nocount = true;
1da177e4 3120
1716a961 3121 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3122 fib6_set_expires(rt, jiffies +
1716a961
G
3123 clock_t_to_jiffies(cfg->fc_expires));
3124 else
14895687 3125 fib6_clean_expires(rt);
1da177e4 3126
86872cb5
TG
3127 if (cfg->fc_protocol == RTPROT_UNSPEC)
3128 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3129 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3130
83c44251
DA
3131 rt->fib6_table = table;
3132 rt->fib6_metric = cfg->fc_metric;
3133 rt->fib6_type = cfg->fc_type;
2b2450ca 3134 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3135
93c2fb25
DA
3136 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3137 rt->fib6_dst.plen = cfg->fc_dst_len;
3138 if (rt->fib6_dst.plen == 128)
3b6761d1 3139 rt->dst_host = true;
e5fd387a 3140
1da177e4 3141#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3142 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3143 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3144#endif
83c44251
DA
3145 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3146 if (err)
3147 goto out;
1da177e4
LT
3148
3149 /* We cannot add true routes via loopback here,
83c44251 3150 * they would result in kernel looping; promote them to reject routes
1da177e4 3151 */
83c44251 3152 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3153 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3154 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3155
c3968a85 3156 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3157 struct net_device *dev = fib6_info_nh_dev(rt);
3158
c3968a85 3159 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3160 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3161 err = -EINVAL;
3162 goto out;
3163 }
93c2fb25
DA
3164 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3165 rt->fib6_prefsrc.plen = 128;
c3968a85 3166 } else
93c2fb25 3167 rt->fib6_prefsrc.plen = 0;
c3968a85 3168
8c5b83f0 3169 return rt;
6b9ea5a6 3170out:
93531c67 3171 fib6_info_release(rt);
8c5b83f0 3172 return ERR_PTR(err);
6b9ea5a6
RP
3173}
3174
acb54e3c 3175int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3176 struct netlink_ext_ack *extack)
6b9ea5a6 3177{
8d1c802b 3178 struct fib6_info *rt;
6b9ea5a6
RP
3179 int err;
3180
acb54e3c 3181 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3182 if (IS_ERR(rt))
3183 return PTR_ERR(rt);
6b9ea5a6 3184
d4ead6b3 3185 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3186 fib6_info_release(rt);
6b9ea5a6 3187
1da177e4
LT
3188 return err;
3189}
3190
8d1c802b 3191static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3192{
afb1d4b5 3193 struct net *net = info->nl_net;
c71099ac 3194 struct fib6_table *table;
afb1d4b5 3195 int err;
1da177e4 3196
421842ed 3197 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3198 err = -ENOENT;
3199 goto out;
3200 }
6c813a72 3201
93c2fb25 3202 table = rt->fib6_table;
66f5d6ce 3203 spin_lock_bh(&table->tb6_lock);
86872cb5 3204 err = fib6_del(rt, info);
66f5d6ce 3205 spin_unlock_bh(&table->tb6_lock);
1da177e4 3206
6825a26c 3207out:
93531c67 3208 fib6_info_release(rt);
1da177e4
LT
3209 return err;
3210}
3211
8d1c802b 3212int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3213{
afb1d4b5
DA
3214 struct nl_info info = { .nl_net = net };
3215
528c4ceb 3216 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3217}
3218
8d1c802b 3219static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3220{
3221 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3222 struct net *net = info->nl_net;
16a16cd3 3223 struct sk_buff *skb = NULL;
0ae81335 3224 struct fib6_table *table;
e3330039 3225 int err = -ENOENT;
0ae81335 3226
421842ed 3227 if (rt == net->ipv6.fib6_null_entry)
e3330039 3228 goto out_put;
93c2fb25 3229 table = rt->fib6_table;
66f5d6ce 3230 spin_lock_bh(&table->tb6_lock);
0ae81335 3231
93c2fb25 3232 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3233 struct fib6_info *sibling, *next_sibling;
0ae81335 3234
16a16cd3
DA
3235 /* prefer to send a single notification with all hops */
3236 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3237 if (skb) {
3238 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3239
d4ead6b3 3240 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3241 NULL, NULL, 0, RTM_DELROUTE,
3242 info->portid, seq, 0) < 0) {
3243 kfree_skb(skb);
3244 skb = NULL;
3245 } else
3246 info->skip_notify = 1;
3247 }
3248
0ae81335 3249 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3250 &rt->fib6_siblings,
3251 fib6_siblings) {
0ae81335
DA
3252 err = fib6_del(sibling, info);
3253 if (err)
e3330039 3254 goto out_unlock;
0ae81335
DA
3255 }
3256 }
3257
3258 err = fib6_del(rt, info);
e3330039 3259out_unlock:
66f5d6ce 3260 spin_unlock_bh(&table->tb6_lock);
e3330039 3261out_put:
93531c67 3262 fib6_info_release(rt);
16a16cd3
DA
3263
3264 if (skb) {
e3330039 3265 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3266 info->nlh, gfp_any());
3267 }
0ae81335
DA
3268 return err;
3269}
3270
23fb93a4
DA
3271static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3272{
3273 int rc = -ESRCH;
3274
3275 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3276 goto out;
3277
3278 if (cfg->fc_flags & RTF_GATEWAY &&
3279 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3280 goto out;
761f6026
XL
3281
3282 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3283out:
3284 return rc;
3285}
3286
333c4301
DA
3287static int ip6_route_del(struct fib6_config *cfg,
3288 struct netlink_ext_ack *extack)
1da177e4 3289{
8d1c802b 3290 struct rt6_info *rt_cache;
c71099ac 3291 struct fib6_table *table;
8d1c802b 3292 struct fib6_info *rt;
1da177e4 3293 struct fib6_node *fn;
1da177e4
LT
3294 int err = -ESRCH;
3295
5578689a 3296 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3297 if (!table) {
3298 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3299 return err;
d5d531cb 3300 }
c71099ac 3301
66f5d6ce 3302 rcu_read_lock();
1da177e4 3303
c71099ac 3304 fn = fib6_locate(&table->tb6_root,
86872cb5 3305 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3306 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3307 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3308
1da177e4 3309 if (fn) {
66f5d6ce 3310 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3311 struct fib6_nh *nh;
3312
2b760fcf 3313 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3314 int rc;
3315
2b760fcf
WW
3316 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3317 &cfg->fc_src);
23fb93a4
DA
3318 if (rt_cache) {
3319 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3320 if (rc != -ESRCH) {
3321 rcu_read_unlock();
23fb93a4 3322 return rc;
9e575010 3323 }
23fb93a4
DA
3324 }
3325 continue;
2b760fcf 3326 }
ad1601ae
DA
3327
3328 nh = &rt->fib6_nh;
86872cb5 3329 if (cfg->fc_ifindex &&
ad1601ae
DA
3330 (!nh->fib_nh_dev ||
3331 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3332 continue;
86872cb5 3333 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3334 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3335 continue;
93c2fb25 3336 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3337 continue;
93c2fb25 3338 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3339 continue;
e873e4b9
WW
3340 if (!fib6_info_hold_safe(rt))
3341 continue;
66f5d6ce 3342 rcu_read_unlock();
1da177e4 3343
0ae81335
DA
3344 /* if gateway was specified only delete the one hop */
3345 if (cfg->fc_flags & RTF_GATEWAY)
3346 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3347
3348 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3349 }
3350 }
66f5d6ce 3351 rcu_read_unlock();
1da177e4
LT
3352
3353 return err;
3354}
3355
6700c270 3356static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3357{
a6279458 3358 struct netevent_redirect netevent;
e8599ff4 3359 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3360 struct ndisc_options ndopts;
3361 struct inet6_dev *in6_dev;
3362 struct neighbour *neigh;
a68886a6 3363 struct fib6_info *from;
71bcdba0 3364 struct rd_msg *msg;
6e157b6a
DM
3365 int optlen, on_link;
3366 u8 *lladdr;
e8599ff4 3367
29a3cad5 3368 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3369 optlen -= sizeof(*msg);
e8599ff4
DM
3370
3371 if (optlen < 0) {
6e157b6a 3372 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3373 return;
3374 }
3375
71bcdba0 3376 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3377
71bcdba0 3378 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3379 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3380 return;
3381 }
3382
6e157b6a 3383 on_link = 0;
71bcdba0 3384 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3385 on_link = 1;
71bcdba0 3386 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3387 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3388 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3389 return;
3390 }
3391
3392 in6_dev = __in6_dev_get(skb->dev);
3393 if (!in6_dev)
3394 return;
3395 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3396 return;
3397
3398 /* RFC2461 8.1:
3399 * The IP source address of the Redirect MUST be the same as the current
3400 * first-hop router for the specified ICMP Destination Address.
3401 */
3402
f997c55c 3403 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3404 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3405 return;
3406 }
6e157b6a
DM
3407
3408 lladdr = NULL;
e8599ff4
DM
3409 if (ndopts.nd_opts_tgt_lladdr) {
3410 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3411 skb->dev);
3412 if (!lladdr) {
3413 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3414 return;
3415 }
3416 }
3417
6e157b6a 3418 rt = (struct rt6_info *) dst;
ec13ad1d 3419 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3420 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3421 return;
6e157b6a 3422 }
e8599ff4 3423
6e157b6a
DM
3424 /* Redirect received -> path was valid.
3425 * Look, redirects are sent only in response to data packets,
3426 * so that this nexthop apparently is reachable. --ANK
3427 */
0dec879f 3428 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3429
71bcdba0 3430 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3431 if (!neigh)
3432 return;
a6279458 3433
1da177e4
LT
3434 /*
3435 * We have finally decided to accept it.
3436 */
3437
f997c55c 3438 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3439 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3440 NEIGH_UPDATE_F_OVERRIDE|
3441 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3442 NEIGH_UPDATE_F_ISROUTER)),
3443 NDISC_REDIRECT, &ndopts);
1da177e4 3444
4d85cd0c 3445 rcu_read_lock();
a68886a6 3446 from = rcu_dereference(rt->from);
e873e4b9
WW
3447 /* This fib6_info_hold() is safe here because we hold reference to rt
3448 * and rt already holds reference to fib6_info.
3449 */
8a14e46f 3450 fib6_info_hold(from);
4d85cd0c 3451 rcu_read_unlock();
8a14e46f
DA
3452
3453 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3454 if (!nrt)
1da177e4
LT
3455 goto out;
3456
3457 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3458 if (on_link)
3459 nrt->rt6i_flags &= ~RTF_GATEWAY;
3460
4e3fd7a0 3461 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3462
2b760fcf
WW
3463 /* No need to remove rt from the exception table if rt is
3464 * a cached route because rt6_insert_exception() will
3465 * takes care of it
3466 */
8a14e46f 3467 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3468 dst_release_immediate(&nrt->dst);
3469 goto out;
3470 }
1da177e4 3471
d8d1f30b
CG
3472 netevent.old = &rt->dst;
3473 netevent.new = &nrt->dst;
71bcdba0 3474 netevent.daddr = &msg->dest;
60592833 3475 netevent.neigh = neigh;
8d71740c
TT
3476 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3477
1da177e4 3478out:
8a14e46f 3479 fib6_info_release(from);
e8599ff4 3480 neigh_release(neigh);
6e157b6a
DM
3481}
3482
70ceb4f5 3483#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3484static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3485 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3486 const struct in6_addr *gwaddr,
3487 struct net_device *dev)
70ceb4f5 3488{
830218c1
DA
3489 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3490 int ifindex = dev->ifindex;
70ceb4f5 3491 struct fib6_node *fn;
8d1c802b 3492 struct fib6_info *rt = NULL;
c71099ac
TG
3493 struct fib6_table *table;
3494
830218c1 3495 table = fib6_get_table(net, tb_id);
38308473 3496 if (!table)
c71099ac 3497 return NULL;
70ceb4f5 3498
66f5d6ce 3499 rcu_read_lock();
38fbeeee 3500 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3501 if (!fn)
3502 goto out;
3503
66f5d6ce 3504 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3505 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3506 continue;
2b2450ca 3507 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
bdf00467 3508 !rt->fib6_nh.fib_nh_gw_family)
70ceb4f5 3509 continue;
ad1601ae 3510 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3511 continue;
e873e4b9
WW
3512 if (!fib6_info_hold_safe(rt))
3513 continue;
70ceb4f5
YH
3514 break;
3515 }
3516out:
66f5d6ce 3517 rcu_read_unlock();
70ceb4f5
YH
3518 return rt;
3519}
3520
8d1c802b 3521static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3522 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3523 const struct in6_addr *gwaddr,
3524 struct net_device *dev,
95c96174 3525 unsigned int pref)
70ceb4f5 3526{
86872cb5 3527 struct fib6_config cfg = {
238fc7ea 3528 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3529 .fc_ifindex = dev->ifindex,
86872cb5
TG
3530 .fc_dst_len = prefixlen,
3531 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3532 RTF_UP | RTF_PREF(pref),
b91d5329 3533 .fc_protocol = RTPROT_RA,
e8478e80 3534 .fc_type = RTN_UNICAST,
15e47304 3535 .fc_nlinfo.portid = 0,
efa2cea0
DL
3536 .fc_nlinfo.nlh = NULL,
3537 .fc_nlinfo.nl_net = net,
86872cb5
TG
3538 };
3539
830218c1 3540 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3541 cfg.fc_dst = *prefix;
3542 cfg.fc_gateway = *gwaddr;
70ceb4f5 3543
e317da96
YH
3544 /* We should treat it as a default route if prefix length is 0. */
3545 if (!prefixlen)
86872cb5 3546 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3547
acb54e3c 3548 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3549
830218c1 3550 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3551}
3552#endif
3553
8d1c802b 3554struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3555 const struct in6_addr *addr,
3556 struct net_device *dev)
1ab1457c 3557{
830218c1 3558 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3559 struct fib6_info *rt;
c71099ac 3560 struct fib6_table *table;
1da177e4 3561
afb1d4b5 3562 table = fib6_get_table(net, tb_id);
38308473 3563 if (!table)
c71099ac 3564 return NULL;
1da177e4 3565
66f5d6ce
WW
3566 rcu_read_lock();
3567 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3568 struct fib6_nh *nh = &rt->fib6_nh;
3569
3570 if (dev == nh->fib_nh_dev &&
93c2fb25 3571 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3572 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3573 break;
3574 }
e873e4b9
WW
3575 if (rt && !fib6_info_hold_safe(rt))
3576 rt = NULL;
66f5d6ce 3577 rcu_read_unlock();
1da177e4
LT
3578 return rt;
3579}
3580
8d1c802b 3581struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3582 const struct in6_addr *gwaddr,
ebacaaa0
YH
3583 struct net_device *dev,
3584 unsigned int pref)
1da177e4 3585{
86872cb5 3586 struct fib6_config cfg = {
ca254490 3587 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3588 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3589 .fc_ifindex = dev->ifindex,
3590 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3591 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3592 .fc_protocol = RTPROT_RA,
e8478e80 3593 .fc_type = RTN_UNICAST,
15e47304 3594 .fc_nlinfo.portid = 0,
5578689a 3595 .fc_nlinfo.nlh = NULL,
afb1d4b5 3596 .fc_nlinfo.nl_net = net,
86872cb5 3597 };
1da177e4 3598
4e3fd7a0 3599 cfg.fc_gateway = *gwaddr;
1da177e4 3600
acb54e3c 3601 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3602 struct fib6_table *table;
3603
3604 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3605 if (table)
3606 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3607 }
1da177e4 3608
afb1d4b5 3609 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3610}
3611
afb1d4b5
DA
3612static void __rt6_purge_dflt_routers(struct net *net,
3613 struct fib6_table *table)
1da177e4 3614{
8d1c802b 3615 struct fib6_info *rt;
1da177e4
LT
3616
3617restart:
66f5d6ce
WW
3618 rcu_read_lock();
3619 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3620 struct net_device *dev = fib6_info_nh_dev(rt);
3621 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3622
93c2fb25 3623 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3624 (!idev || idev->cnf.accept_ra != 2) &&
3625 fib6_info_hold_safe(rt)) {
93531c67
DA
3626 rcu_read_unlock();
3627 ip6_del_rt(net, rt);
1da177e4
LT
3628 goto restart;
3629 }
3630 }
66f5d6ce 3631 rcu_read_unlock();
830218c1
DA
3632
3633 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3634}
3635
3636void rt6_purge_dflt_routers(struct net *net)
3637{
3638 struct fib6_table *table;
3639 struct hlist_head *head;
3640 unsigned int h;
3641
3642 rcu_read_lock();
3643
3644 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3645 head = &net->ipv6.fib_table_hash[h];
3646 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3647 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3648 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3649 }
3650 }
3651
3652 rcu_read_unlock();
1da177e4
LT
3653}
3654
5578689a
DL
3655static void rtmsg_to_fib6_config(struct net *net,
3656 struct in6_rtmsg *rtmsg,
86872cb5
TG
3657 struct fib6_config *cfg)
3658{
8823a3ac
3659 *cfg = (struct fib6_config){
3660 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3661 : RT6_TABLE_MAIN,
3662 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3663 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3664 .fc_expires = rtmsg->rtmsg_info,
3665 .fc_dst_len = rtmsg->rtmsg_dst_len,
3666 .fc_src_len = rtmsg->rtmsg_src_len,
3667 .fc_flags = rtmsg->rtmsg_flags,
3668 .fc_type = rtmsg->rtmsg_type,
3669
3670 .fc_nlinfo.nl_net = net,
3671
3672 .fc_dst = rtmsg->rtmsg_dst,
3673 .fc_src = rtmsg->rtmsg_src,
3674 .fc_gateway = rtmsg->rtmsg_gateway,
3675 };
86872cb5
TG
3676}
3677
5578689a 3678int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3679{
86872cb5 3680 struct fib6_config cfg;
1da177e4
LT
3681 struct in6_rtmsg rtmsg;
3682 int err;
3683
67ba4152 3684 switch (cmd) {
1da177e4
LT
3685 case SIOCADDRT: /* Add a route */
3686 case SIOCDELRT: /* Delete a route */
af31f412 3687 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3688 return -EPERM;
3689 err = copy_from_user(&rtmsg, arg,
3690 sizeof(struct in6_rtmsg));
3691 if (err)
3692 return -EFAULT;
86872cb5 3693
5578689a 3694 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3695
1da177e4
LT
3696 rtnl_lock();
3697 switch (cmd) {
3698 case SIOCADDRT:
acb54e3c 3699 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3700 break;
3701 case SIOCDELRT:
333c4301 3702 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3703 break;
3704 default:
3705 err = -EINVAL;
3706 }
3707 rtnl_unlock();
3708
3709 return err;
3ff50b79 3710 }
1da177e4
LT
3711
3712 return -EINVAL;
3713}
3714
3715/*
3716 * Drop the packet on the floor
3717 */
3718
d5fdd6ba 3719static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3720{
612f09e8 3721 int type;
adf30907 3722 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3723 switch (ipstats_mib_noroutes) {
3724 case IPSTATS_MIB_INNOROUTES:
0660e03f 3725 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3726 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3727 IP6_INC_STATS(dev_net(dst->dev),
3728 __in6_dev_get_safely(skb->dev),
3bd653c8 3729 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3730 break;
3731 }
3732 /* FALLTHROUGH */
3733 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3734 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3735 ipstats_mib_noroutes);
612f09e8
YH
3736 break;
3737 }
3ffe533c 3738 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3739 kfree_skb(skb);
3740 return 0;
3741}
3742
9ce8ade0
TG
3743static int ip6_pkt_discard(struct sk_buff *skb)
3744{
612f09e8 3745 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3746}
3747
ede2059d 3748static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3749{
adf30907 3750 skb->dev = skb_dst(skb)->dev;
612f09e8 3751 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3752}
3753
9ce8ade0
TG
3754static int ip6_pkt_prohibit(struct sk_buff *skb)
3755{
612f09e8 3756 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3757}
3758
ede2059d 3759static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3760{
adf30907 3761 skb->dev = skb_dst(skb)->dev;
612f09e8 3762 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3763}
3764
1da177e4
LT
3765/*
3766 * Allocate a dst for local (unicast / anycast) address.
3767 */
3768
360a9887
DA
3769struct fib6_info *addrconf_f6i_alloc(struct net *net,
3770 struct inet6_dev *idev,
3771 const struct in6_addr *addr,
3772 bool anycast, gfp_t gfp_flags)
1da177e4 3773{
c7a1ce39
DA
3774 struct fib6_config cfg = {
3775 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3776 .fc_ifindex = idev->dev->ifindex,
3777 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3778 .fc_dst = *addr,
3779 .fc_dst_len = 128,
3780 .fc_protocol = RTPROT_KERNEL,
3781 .fc_nlinfo.nl_net = net,
3782 .fc_ignore_dev_down = true,
3783 };
1da177e4 3784
e8478e80 3785 if (anycast) {
c7a1ce39
DA
3786 cfg.fc_type = RTN_ANYCAST;
3787 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3788 } else {
c7a1ce39
DA
3789 cfg.fc_type = RTN_LOCAL;
3790 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3791 }
1da177e4 3792
c7a1ce39 3793 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3794}
3795
c3968a85
DW
3796/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
3802
8d1c802b 3803static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3804{
3805 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3806 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3807 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3808
ad1601ae 3809 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3810 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3811 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3812 spin_lock_bh(&rt6_exception_lock);
c3968a85 3813 /* remove prefsrc entry */
93c2fb25 3814 rt->fib6_prefsrc.plen = 0;
60006a48 3815 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3816 }
3817 return 0;
3818}
3819
3820void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3821{
3822 struct net *net = dev_net(ifp->idev->dev);
3823 struct arg_dev_net_ip adni = {
3824 .dev = ifp->idev->dev,
3825 .net = net,
3826 .addr = &ifp->addr,
3827 };
0c3584d5 3828 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3829}
3830
2b2450ca 3831#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3832
3833/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3834static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3835{
3836 struct in6_addr *gateway = (struct in6_addr *)arg;
3837
93c2fb25 3838 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
bdf00467 3839 rt->fib6_nh.fib_nh_gw_family &&
ad1601ae 3840 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3841 return -1;
3842 }
b16cb459
WW
3843
3844 /* Further clean up cached routes in exception table.
3845 * This is needed because cached route may have a different
3846 * gateway than its 'parent' in the case of an ip redirect.
3847 */
3848 rt6_exceptions_clean_tohost(rt, gateway);
3849
be7a010d
DJ
3850 return 0;
3851}
3852
3853void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3854{
3855 fib6_clean_all(net, fib6_clean_tohost, gateway);
3856}
3857
2127d95a
IS
/* Argument bundle for the netdev-event driven FIB walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* nexthop flags (used by fib6_ifup) */
		unsigned long event;	/* event code */
	};
};
3865
8d1c802b 3866static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3867{
8d1c802b 3868 struct fib6_info *iter;
d7dedee1
IS
3869 struct fib6_node *fn;
3870
93c2fb25
DA
3871 fn = rcu_dereference_protected(rt->fib6_node,
3872 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3873 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3874 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3875 while (iter) {
93c2fb25 3876 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3877 rt6_qualify_for_ecmp(iter))
d7dedee1 3878 return iter;
8fb11a9a 3879 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3880 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3881 }
3882
3883 return NULL;
3884}
3885
8d1c802b 3886static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3887{
ad1601ae
DA
3888 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3889 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3890 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3891 return true;
3892
3893 return false;
3894}
3895
8d1c802b 3896static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3897{
8d1c802b 3898 struct fib6_info *iter;
d7dedee1
IS
3899 int total = 0;
3900
3901 if (!rt6_is_dead(rt))
ad1601ae 3902 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3903
93c2fb25 3904 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3905 if (!rt6_is_dead(iter))
ad1601ae 3906 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3907 }
3908
3909 return total;
3910}
3911
8d1c802b 3912static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3913{
3914 int upper_bound = -1;
3915
3916 if (!rt6_is_dead(rt)) {
ad1601ae 3917 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3918 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3919 total) - 1;
3920 }
ad1601ae 3921 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3922}
3923
8d1c802b 3924static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3925{
8d1c802b 3926 struct fib6_info *iter;
d7dedee1
IS
3927 int weight = 0;
3928
3929 rt6_upper_bound_set(rt, &weight, total);
3930
93c2fb25 3931 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3932 rt6_upper_bound_set(iter, &weight, total);
3933}
3934
8d1c802b 3935void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3936{
8d1c802b 3937 struct fib6_info *first;
d7dedee1
IS
3938 int total;
3939
3940 /* In case the entire multipath route was marked for flushing,
3941 * then there is no need to rebalance upon the removal of every
3942 * sibling route.
3943 */
93c2fb25 3944 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3945 return;
3946
3947 /* During lookup routes are evaluated in order, so we need to
3948 * make sure upper bounds are assigned from the first sibling
3949 * onwards.
3950 */
3951 first = rt6_multipath_first_sibling(rt);
3952 if (WARN_ON_ONCE(!first))
3953 return;
3954
3955 total = rt6_multipath_total_weight(first);
3956 rt6_multipath_upper_bound_set(first, total);
3957}
3958
8d1c802b 3959static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3960{
3961 const struct arg_netdev_event *arg = p_arg;
7aef6859 3962 struct net *net = dev_net(arg->dev);
2127d95a 3963
ad1601ae
DA
3964 if (rt != net->ipv6.fib6_null_entry &&
3965 rt->fib6_nh.fib_nh_dev == arg->dev) {
3966 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 3967 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3968 rt6_multipath_rebalance(rt);
1de178ed 3969 }
2127d95a
IS
3970
3971 return 0;
3972}
3973
3974void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3975{
3976 struct arg_netdev_event arg = {
3977 .dev = dev,
6802f3ad
IS
3978 {
3979 .nh_flags = nh_flags,
3980 },
2127d95a
IS
3981 };
3982
3983 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3984 arg.nh_flags |= RTNH_F_LINKDOWN;
3985
3986 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3987}
3988
8d1c802b 3989static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3990 const struct net_device *dev)
3991{
8d1c802b 3992 struct fib6_info *iter;
1de178ed 3993
ad1601ae 3994 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 3995 return true;
93c2fb25 3996 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 3997 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
3998 return true;
3999
4000 return false;
4001}
4002
8d1c802b 4003static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4004{
8d1c802b 4005 struct fib6_info *iter;
1de178ed
IS
4006
4007 rt->should_flush = 1;
93c2fb25 4008 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4009 iter->should_flush = 1;
4010}
4011
8d1c802b 4012static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4013 const struct net_device *down_dev)
4014{
8d1c802b 4015 struct fib6_info *iter;
1de178ed
IS
4016 unsigned int dead = 0;
4017
ad1601ae
DA
4018 if (rt->fib6_nh.fib_nh_dev == down_dev ||
4019 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 4020 dead++;
93c2fb25 4021 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4022 if (iter->fib6_nh.fib_nh_dev == down_dev ||
4023 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4024 dead++;
4025
4026 return dead;
4027}
4028
8d1c802b 4029static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4030 const struct net_device *dev,
4031 unsigned int nh_flags)
4032{
8d1c802b 4033 struct fib6_info *iter;
1de178ed 4034
ad1601ae
DA
4035 if (rt->fib6_nh.fib_nh_dev == dev)
4036 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 4037 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4038 if (iter->fib6_nh.fib_nh_dev == dev)
4039 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4040}
4041
a1a22c12 4042/* called with write lock held for table with rt */
8d1c802b 4043static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4044{
4c981e28
IS
4045 const struct arg_netdev_event *arg = p_arg;
4046 const struct net_device *dev = arg->dev;
7aef6859 4047 struct net *net = dev_net(dev);
8ed67789 4048
421842ed 4049 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4050 return 0;
4051
4052 switch (arg->event) {
4053 case NETDEV_UNREGISTER:
ad1601ae 4054 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4055 case NETDEV_DOWN:
1de178ed 4056 if (rt->should_flush)
27c6fa73 4057 return -1;
93c2fb25 4058 if (!rt->fib6_nsiblings)
ad1601ae 4059 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4060 if (rt6_multipath_uses_dev(rt, dev)) {
4061 unsigned int count;
4062
4063 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4064 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4065 rt6_multipath_flush(rt);
4066 return -1;
4067 }
4068 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4069 RTNH_F_LINKDOWN);
7aef6859 4070 fib6_update_sernum(net, rt);
d7dedee1 4071 rt6_multipath_rebalance(rt);
1de178ed
IS
4072 }
4073 return -2;
27c6fa73 4074 case NETDEV_CHANGE:
ad1601ae 4075 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4076 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4077 break;
ad1601ae 4078 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4079 rt6_multipath_rebalance(rt);
27c6fa73 4080 break;
2b241361 4081 }
c159d30c 4082
1da177e4
LT
4083 return 0;
4084}
4085
27c6fa73 4086void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4087{
4c981e28 4088 struct arg_netdev_event arg = {
8ed67789 4089 .dev = dev,
6802f3ad
IS
4090 {
4091 .event = event,
4092 },
8ed67789 4093 };
7c6bb7d2 4094 struct net *net = dev_net(dev);
8ed67789 4095
7c6bb7d2
DA
4096 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4097 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4098 else
4099 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4100}
4101
4102void rt6_disable_ip(struct net_device *dev, unsigned long event)
4103{
4104 rt6_sync_down_dev(dev, event);
4105 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4106 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4107}
4108
/* Argument block passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4113
8d1c802b 4114static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4115{
4116 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4117 struct inet6_dev *idev;
4118
4119 /* In IPv6 pmtu discovery is not optional,
4120 so that RTAX_MTU lock cannot disable it.
4121 We still use this lock to block changes
4122 caused by addrconf/ndisc.
4123 */
4124
4125 idev = __in6_dev_get(arg->dev);
38308473 4126 if (!idev)
1da177e4
LT
4127 return 0;
4128
4129 /* For administrative MTU increase, there is no way to discover
4130 IPv6 PMTU increase, so PMTU increase should be updated here.
4131 Since RFC 1981 doesn't include administrative MTU increase
4132 update PMTU increase is a MUST. (i.e. jumbo frame)
4133 */
ad1601ae 4134 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4135 !fib6_metric_locked(rt, RTAX_MTU)) {
4136 u32 mtu = rt->fib6_pmtu;
4137
4138 if (mtu >= arg->mtu ||
4139 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4140 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4141
f5bbe7ee 4142 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4143 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4144 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4145 }
1da177e4
LT
4146 return 0;
4147}
4148
95c96174 4149void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4150{
c71099ac
TG
4151 struct rt6_mtu_change_arg arg = {
4152 .dev = dev,
4153 .mtu = mtu,
4154 };
1da177e4 4155
0c3584d5 4156 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4157}
4158
ef7c79ed 4159static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4160 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4161 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4162 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4163 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4164 [RTA_PRIORITY] = { .type = NLA_U32 },
4165 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4166 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4167 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4168 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4169 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4170 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4171 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4172 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4173 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4174 [RTA_IP_PROTO] = { .type = NLA_U8 },
4175 [RTA_SPORT] = { .type = NLA_U16 },
4176 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4177};
4178
4179static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4180 struct fib6_config *cfg,
4181 struct netlink_ext_ack *extack)
1da177e4 4182{
86872cb5
TG
4183 struct rtmsg *rtm;
4184 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4185 unsigned int pref;
86872cb5 4186 int err;
1da177e4 4187
fceb6435 4188 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4189 extack);
86872cb5
TG
4190 if (err < 0)
4191 goto errout;
1da177e4 4192
86872cb5
TG
4193 err = -EINVAL;
4194 rtm = nlmsg_data(nlh);
86872cb5 4195
84db8407
4196 *cfg = (struct fib6_config){
4197 .fc_table = rtm->rtm_table,
4198 .fc_dst_len = rtm->rtm_dst_len,
4199 .fc_src_len = rtm->rtm_src_len,
4200 .fc_flags = RTF_UP,
4201 .fc_protocol = rtm->rtm_protocol,
4202 .fc_type = rtm->rtm_type,
4203
4204 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4205 .fc_nlinfo.nlh = nlh,
4206 .fc_nlinfo.nl_net = sock_net(skb->sk),
4207 };
86872cb5 4208
ef2c7d7b
ND
4209 if (rtm->rtm_type == RTN_UNREACHABLE ||
4210 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4211 rtm->rtm_type == RTN_PROHIBIT ||
4212 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4213 cfg->fc_flags |= RTF_REJECT;
4214
ab79ad14
4215 if (rtm->rtm_type == RTN_LOCAL)
4216 cfg->fc_flags |= RTF_LOCAL;
4217
1f56a01f
MKL
4218 if (rtm->rtm_flags & RTM_F_CLONED)
4219 cfg->fc_flags |= RTF_CACHE;
4220
fc1e64e1
DA
4221 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4222
86872cb5 4223 if (tb[RTA_GATEWAY]) {
67b61f6c 4224 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4225 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4226 }
e3818541
DA
4227 if (tb[RTA_VIA]) {
4228 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4229 goto errout;
4230 }
86872cb5
TG
4231
4232 if (tb[RTA_DST]) {
4233 int plen = (rtm->rtm_dst_len + 7) >> 3;
4234
4235 if (nla_len(tb[RTA_DST]) < plen)
4236 goto errout;
4237
4238 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4239 }
86872cb5
TG
4240
4241 if (tb[RTA_SRC]) {
4242 int plen = (rtm->rtm_src_len + 7) >> 3;
4243
4244 if (nla_len(tb[RTA_SRC]) < plen)
4245 goto errout;
4246
4247 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4248 }
86872cb5 4249
c3968a85 4250 if (tb[RTA_PREFSRC])
67b61f6c 4251 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4252
86872cb5
TG
4253 if (tb[RTA_OIF])
4254 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4255
4256 if (tb[RTA_PRIORITY])
4257 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4258
4259 if (tb[RTA_METRICS]) {
4260 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4261 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4262 }
86872cb5
TG
4263
4264 if (tb[RTA_TABLE])
4265 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4266
51ebd318
ND
4267 if (tb[RTA_MULTIPATH]) {
4268 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4269 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4270
4271 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4272 cfg->fc_mp_len, extack);
9ed59592
DA
4273 if (err < 0)
4274 goto errout;
51ebd318
ND
4275 }
4276
c78ba6d6
LR
4277 if (tb[RTA_PREF]) {
4278 pref = nla_get_u8(tb[RTA_PREF]);
4279 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4280 pref != ICMPV6_ROUTER_PREF_HIGH)
4281 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4282 cfg->fc_flags |= RTF_PREF(pref);
4283 }
4284
19e42e45
RP
4285 if (tb[RTA_ENCAP])
4286 cfg->fc_encap = tb[RTA_ENCAP];
4287
9ed59592 4288 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4289 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4290
c255bd68 4291 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4292 if (err < 0)
4293 goto errout;
4294 }
4295
32bc201e
XL
4296 if (tb[RTA_EXPIRES]) {
4297 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4298
4299 if (addrconf_finite_timeout(timeout)) {
4300 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4301 cfg->fc_flags |= RTF_EXPIRES;
4302 }
4303 }
4304
86872cb5
TG
4305 err = 0;
4306errout:
4307 return err;
1da177e4
LT
4308}
4309
6b9ea5a6 4310struct rt6_nh {
8d1c802b 4311 struct fib6_info *fib6_info;
6b9ea5a6 4312 struct fib6_config r_cfg;
6b9ea5a6
RP
4313 struct list_head next;
4314};
4315
d4ead6b3
DA
4316static int ip6_route_info_append(struct net *net,
4317 struct list_head *rt6_nh_list,
8d1c802b
DA
4318 struct fib6_info *rt,
4319 struct fib6_config *r_cfg)
6b9ea5a6
RP
4320{
4321 struct rt6_nh *nh;
6b9ea5a6
RP
4322 int err = -EEXIST;
4323
4324 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4325 /* check if fib6_info already exists */
4326 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4327 return err;
4328 }
4329
4330 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4331 if (!nh)
4332 return -ENOMEM;
8d1c802b 4333 nh->fib6_info = rt;
6b9ea5a6
RP
4334 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4335 list_add_tail(&nh->next, rt6_nh_list);
4336
4337 return 0;
4338}
4339
8d1c802b
DA
4340static void ip6_route_mpath_notify(struct fib6_info *rt,
4341 struct fib6_info *rt_last,
3b1137fe
DA
4342 struct nl_info *info,
4343 __u16 nlflags)
4344{
4345 /* if this is an APPEND route, then rt points to the first route
4346 * inserted and rt_last points to last route inserted. Userspace
4347 * wants a consistent dump of the route which starts at the first
4348 * nexthop. Since sibling routes are always added at the end of
4349 * the list, find the first sibling of the last route appended
4350 */
93c2fb25
DA
4351 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4352 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4353 struct fib6_info,
93c2fb25 4354 fib6_siblings);
3b1137fe
DA
4355 }
4356
4357 if (rt)
4358 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4359}
4360
333c4301
DA
4361static int ip6_route_multipath_add(struct fib6_config *cfg,
4362 struct netlink_ext_ack *extack)
51ebd318 4363{
8d1c802b 4364 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4365 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4366 struct fib6_config r_cfg;
4367 struct rtnexthop *rtnh;
8d1c802b 4368 struct fib6_info *rt;
6b9ea5a6
RP
4369 struct rt6_nh *err_nh;
4370 struct rt6_nh *nh, *nh_safe;
3b1137fe 4371 __u16 nlflags;
51ebd318
ND
4372 int remaining;
4373 int attrlen;
6b9ea5a6
RP
4374 int err = 1;
4375 int nhn = 0;
4376 int replace = (cfg->fc_nlinfo.nlh &&
4377 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4378 LIST_HEAD(rt6_nh_list);
51ebd318 4379
3b1137fe
DA
4380 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4381 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4382 nlflags |= NLM_F_APPEND;
4383
35f1b4e9 4384 remaining = cfg->fc_mp_len;
51ebd318 4385 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4386
6b9ea5a6 4387 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4388 * fib6_info structs per nexthop
6b9ea5a6 4389 */
51ebd318
ND
4390 while (rtnh_ok(rtnh, remaining)) {
4391 memcpy(&r_cfg, cfg, sizeof(*cfg));
4392 if (rtnh->rtnh_ifindex)
4393 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4394
4395 attrlen = rtnh_attrlen(rtnh);
4396 if (attrlen > 0) {
4397 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4398
4399 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4400 if (nla) {
67b61f6c 4401 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4402 r_cfg.fc_flags |= RTF_GATEWAY;
4403 }
19e42e45
RP
4404 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4405 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4406 if (nla)
4407 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4408 }
6b9ea5a6 4409
68e2ffde 4410 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4411 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4412 if (IS_ERR(rt)) {
4413 err = PTR_ERR(rt);
4414 rt = NULL;
6b9ea5a6 4415 goto cleanup;
8c5b83f0 4416 }
b5d2d75e
DA
4417 if (!rt6_qualify_for_ecmp(rt)) {
4418 err = -EINVAL;
4419 NL_SET_ERR_MSG(extack,
4420 "Device only routes can not be added for IPv6 using the multipath API.");
4421 fib6_info_release(rt);
4422 goto cleanup;
4423 }
6b9ea5a6 4424
ad1601ae 4425 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4426
d4ead6b3
DA
4427 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4428 rt, &r_cfg);
51ebd318 4429 if (err) {
93531c67 4430 fib6_info_release(rt);
6b9ea5a6
RP
4431 goto cleanup;
4432 }
4433
4434 rtnh = rtnh_next(rtnh, &remaining);
4435 }
4436
3b1137fe
DA
4437 /* for add and replace send one notification with all nexthops.
4438 * Skip the notification in fib6_add_rt2node and send one with
4439 * the full route when done
4440 */
4441 info->skip_notify = 1;
4442
6b9ea5a6
RP
4443 err_nh = NULL;
4444 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4445 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4446 fib6_info_release(nh->fib6_info);
93531c67 4447
f7225172
DA
4448 if (!err) {
4449 /* save reference to last route successfully inserted */
4450 rt_last = nh->fib6_info;
4451
4452 /* save reference to first route for notification */
4453 if (!rt_notif)
4454 rt_notif = nh->fib6_info;
4455 }
3b1137fe 4456
8d1c802b
DA
4457 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4458 nh->fib6_info = NULL;
6b9ea5a6
RP
4459 if (err) {
4460 if (replace && nhn)
a5a82d84
JK
4461 NL_SET_ERR_MSG_MOD(extack,
4462 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4463 err_nh = nh;
4464 goto add_errout;
51ebd318 4465 }
6b9ea5a6 4466
1a72418b 4467 /* Because each route is added like a single route we remove
27596472
MK
4468 * these flags after the first nexthop: if there is a collision,
4469 * we have already failed to add the first nexthop:
4470 * fib6_add_rt2node() has rejected it; when replacing, old
4471 * nexthops have been replaced by first new, the rest should
4472 * be added to it.
1a72418b 4473 */
27596472
MK
4474 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4475 NLM_F_REPLACE);
6b9ea5a6
RP
4476 nhn++;
4477 }
4478
3b1137fe
DA
4479 /* success ... tell user about new route */
4480 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4481 goto cleanup;
4482
4483add_errout:
3b1137fe
DA
4484 /* send notification for routes that were added so that
4485 * the delete notifications sent by ip6_route_del are
4486 * coherent
4487 */
4488 if (rt_notif)
4489 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4490
6b9ea5a6
RP
4491 /* Delete routes that were already added */
4492 list_for_each_entry(nh, &rt6_nh_list, next) {
4493 if (err_nh == nh)
4494 break;
333c4301 4495 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4496 }
4497
4498cleanup:
4499 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4500 if (nh->fib6_info)
4501 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4502 list_del(&nh->next);
4503 kfree(nh);
4504 }
4505
4506 return err;
4507}
4508
333c4301
DA
4509static int ip6_route_multipath_del(struct fib6_config *cfg,
4510 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4511{
4512 struct fib6_config r_cfg;
4513 struct rtnexthop *rtnh;
4514 int remaining;
4515 int attrlen;
4516 int err = 1, last_err = 0;
4517
4518 remaining = cfg->fc_mp_len;
4519 rtnh = (struct rtnexthop *)cfg->fc_mp;
4520
4521 /* Parse a Multipath Entry */
4522 while (rtnh_ok(rtnh, remaining)) {
4523 memcpy(&r_cfg, cfg, sizeof(*cfg));
4524 if (rtnh->rtnh_ifindex)
4525 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4526
4527 attrlen = rtnh_attrlen(rtnh);
4528 if (attrlen > 0) {
4529 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4530
4531 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4532 if (nla) {
4533 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4534 r_cfg.fc_flags |= RTF_GATEWAY;
4535 }
4536 }
333c4301 4537 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4538 if (err)
4539 last_err = err;
4540
51ebd318
ND
4541 rtnh = rtnh_next(rtnh, &remaining);
4542 }
4543
4544 return last_err;
4545}
4546
c21ef3e3
DA
4547static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4548 struct netlink_ext_ack *extack)
1da177e4 4549{
86872cb5
TG
4550 struct fib6_config cfg;
4551 int err;
1da177e4 4552
333c4301 4553 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4554 if (err < 0)
4555 return err;
4556
51ebd318 4557 if (cfg.fc_mp)
333c4301 4558 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4559 else {
4560 cfg.fc_delete_all_nh = 1;
333c4301 4561 return ip6_route_del(&cfg, extack);
0ae81335 4562 }
1da177e4
LT
4563}
4564
c21ef3e3
DA
4565static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4566 struct netlink_ext_ack *extack)
1da177e4 4567{
86872cb5
TG
4568 struct fib6_config cfg;
4569 int err;
1da177e4 4570
333c4301 4571 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4572 if (err < 0)
4573 return err;
4574
67f69513
DA
4575 if (cfg.fc_metric == 0)
4576 cfg.fc_metric = IP6_RT_PRIO_USER;
4577
51ebd318 4578 if (cfg.fc_mp)
333c4301 4579 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4580 else
acb54e3c 4581 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4582}
4583
8d1c802b 4584static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4585{
beb1afac
DA
4586 int nexthop_len = 0;
4587
93c2fb25 4588 if (rt->fib6_nsiblings) {
beb1afac
DA
4589 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4590 + NLA_ALIGN(sizeof(struct rtnexthop))
4591 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4592 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4593
93c2fb25 4594 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4595 }
4596
339bf98f
TG
4597 return NLMSG_ALIGN(sizeof(struct rtmsg))
4598 + nla_total_size(16) /* RTA_SRC */
4599 + nla_total_size(16) /* RTA_DST */
4600 + nla_total_size(16) /* RTA_GATEWAY */
4601 + nla_total_size(16) /* RTA_PREFSRC */
4602 + nla_total_size(4) /* RTA_TABLE */
4603 + nla_total_size(4) /* RTA_IIF */
4604 + nla_total_size(4) /* RTA_OIF */
4605 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4606 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4607 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4608 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4609 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4610 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4611 + nexthop_len;
4612}
4613
d4ead6b3 4614static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4615 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4616 struct in6_addr *dest, struct in6_addr *src,
15e47304 4617 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4618 unsigned int flags)
1da177e4 4619{
22d0bd82
XL
4620 struct rt6_info *rt6 = (struct rt6_info *)dst;
4621 struct rt6key *rt6_dst, *rt6_src;
4622 u32 *pmetrics, table, rt6_flags;
2d7202bf 4623 struct nlmsghdr *nlh;
22d0bd82 4624 struct rtmsg *rtm;
d4ead6b3 4625 long expires = 0;
1da177e4 4626
15e47304 4627 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4628 if (!nlh)
26932566 4629 return -EMSGSIZE;
2d7202bf 4630
22d0bd82
XL
4631 if (rt6) {
4632 rt6_dst = &rt6->rt6i_dst;
4633 rt6_src = &rt6->rt6i_src;
4634 rt6_flags = rt6->rt6i_flags;
4635 } else {
4636 rt6_dst = &rt->fib6_dst;
4637 rt6_src = &rt->fib6_src;
4638 rt6_flags = rt->fib6_flags;
4639 }
4640
2d7202bf 4641 rtm = nlmsg_data(nlh);
1da177e4 4642 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4643 rtm->rtm_dst_len = rt6_dst->plen;
4644 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4645 rtm->rtm_tos = 0;
93c2fb25
DA
4646 if (rt->fib6_table)
4647 table = rt->fib6_table->tb6_id;
c71099ac 4648 else
9e762a4a 4649 table = RT6_TABLE_UNSPEC;
97f0082a 4650 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4651 if (nla_put_u32(skb, RTA_TABLE, table))
4652 goto nla_put_failure;
e8478e80
DA
4653
4654 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4655 rtm->rtm_flags = 0;
4656 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4657 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4658
22d0bd82 4659 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4660 rtm->rtm_flags |= RTM_F_CLONED;
4661
d4ead6b3
DA
4662 if (dest) {
4663 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4664 goto nla_put_failure;
1ab1457c 4665 rtm->rtm_dst_len = 128;
1da177e4 4666 } else if (rtm->rtm_dst_len)
22d0bd82 4667 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4668 goto nla_put_failure;
1da177e4
LT
4669#ifdef CONFIG_IPV6_SUBTREES
4670 if (src) {
930345ea 4671 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4672 goto nla_put_failure;
1ab1457c 4673 rtm->rtm_src_len = 128;
c78679e8 4674 } else if (rtm->rtm_src_len &&
22d0bd82 4675 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4676 goto nla_put_failure;
1da177e4 4677#endif
7bc570c8
YH
4678 if (iif) {
4679#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4680 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4681 int err = ip6mr_get_route(net, skb, rtm, portid);
4682
4683 if (err == 0)
4684 return 0;
4685 if (err < 0)
4686 goto nla_put_failure;
7bc570c8
YH
4687 } else
4688#endif
c78679e8
DM
4689 if (nla_put_u32(skb, RTA_IIF, iif))
4690 goto nla_put_failure;
d4ead6b3 4691 } else if (dest) {
1da177e4 4692 struct in6_addr saddr_buf;
d4ead6b3 4693 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4694 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4695 goto nla_put_failure;
1da177e4 4696 }
2d7202bf 4697
93c2fb25 4698 if (rt->fib6_prefsrc.plen) {
c3968a85 4699 struct in6_addr saddr_buf;
93c2fb25 4700 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4701 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4702 goto nla_put_failure;
c3968a85
DW
4703 }
4704
d4ead6b3
DA
4705 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4706 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4707 goto nla_put_failure;
4708
93c2fb25 4709 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4710 goto nla_put_failure;
8253947e 4711
beb1afac
DA
4712 /* For multipath routes, walk the siblings list and add
4713 * each as a nexthop within RTA_MULTIPATH.
4714 */
22d0bd82
XL
4715 if (rt6) {
4716 if (rt6_flags & RTF_GATEWAY &&
4717 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4718 goto nla_put_failure;
4719
4720 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4721 goto nla_put_failure;
4722 } else if (rt->fib6_nsiblings) {
8d1c802b 4723 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4724 struct nlattr *mp;
4725
4726 mp = nla_nest_start(skb, RTA_MULTIPATH);
4727 if (!mp)
4728 goto nla_put_failure;
4729
c0a72077
DA
4730 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4731 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4732 goto nla_put_failure;
4733
4734 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4735 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4736 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4737 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4738 goto nla_put_failure;
4739 }
4740
4741 nla_nest_end(skb, mp);
4742 } else {
c0a72077
DA
4743 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4744 &rtm->rtm_flags, false) < 0)
beb1afac
DA
4745 goto nla_put_failure;
4746 }
4747
22d0bd82 4748 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4749 expires = dst ? dst->expires : rt->expires;
4750 expires -= jiffies;
4751 }
69cdf8f9 4752
d4ead6b3 4753 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4754 goto nla_put_failure;
2d7202bf 4755
22d0bd82 4756 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4757 goto nla_put_failure;
4758
19e42e45 4759
053c095a
JB
4760 nlmsg_end(skb, nlh);
4761 return 0;
2d7202bf
TG
4762
4763nla_put_failure:
26932566
PM
4764 nlmsg_cancel(skb, nlh);
4765 return -EMSGSIZE;
1da177e4
LT
4766}
4767
13e38901
DA
4768static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4769 const struct net_device *dev)
4770{
ad1601ae 4771 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4772 return true;
4773
4774 if (f6i->fib6_nsiblings) {
4775 struct fib6_info *sibling, *next_sibling;
4776
4777 list_for_each_entry_safe(sibling, next_sibling,
4778 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4779 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4780 return true;
4781 }
4782 }
4783
4784 return false;
4785}
4786
8d1c802b 4787int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4788{
4789 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4790 struct fib_dump_filter *filter = &arg->filter;
4791 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4792 struct net *net = arg->net;
4793
421842ed 4794 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4795 return 0;
1da177e4 4796
13e38901
DA
4797 if ((filter->flags & RTM_F_PREFIX) &&
4798 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4799 /* success since this is not a prefix route */
4800 return 1;
4801 }
4802 if (filter->filter_set) {
4803 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4804 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4805 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4806 return 1;
4807 }
13e38901 4808 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4809 }
1da177e4 4810
d4ead6b3
DA
4811 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4812 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4813 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4814}
4815
0eff0a27
JK
4816static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4817 const struct nlmsghdr *nlh,
4818 struct nlattr **tb,
4819 struct netlink_ext_ack *extack)
4820{
4821 struct rtmsg *rtm;
4822 int i, err;
4823
4824 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4825 NL_SET_ERR_MSG_MOD(extack,
4826 "Invalid header for get route request");
4827 return -EINVAL;
4828 }
4829
4830 if (!netlink_strict_get_check(skb))
4831 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4832 rtm_ipv6_policy, extack);
4833
4834 rtm = nlmsg_data(nlh);
4835 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4836 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4837 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4838 rtm->rtm_type) {
4839 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4840 return -EINVAL;
4841 }
4842 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4843 NL_SET_ERR_MSG_MOD(extack,
4844 "Invalid flags for get route request");
4845 return -EINVAL;
4846 }
4847
4848 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4849 rtm_ipv6_policy, extack);
4850 if (err)
4851 return err;
4852
4853 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4854 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4855 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4856 return -EINVAL;
4857 }
4858
4859 for (i = 0; i <= RTA_MAX; i++) {
4860 if (!tb[i])
4861 continue;
4862
4863 switch (i) {
4864 case RTA_SRC:
4865 case RTA_DST:
4866 case RTA_IIF:
4867 case RTA_OIF:
4868 case RTA_MARK:
4869 case RTA_UID:
4870 case RTA_SPORT:
4871 case RTA_DPORT:
4872 case RTA_IP_PROTO:
4873 break;
4874 default:
4875 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4876 return -EINVAL;
4877 }
4878 }
4879
4880 return 0;
4881}
4882
c21ef3e3
DA
4883static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4884 struct netlink_ext_ack *extack)
1da177e4 4885{
3b1e0a65 4886 struct net *net = sock_net(in_skb->sk);
ab364a6f 4887 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4888 int err, iif = 0, oif = 0;
a68886a6 4889 struct fib6_info *from;
18c3a61c 4890 struct dst_entry *dst;
ab364a6f 4891 struct rt6_info *rt;
1da177e4 4892 struct sk_buff *skb;
ab364a6f 4893 struct rtmsg *rtm;
744486d4 4894 struct flowi6 fl6 = {};
18c3a61c 4895 bool fibmatch;
1da177e4 4896
0eff0a27 4897 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4898 if (err < 0)
4899 goto errout;
1da177e4 4900
ab364a6f 4901 err = -EINVAL;
38b7097b
HFS
4902 rtm = nlmsg_data(nlh);
4903 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4904 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4905
ab364a6f
TG
4906 if (tb[RTA_SRC]) {
4907 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4908 goto errout;
4909
4e3fd7a0 4910 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4911 }
4912
4913 if (tb[RTA_DST]) {
4914 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4915 goto errout;
4916
4e3fd7a0 4917 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4918 }
4919
4920 if (tb[RTA_IIF])
4921 iif = nla_get_u32(tb[RTA_IIF]);
4922
4923 if (tb[RTA_OIF])
72331bc0 4924 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4925
2e47b291
LC
4926 if (tb[RTA_MARK])
4927 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4928
622ec2c9
LC
4929 if (tb[RTA_UID])
4930 fl6.flowi6_uid = make_kuid(current_user_ns(),
4931 nla_get_u32(tb[RTA_UID]));
4932 else
4933 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4934
eacb9384
RP
4935 if (tb[RTA_SPORT])
4936 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4937
4938 if (tb[RTA_DPORT])
4939 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4940
4941 if (tb[RTA_IP_PROTO]) {
4942 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
4943 &fl6.flowi6_proto, AF_INET6,
4944 extack);
eacb9384
RP
4945 if (err)
4946 goto errout;
4947 }
4948
1da177e4
LT
4949 if (iif) {
4950 struct net_device *dev;
72331bc0
SL
4951 int flags = 0;
4952
121622db
FW
4953 rcu_read_lock();
4954
4955 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4956 if (!dev) {
121622db 4957 rcu_read_unlock();
1da177e4 4958 err = -ENODEV;
ab364a6f 4959 goto errout;
1da177e4 4960 }
72331bc0
SL
4961
4962 fl6.flowi6_iif = iif;
4963
4964 if (!ipv6_addr_any(&fl6.saddr))
4965 flags |= RT6_LOOKUP_F_HAS_SADDR;
4966
b75cc8f9 4967 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4968
4969 rcu_read_unlock();
72331bc0
SL
4970 } else {
4971 fl6.flowi6_oif = oif;
4972
58acfd71 4973 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4974 }
4975
18c3a61c
RP
4976
4977 rt = container_of(dst, struct rt6_info, dst);
4978 if (rt->dst.error) {
4979 err = rt->dst.error;
4980 ip6_rt_put(rt);
4981 goto errout;
1da177e4
LT
4982 }
4983
9d6acb3b
WC
4984 if (rt == net->ipv6.ip6_null_entry) {
4985 err = rt->dst.error;
4986 ip6_rt_put(rt);
4987 goto errout;
4988 }
4989
ab364a6f 4990 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4991 if (!skb) {
94e187c0 4992 ip6_rt_put(rt);
ab364a6f
TG
4993 err = -ENOBUFS;
4994 goto errout;
4995 }
1da177e4 4996
d8d1f30b 4997 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4998
4999 rcu_read_lock();
5000 from = rcu_dereference(rt->from);
5001
18c3a61c 5002 if (fibmatch)
a68886a6 5003 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
5004 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5005 nlh->nlmsg_seq, 0);
5006 else
a68886a6
DA
5007 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5008 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
5009 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5010 0);
a68886a6
DA
5011 rcu_read_unlock();
5012
1da177e4 5013 if (err < 0) {
ab364a6f
TG
5014 kfree_skb(skb);
5015 goto errout;
1da177e4
LT
5016 }
5017
15e47304 5018 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5019errout:
1da177e4 5020 return err;
1da177e4
LT
5021}
5022
8d1c802b 5023void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5024 unsigned int nlm_flags)
1da177e4
LT
5025{
5026 struct sk_buff *skb;
5578689a 5027 struct net *net = info->nl_net;
528c4ceb
DL
5028 u32 seq;
5029 int err;
5030
5031 err = -ENOBUFS;
38308473 5032 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5033
19e42e45 5034 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5035 if (!skb)
21713ebc
TG
5036 goto errout;
5037
d4ead6b3
DA
5038 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5039 event, info->portid, seq, nlm_flags);
26932566
PM
5040 if (err < 0) {
5041 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5042 WARN_ON(err == -EMSGSIZE);
5043 kfree_skb(skb);
5044 goto errout;
5045 }
15e47304 5046 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5047 info->nlh, gfp_any());
5048 return;
21713ebc
TG
5049errout:
5050 if (err < 0)
5578689a 5051 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5052}
5053
8ed67789 5054static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5055 unsigned long event, void *ptr)
8ed67789 5056{
351638e7 5057 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5058 struct net *net = dev_net(dev);
8ed67789 5059
242d3a49
WC
5060 if (!(dev->flags & IFF_LOOPBACK))
5061 return NOTIFY_OK;
5062
5063 if (event == NETDEV_REGISTER) {
ad1601ae 5064 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5065 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5066 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5067#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5068 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5069 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5070 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5071 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5072#endif
76da0704
WC
5073 } else if (event == NETDEV_UNREGISTER &&
5074 dev->reg_state != NETREG_UNREGISTERED) {
5075 /* NETDEV_UNREGISTER could be fired for multiple times by
5076 * netdev_wait_allrefs(). Make sure we only call this once.
5077 */
12d94a80 5078 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5079#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5080 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5081 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5082#endif
5083 }
5084
5085 return NOTIFY_OK;
5086}
5087
1da177e4
LT
5088/*
5089 * /proc
5090 */
5091
5092#ifdef CONFIG_PROC_FS
1da177e4
LT
5093static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5094{
69ddb805 5095 struct net *net = (struct net *)seq->private;
1da177e4 5096 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5097 net->ipv6.rt6_stats->fib_nodes,
5098 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5099 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5100 net->ipv6.rt6_stats->fib_rt_entries,
5101 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5102 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5103 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5104
5105 return 0;
5106}
1da177e4
LT
5107#endif /* CONFIG_PROC_FS */
5108
5109#ifdef CONFIG_SYSCTL
5110
1da177e4 5111static
fe2c6338 5112int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5113 void __user *buffer, size_t *lenp, loff_t *ppos)
5114{
c486da34
LAG
5115 struct net *net;
5116 int delay;
f0fb9b28 5117 int ret;
c486da34 5118 if (!write)
1da177e4 5119 return -EINVAL;
c486da34
LAG
5120
5121 net = (struct net *)ctl->extra1;
5122 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5123 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5124 if (ret)
5125 return ret;
5126
2ac3ac8f 5127 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5128 return 0;
1da177e4
LT
5129}
5130
7c6bb7d2
DA
5131static int zero;
5132static int one = 1;
5133
ed792e28 5134static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5135 {
1da177e4 5136 .procname = "flush",
4990509f 5137 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5138 .maxlen = sizeof(int),
89c8b3a1 5139 .mode = 0200,
6d9f239a 5140 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5141 },
5142 {
1da177e4 5143 .procname = "gc_thresh",
9a7ec3a9 5144 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5145 .maxlen = sizeof(int),
5146 .mode = 0644,
6d9f239a 5147 .proc_handler = proc_dointvec,
1da177e4
LT
5148 },
5149 {
1da177e4 5150 .procname = "max_size",
4990509f 5151 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5152 .maxlen = sizeof(int),
5153 .mode = 0644,
6d9f239a 5154 .proc_handler = proc_dointvec,
1da177e4
LT
5155 },
5156 {
1da177e4 5157 .procname = "gc_min_interval",
4990509f 5158 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5159 .maxlen = sizeof(int),
5160 .mode = 0644,
6d9f239a 5161 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5162 },
5163 {
1da177e4 5164 .procname = "gc_timeout",
4990509f 5165 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5166 .maxlen = sizeof(int),
5167 .mode = 0644,
6d9f239a 5168 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5169 },
5170 {
1da177e4 5171 .procname = "gc_interval",
4990509f 5172 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5173 .maxlen = sizeof(int),
5174 .mode = 0644,
6d9f239a 5175 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5176 },
5177 {
1da177e4 5178 .procname = "gc_elasticity",
4990509f 5179 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5180 .maxlen = sizeof(int),
5181 .mode = 0644,
f3d3f616 5182 .proc_handler = proc_dointvec,
1da177e4
LT
5183 },
5184 {
1da177e4 5185 .procname = "mtu_expires",
4990509f 5186 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5187 .maxlen = sizeof(int),
5188 .mode = 0644,
6d9f239a 5189 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5190 },
5191 {
1da177e4 5192 .procname = "min_adv_mss",
4990509f 5193 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5194 .maxlen = sizeof(int),
5195 .mode = 0644,
f3d3f616 5196 .proc_handler = proc_dointvec,
1da177e4
LT
5197 },
5198 {
1da177e4 5199 .procname = "gc_min_interval_ms",
4990509f 5200 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5201 .maxlen = sizeof(int),
5202 .mode = 0644,
6d9f239a 5203 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5204 },
7c6bb7d2
DA
5205 {
5206 .procname = "skip_notify_on_dev_down",
5207 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5208 .maxlen = sizeof(int),
5209 .mode = 0644,
5210 .proc_handler = proc_dointvec,
5211 .extra1 = &zero,
5212 .extra2 = &one,
5213 },
f8572d8f 5214 { }
1da177e4
LT
5215};
5216
2c8c1e72 5217struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5218{
5219 struct ctl_table *table;
5220
5221 table = kmemdup(ipv6_route_table_template,
5222 sizeof(ipv6_route_table_template),
5223 GFP_KERNEL);
5ee09105
YH
5224
5225 if (table) {
5226 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5227 table[0].extra1 = net;
86393e52 5228 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5229 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5230 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5231 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5232 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5233 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5234 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5235 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5236 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5237 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5238
5239 /* Don't export sysctls to unprivileged users */
5240 if (net->user_ns != &init_user_ns)
5241 table[0].procname = NULL;
5ee09105
YH
5242 }
5243
760f2d01
DL
5244 return table;
5245}
1da177e4
LT
5246#endif
5247
2c8c1e72 5248static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5249{
633d424b 5250 int ret = -ENOMEM;
8ed67789 5251
86393e52
AD
5252 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5253 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5254
fc66f95c
ED
5255 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5256 goto out_ip6_dst_ops;
5257
421842ed
DA
5258 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5259 sizeof(*net->ipv6.fib6_null_entry),
5260 GFP_KERNEL);
5261 if (!net->ipv6.fib6_null_entry)
5262 goto out_ip6_dst_entries;
5263
8ed67789
DL
5264 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5265 sizeof(*net->ipv6.ip6_null_entry),
5266 GFP_KERNEL);
5267 if (!net->ipv6.ip6_null_entry)
421842ed 5268 goto out_fib6_null_entry;
d8d1f30b 5269 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5270 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5271 ip6_template_metrics, true);
8ed67789
DL
5272
5273#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5274 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5275 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5276 sizeof(*net->ipv6.ip6_prohibit_entry),
5277 GFP_KERNEL);
68fffc67
PZ
5278 if (!net->ipv6.ip6_prohibit_entry)
5279 goto out_ip6_null_entry;
d8d1f30b 5280 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5281 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5282 ip6_template_metrics, true);
8ed67789
DL
5283
5284 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5285 sizeof(*net->ipv6.ip6_blk_hole_entry),
5286 GFP_KERNEL);
68fffc67
PZ
5287 if (!net->ipv6.ip6_blk_hole_entry)
5288 goto out_ip6_prohibit_entry;
d8d1f30b 5289 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5290 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5291 ip6_template_metrics, true);
8ed67789
DL
5292#endif
5293
b339a47c
PZ
5294 net->ipv6.sysctl.flush_delay = 0;
5295 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5296 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5297 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5298 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5299 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5300 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5301 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5302 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5303
6891a346
BT
5304 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5305
8ed67789
DL
5306 ret = 0;
5307out:
5308 return ret;
f2fc6a54 5309
68fffc67
PZ
5310#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5311out_ip6_prohibit_entry:
5312 kfree(net->ipv6.ip6_prohibit_entry);
5313out_ip6_null_entry:
5314 kfree(net->ipv6.ip6_null_entry);
5315#endif
421842ed
DA
5316out_fib6_null_entry:
5317 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5318out_ip6_dst_entries:
5319 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5320out_ip6_dst_ops:
f2fc6a54 5321 goto out;
cdb18761
DL
5322}
5323
2c8c1e72 5324static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5325{
421842ed 5326 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5327 kfree(net->ipv6.ip6_null_entry);
5328#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5329 kfree(net->ipv6.ip6_prohibit_entry);
5330 kfree(net->ipv6.ip6_blk_hole_entry);
5331#endif
41bb78b4 5332 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5333}
5334
d189634e
TG
5335static int __net_init ip6_route_net_init_late(struct net *net)
5336{
5337#ifdef CONFIG_PROC_FS
c3506372
CH
5338 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5339 sizeof(struct ipv6_route_iter));
3617d949
CH
5340 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5341 rt6_stats_seq_show, NULL);
d189634e
TG
5342#endif
5343 return 0;
5344}
5345
5346static void __net_exit ip6_route_net_exit_late(struct net *net)
5347{
5348#ifdef CONFIG_PROC_FS
ece31ffd
G
5349 remove_proc_entry("ipv6_route", net->proc_net);
5350 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5351#endif
5352}
5353
cdb18761
DL
5354static struct pernet_operations ip6_route_net_ops = {
5355 .init = ip6_route_net_init,
5356 .exit = ip6_route_net_exit,
5357};
5358
c3426b47
DM
5359static int __net_init ipv6_inetpeer_init(struct net *net)
5360{
5361 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5362
5363 if (!bp)
5364 return -ENOMEM;
5365 inet_peer_base_init(bp);
5366 net->ipv6.peers = bp;
5367 return 0;
5368}
5369
5370static void __net_exit ipv6_inetpeer_exit(struct net *net)
5371{
5372 struct inet_peer_base *bp = net->ipv6.peers;
5373
5374 net->ipv6.peers = NULL;
56a6b248 5375 inetpeer_invalidate_tree(bp);
c3426b47
DM
5376 kfree(bp);
5377}
5378
2b823f72 5379static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5380 .init = ipv6_inetpeer_init,
5381 .exit = ipv6_inetpeer_exit,
5382};
5383
d189634e
TG
5384static struct pernet_operations ip6_route_net_late_ops = {
5385 .init = ip6_route_net_init_late,
5386 .exit = ip6_route_net_exit_late,
5387};
5388
8ed67789
DL
5389static struct notifier_block ip6_route_dev_notifier = {
5390 .notifier_call = ip6_route_dev_notify,
242d3a49 5391 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5392};
5393
2f460933
WC
5394void __init ip6_route_init_special_entries(void)
5395{
5396 /* Registering of the loopback is done before this portion of code,
5397 * the loopback reference in rt6_info will not be taken, do it
5398 * manually for init_net */
ad1601ae 5399 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5400 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5401 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5402 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5403 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5404 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5405 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5406 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5407 #endif
5408}
5409
433d49c3 5410int __init ip6_route_init(void)
1da177e4 5411{
433d49c3 5412 int ret;
8d0b94af 5413 int cpu;
433d49c3 5414
9a7ec3a9
DL
5415 ret = -ENOMEM;
5416 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5417 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5418 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5419 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5420 goto out;
14e50e57 5421
fc66f95c 5422 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5423 if (ret)
bdb3289f 5424 goto out_kmem_cache;
bdb3289f 5425
c3426b47
DM
5426 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5427 if (ret)
e8803b6c 5428 goto out_dst_entries;
2a0c451a 5429
7e52b33b
DM
5430 ret = register_pernet_subsys(&ip6_route_net_ops);
5431 if (ret)
5432 goto out_register_inetpeer;
c3426b47 5433
5dc121e9
AE
5434 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5435
e8803b6c 5436 ret = fib6_init();
433d49c3 5437 if (ret)
8ed67789 5438 goto out_register_subsys;
433d49c3 5439
433d49c3
DL
5440 ret = xfrm6_init();
5441 if (ret)
e8803b6c 5442 goto out_fib6_init;
c35b7e72 5443
433d49c3
DL
5444 ret = fib6_rules_init();
5445 if (ret)
5446 goto xfrm6_init;
7e5449c2 5447
d189634e
TG
5448 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5449 if (ret)
5450 goto fib6_rules_init;
5451
16feebcf
FW
5452 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5453 inet6_rtm_newroute, NULL, 0);
5454 if (ret < 0)
5455 goto out_register_late_subsys;
5456
5457 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5458 inet6_rtm_delroute, NULL, 0);
5459 if (ret < 0)
5460 goto out_register_late_subsys;
5461
5462 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5463 inet6_rtm_getroute, NULL,
5464 RTNL_FLAG_DOIT_UNLOCKED);
5465 if (ret < 0)
d189634e 5466 goto out_register_late_subsys;
c127ea2c 5467
8ed67789 5468 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5469 if (ret)
d189634e 5470 goto out_register_late_subsys;
8ed67789 5471
8d0b94af
MKL
5472 for_each_possible_cpu(cpu) {
5473 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5474
5475 INIT_LIST_HEAD(&ul->head);
5476 spin_lock_init(&ul->lock);
5477 }
5478
433d49c3
DL
5479out:
5480 return ret;
5481
d189634e 5482out_register_late_subsys:
16feebcf 5483 rtnl_unregister_all(PF_INET6);
d189634e 5484 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5485fib6_rules_init:
433d49c3
DL
5486 fib6_rules_cleanup();
5487xfrm6_init:
433d49c3 5488 xfrm6_fini();
2a0c451a
TG
5489out_fib6_init:
5490 fib6_gc_cleanup();
8ed67789
DL
5491out_register_subsys:
5492 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5493out_register_inetpeer:
5494 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5495out_dst_entries:
5496 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5497out_kmem_cache:
f2fc6a54 5498 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5499 goto out;
1da177e4
LT
5500}
5501
5502void ip6_route_cleanup(void)
5503{
8ed67789 5504 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5505 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5506 fib6_rules_cleanup();
1da177e4 5507 xfrm6_fini();
1da177e4 5508 fib6_gc_cleanup();
c3426b47 5509 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5510 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5511 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5512 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5513}