net/ipv6: Move dst flags to booleans in fib entries
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check used while scoring routes.
 * Negative values are failures; rt6_score_route() hands them back to
 * find_match() unchanged.
 */
enum rt6_nud_state {
	/* route must not be used; find_match() skips it */
	RT6_NUD_FAIL_HARD	= -3,
	/* neighbour is in NUD_FAILED state (CONFIG_IPV6_ROUTER_PREF only) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* no neighbour entry; find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* neighbour is usable, or the route needs no neighbour check */
	RT6_NUD_SUCCEED		= 1,
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
52bd4c0c 99static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3 100static size_t rt6_nlmsg_size(struct rt6_info *rt);
d4ead6b3
DA
101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102 struct rt6_info *rt, struct dst_entry *dst,
103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
35732d01
WW
106static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 111static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
efa2cea0 116static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f894cbf8
DM
185static inline const void *choose_neigh_daddr(struct rt6_info *rt,
186 struct sk_buff *skb,
187 const void *daddr)
39232973
DM
188{
189 struct in6_addr *p = &rt->rt6i_gateway;
190
a7563f34 191 if (!ipv6_addr_any(p))
39232973 192 return (const void *) p;
f894cbf8
DM
193 else if (skb)
194 return &ipv6_hdr(skb)->daddr;
39232973
DM
195 return daddr;
196}
197
f894cbf8
DM
198static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
199 struct sk_buff *skb,
200 const void *daddr)
d3aaeb38 201{
39232973
DM
202 struct rt6_info *rt = (struct rt6_info *) dst;
203 struct neighbour *n;
204
f894cbf8 205 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 206 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
207 if (n)
208 return n;
209 return neigh_create(&nd_tbl, daddr, dst->dev);
210}
211
63fca65d
JA
212static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
213{
214 struct net_device *dev = dst->dev;
215 struct rt6_info *rt = (struct rt6_info *)dst;
216
217 daddr = choose_neigh_daddr(rt, NULL, daddr);
218 if (!daddr)
219 return;
220 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
221 return;
222 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
223 return;
224 __ipv6_confirm_neigh(dev, daddr);
225}
226
9a7ec3a9 227static struct dst_ops ip6_dst_ops_template = {
1da177e4 228 .family = AF_INET6,
1da177e4
LT
229 .gc = ip6_dst_gc,
230 .gc_thresh = 1024,
231 .check = ip6_dst_check,
0dbaee3b 232 .default_advmss = ip6_default_advmss,
ebb762f2 233 .mtu = ip6_mtu,
d4ead6b3 234 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
235 .destroy = ip6_dst_destroy,
236 .ifdown = ip6_dst_ifdown,
237 .negative_advice = ip6_negative_advice,
238 .link_failure = ip6_link_failure,
239 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 240 .redirect = rt6_do_redirect,
9f8955cc 241 .local_out = __ip6_local_out,
d3aaeb38 242 .neigh_lookup = ip6_neigh_lookup,
63fca65d 243 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
244};
245
ebb762f2 246static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 247{
618f9bc7
SK
248 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
249
250 return mtu ? : dst->dev->mtu;
ec831ea7
RD
251}
252
6700c270
DM
253static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
254 struct sk_buff *skb, u32 mtu)
14e50e57
DM
255{
256}
257
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
262
14e50e57
DM
263static struct dst_ops ip6_dst_blackhole_ops = {
264 .family = AF_INET6,
14e50e57
DM
265 .destroy = ip6_dst_destroy,
266 .check = ip6_dst_check,
ebb762f2 267 .mtu = ip6_blackhole_mtu,
214f45c9 268 .default_advmss = ip6_default_advmss,
14e50e57 269 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 270 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 271 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 272 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
273};
274
62fa8a84 275static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 276 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
277};
278
421842ed
DA
279static const struct rt6_info fib6_null_entry_template = {
280 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
281 .rt6i_protocol = RTPROT_KERNEL,
282 .rt6i_metric = ~(u32)0,
283 .rt6i_ref = ATOMIC_INIT(1),
284 .fib6_type = RTN_UNREACHABLE,
285 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
286};
287
fb0af4c7 288static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
289 .dst = {
290 .__refcnt = ATOMIC_INIT(1),
291 .__use = 1,
2c20cbd7 292 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 293 .error = -ENETUNREACH,
d8d1f30b
CG
294 .input = ip6_pkt_discard,
295 .output = ip6_pkt_discard_out,
1da177e4
LT
296 },
297 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 298 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
299 .rt6i_metric = ~(u32) 0,
300 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 301 .fib6_type = RTN_UNREACHABLE,
1da177e4
LT
302};
303
101367c2
TG
304#ifdef CONFIG_IPV6_MULTIPLE_TABLES
305
fb0af4c7 306static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
307 .dst = {
308 .__refcnt = ATOMIC_INIT(1),
309 .__use = 1,
2c20cbd7 310 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 311 .error = -EACCES,
d8d1f30b
CG
312 .input = ip6_pkt_prohibit,
313 .output = ip6_pkt_prohibit_out,
101367c2
TG
314 },
315 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 316 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
317 .rt6i_metric = ~(u32) 0,
318 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 319 .fib6_type = RTN_PROHIBIT,
101367c2
TG
320};
321
fb0af4c7 322static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
323 .dst = {
324 .__refcnt = ATOMIC_INIT(1),
325 .__use = 1,
2c20cbd7 326 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 327 .error = -EINVAL,
d8d1f30b 328 .input = dst_discard,
ede2059d 329 .output = dst_discard_out,
101367c2
TG
330 },
331 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 332 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
333 .rt6i_metric = ~(u32) 0,
334 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 335 .fib6_type = RTN_BLACKHOLE,
101367c2
TG
336};
337
338#endif
339
ebfa45f0
MKL
340static void rt6_info_init(struct rt6_info *rt)
341{
342 struct dst_entry *dst = &rt->dst;
343
344 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
345 INIT_LIST_HEAD(&rt->rt6i_siblings);
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
d4ead6b3 347 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
ebfa45f0
MKL
348}
349
1da177e4 350/* allocate dst with ip6_dst_ops */
d52d3997
MKL
351static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 struct net_device *dev,
ad706862 353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
81eb8447 358 if (rt) {
ebfa45f0 359 rt6_info_init(rt);
81eb8447
WW
360 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361 }
8104891b 362
cf911662 363 return rt;
1da177e4
LT
364}
365
9ab179d8
DA
366struct rt6_info *ip6_dst_alloc(struct net *net,
367 struct net_device *dev,
368 int flags)
d52d3997 369{
ad706862 370 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
371
372 if (rt) {
373 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 374 if (!rt->rt6i_pcpu) {
587fea74 375 dst_release_immediate(&rt->dst);
d52d3997
MKL
376 return NULL;
377 }
378 }
379
380 return rt;
381}
9ab179d8 382EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 383
1da177e4
LT
384static void ip6_dst_destroy(struct dst_entry *dst)
385{
386 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 387 struct rt6_exception_bucket *bucket;
3a2232e9 388 struct rt6_info *from = rt->from;
8d0b94af 389 struct inet6_dev *idev;
d4ead6b3 390 struct dst_metrics *m;
1da177e4 391
4b32b5ad 392 dst_destroy_metrics_generic(dst);
87775312 393 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
394 rt6_uncached_list_del(rt);
395
396 idev = rt->rt6i_idev;
38308473 397 if (idev) {
1da177e4
LT
398 rt->rt6i_idev = NULL;
399 in6_dev_put(idev);
1ab1457c 400 }
35732d01
WW
401 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
402 if (bucket) {
403 rt->rt6i_exception_bucket = NULL;
404 kfree(bucket);
405 }
1716a961 406
d4ead6b3
DA
407 m = rt->fib6_metrics;
408 if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
409 kfree(m);
410
3a2232e9
DM
411 rt->from = NULL;
412 dst_release(&from->dst);
b3419363
DM
413}
414
1da177e4
LT
415static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
416 int how)
417{
418 struct rt6_info *rt = (struct rt6_info *)dst;
419 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 420 struct net_device *loopback_dev =
c346dca1 421 dev_net(dev)->loopback_dev;
1da177e4 422
e5645f51
WW
423 if (idev && idev->dev != loopback_dev) {
424 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
425 if (loopback_idev) {
426 rt->rt6i_idev = loopback_idev;
427 in6_dev_put(idev);
97cac082 428 }
1da177e4
LT
429 }
430}
431
5973fb1e
MKL
432static bool __rt6_check_expired(const struct rt6_info *rt)
433{
434 if (rt->rt6i_flags & RTF_EXPIRES)
435 return time_after(jiffies, rt->dst.expires);
436 else
437 return false;
438}
439
a50feda5 440static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 441{
1716a961
G
442 if (rt->rt6i_flags & RTF_EXPIRES) {
443 if (time_after(jiffies, rt->dst.expires))
a50feda5 444 return true;
3a2232e9 445 } else if (rt->from) {
1e2ea8ad 446 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
14895687 447 fib6_check_expired(rt->from);
1716a961 448 }
a50feda5 449 return false;
1da177e4
LT
450}
451
b4bac172
DA
452static struct rt6_info *rt6_multipath_select(const struct net *net,
453 struct rt6_info *match,
52bd4c0c 454 struct flowi6 *fl6, int oif,
b75cc8f9 455 const struct sk_buff *skb,
52bd4c0c 456 int strict)
51ebd318
ND
457{
458 struct rt6_info *sibling, *next_sibling;
51ebd318 459
b673d6cc
JS
460 /* We might have already computed the hash for ICMPv6 errors. In such
461 * case it will always be non-zero. Otherwise now is the time to do it.
462 */
463 if (!fl6->mp_hash)
b4bac172 464 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 465
5e670d84 466 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
467 return match;
468
469 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
470 rt6i_siblings) {
5e670d84
DA
471 int nh_upper_bound;
472
473 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
474 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
475 continue;
476 if (rt6_score_route(sibling, oif, strict) < 0)
477 break;
478 match = sibling;
479 break;
480 }
481
51ebd318
ND
482 return match;
483}
484
1da177e4 485/*
66f5d6ce 486 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
487 */
488
8ed67789
DL
489static inline struct rt6_info *rt6_device_match(struct net *net,
490 struct rt6_info *rt,
b71d1d42 491 const struct in6_addr *saddr,
1da177e4 492 int oif,
d420895e 493 int flags)
1da177e4
LT
494{
495 struct rt6_info *local = NULL;
496 struct rt6_info *sprt;
497
5e670d84
DA
498 if (!oif && ipv6_addr_any(saddr) &&
499 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 500 return rt;
dd3abc4e 501
071fb37e 502 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 503 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 504
5e670d84 505 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
506 continue;
507
dd3abc4e 508 if (oif) {
1da177e4
LT
509 if (dev->ifindex == oif)
510 return sprt;
511 if (dev->flags & IFF_LOOPBACK) {
38308473 512 if (!sprt->rt6i_idev ||
1da177e4 513 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 514 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 515 continue;
17fb0b2b
DA
516 if (local &&
517 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
518 continue;
519 }
520 local = sprt;
521 }
dd3abc4e
YH
522 } else {
523 if (ipv6_chk_addr(net, saddr, dev,
524 flags & RT6_LOOKUP_F_IFACE))
525 return sprt;
1da177e4 526 }
dd3abc4e 527 }
1da177e4 528
dd3abc4e 529 if (oif) {
1da177e4
LT
530 if (local)
531 return local;
532
d420895e 533 if (flags & RT6_LOOKUP_F_IFACE)
421842ed 534 return net->ipv6.fib6_null_entry;
1da177e4 535 }
8067bb8c 536
421842ed 537 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
538}
539
27097255 540#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
541struct __rt6_probe_work {
542 struct work_struct work;
543 struct in6_addr target;
544 struct net_device *dev;
545};
546
547static void rt6_probe_deferred(struct work_struct *w)
548{
549 struct in6_addr mcaddr;
550 struct __rt6_probe_work *work =
551 container_of(w, struct __rt6_probe_work, work);
552
553 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 554 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 555 dev_put(work->dev);
662f5533 556 kfree(work);
c2f17e82
HFS
557}
558
27097255
YH
559static void rt6_probe(struct rt6_info *rt)
560{
990edb42 561 struct __rt6_probe_work *work;
5e670d84 562 const struct in6_addr *nh_gw;
f2c31e32 563 struct neighbour *neigh;
5e670d84
DA
564 struct net_device *dev;
565
27097255
YH
566 /*
567 * Okay, this does not seem to be appropriate
568 * for now, however, we need to check if it
569 * is really so; aka Router Reachability Probing.
570 *
571 * Router Reachability Probe MUST be rate-limited
572 * to no more than one per minute.
573 */
2152caea 574 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 575 return;
5e670d84
DA
576
577 nh_gw = &rt->fib6_nh.nh_gw;
578 dev = rt->fib6_nh.nh_dev;
2152caea 579 rcu_read_lock_bh();
5e670d84 580 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 581 if (neigh) {
8d6c31bf
MKL
582 if (neigh->nud_state & NUD_VALID)
583 goto out;
584
990edb42 585 work = NULL;
2152caea 586 write_lock(&neigh->lock);
990edb42
MKL
587 if (!(neigh->nud_state & NUD_VALID) &&
588 time_after(jiffies,
589 neigh->updated +
590 rt->rt6i_idev->cnf.rtr_probe_interval)) {
591 work = kmalloc(sizeof(*work), GFP_ATOMIC);
592 if (work)
593 __neigh_set_probe_once(neigh);
c2f17e82 594 }
2152caea 595 write_unlock(&neigh->lock);
990edb42
MKL
596 } else {
597 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 598 }
990edb42
MKL
599
600 if (work) {
601 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
602 work->target = *nh_gw;
603 dev_hold(dev);
604 work->dev = dev;
990edb42
MKL
605 schedule_work(&work->work);
606 }
607
8d6c31bf 608out:
2152caea 609 rcu_read_unlock_bh();
27097255
YH
610}
611#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
615#endif
616
1da177e4 617/*
554cfb7e 618 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 619 */
b6f99a21 620static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 621{
5e670d84
DA
622 const struct net_device *dev = rt->fib6_nh.nh_dev;
623
161980f4 624 if (!oif || dev->ifindex == oif)
554cfb7e 625 return 2;
161980f4
DM
626 if ((dev->flags & IFF_LOOPBACK) &&
627 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
628 return 1;
629 return 0;
554cfb7e 630}
1da177e4 631
afc154e9 632static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 633{
afc154e9 634 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 635 struct neighbour *neigh;
f2c31e32 636
4d0c5911
YH
637 if (rt->rt6i_flags & RTF_NONEXTHOP ||
638 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 639 return RT6_NUD_SUCCEED;
145a3621
YH
640
641 rcu_read_lock_bh();
5e670d84
DA
642 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
643 &rt->fib6_nh.nh_gw);
145a3621
YH
644 if (neigh) {
645 read_lock(&neigh->lock);
554cfb7e 646 if (neigh->nud_state & NUD_VALID)
afc154e9 647 ret = RT6_NUD_SUCCEED;
398bcbeb 648#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 649 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 650 ret = RT6_NUD_SUCCEED;
7e980569
JB
651 else
652 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 653#endif
145a3621 654 read_unlock(&neigh->lock);
afc154e9
HFS
655 } else {
656 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 657 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 658 }
145a3621
YH
659 rcu_read_unlock_bh();
660
a5a81f0b 661 return ret;
1da177e4
LT
662}
663
554cfb7e
YH
664static int rt6_score_route(struct rt6_info *rt, int oif,
665 int strict)
1da177e4 666{
a5a81f0b 667 int m;
1ab1457c 668
4d0c5911 669 m = rt6_check_dev(rt, oif);
77d16f45 670 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 671 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
672#ifdef CONFIG_IPV6_ROUTER_PREF
673 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
674#endif
afc154e9
HFS
675 if (strict & RT6_LOOKUP_F_REACHABLE) {
676 int n = rt6_check_neigh(rt);
677 if (n < 0)
678 return n;
679 }
554cfb7e
YH
680 return m;
681}
682
f11e6659 683static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
684 int *mpri, struct rt6_info *match,
685 bool *do_rr)
554cfb7e 686{
f11e6659 687 int m;
afc154e9 688 bool match_do_rr = false;
35103d11 689 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 690
5e670d84 691 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
692 goto out;
693
14c5206c 694 if (idev->cnf.ignore_routes_with_linkdown &&
5e670d84 695 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 696 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 697 goto out;
f11e6659 698
14895687 699 if (fib6_check_expired(rt))
f11e6659
DM
700 goto out;
701
702 m = rt6_score_route(rt, oif, strict);
7e980569 703 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
704 match_do_rr = true;
705 m = 0; /* lowest valid score */
7e980569 706 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 707 goto out;
afc154e9
HFS
708 }
709
710 if (strict & RT6_LOOKUP_F_REACHABLE)
711 rt6_probe(rt);
f11e6659 712
7e980569 713 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 714 if (m > *mpri) {
afc154e9 715 *do_rr = match_do_rr;
f11e6659
DM
716 *mpri = m;
717 match = rt;
f11e6659 718 }
f11e6659
DM
719out:
720 return match;
721}
722
723static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 724 struct rt6_info *leaf,
f11e6659 725 struct rt6_info *rr_head,
afc154e9
HFS
726 u32 metric, int oif, int strict,
727 bool *do_rr)
f11e6659 728{
9fbdcfaf 729 struct rt6_info *rt, *match, *cont;
554cfb7e 730 int mpri = -1;
1da177e4 731
f11e6659 732 match = NULL;
9fbdcfaf 733 cont = NULL;
071fb37e 734 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
735 if (rt->rt6i_metric != metric) {
736 cont = rt;
737 break;
738 }
739
740 match = find_match(rt, oif, strict, &mpri, match, do_rr);
741 }
742
66f5d6ce 743 for (rt = leaf; rt && rt != rr_head;
071fb37e 744 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
745 if (rt->rt6i_metric != metric) {
746 cont = rt;
747 break;
748 }
749
afc154e9 750 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
751 }
752
753 if (match || !cont)
754 return match;
755
071fb37e 756 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 757 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 758
f11e6659
DM
759 return match;
760}
1da177e4 761
8d1040e8
WW
762static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
763 int oif, int strict)
f11e6659 764{
66f5d6ce 765 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 766 struct rt6_info *match, *rt0;
afc154e9 767 bool do_rr = false;
17ecf590 768 int key_plen;
1da177e4 769
421842ed
DA
770 if (!leaf || leaf == net->ipv6.fib6_null_entry)
771 return net->ipv6.fib6_null_entry;
8d1040e8 772
66f5d6ce 773 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 774 if (!rt0)
66f5d6ce 775 rt0 = leaf;
1da177e4 776
17ecf590
WW
777 /* Double check to make sure fn is not an intermediate node
778 * and fn->leaf does not points to its child's leaf
779 * (This might happen if all routes under fn are deleted from
780 * the tree and fib6_repair_tree() is called on the node.)
781 */
782 key_plen = rt0->rt6i_dst.plen;
783#ifdef CONFIG_IPV6_SUBTREES
784 if (rt0->rt6i_src.plen)
785 key_plen = rt0->rt6i_src.plen;
786#endif
787 if (fn->fn_bit != key_plen)
421842ed 788 return net->ipv6.fib6_null_entry;
17ecf590 789
8d1040e8 790 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 791 &do_rr);
1da177e4 792
afc154e9 793 if (do_rr) {
071fb37e 794 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 795
554cfb7e 796 /* no entries matched; do round-robin */
f11e6659 797 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 798 next = leaf;
f11e6659 799
66f5d6ce
WW
800 if (next != rt0) {
801 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
802 /* make sure next is not being deleted from the tree */
803 if (next->rt6i_node)
804 rcu_assign_pointer(fn->rr_ptr, next);
805 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
806 }
1da177e4 807 }
1da177e4 808
421842ed 809 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
810}
811
8b9df265
MKL
812static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
813{
814 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
815}
816
70ceb4f5
YH
817#ifdef CONFIG_IPV6_ROUTE_INFO
818int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 819 const struct in6_addr *gwaddr)
70ceb4f5 820{
c346dca1 821 struct net *net = dev_net(dev);
70ceb4f5
YH
822 struct route_info *rinfo = (struct route_info *) opt;
823 struct in6_addr prefix_buf, *prefix;
824 unsigned int pref;
4bed72e4 825 unsigned long lifetime;
70ceb4f5
YH
826 struct rt6_info *rt;
827
828 if (len < sizeof(struct route_info)) {
829 return -EINVAL;
830 }
831
832 /* Sanity check for prefix_len and length */
833 if (rinfo->length > 3) {
834 return -EINVAL;
835 } else if (rinfo->prefix_len > 128) {
836 return -EINVAL;
837 } else if (rinfo->prefix_len > 64) {
838 if (rinfo->length < 2) {
839 return -EINVAL;
840 }
841 } else if (rinfo->prefix_len > 0) {
842 if (rinfo->length < 1) {
843 return -EINVAL;
844 }
845 }
846
847 pref = rinfo->route_pref;
848 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 849 return -EINVAL;
70ceb4f5 850
4bed72e4 851 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
852
853 if (rinfo->length == 3)
854 prefix = (struct in6_addr *)rinfo->prefix;
855 else {
856 /* this function is safe */
857 ipv6_addr_prefix(&prefix_buf,
858 (struct in6_addr *)rinfo->prefix,
859 rinfo->prefix_len);
860 prefix = &prefix_buf;
861 }
862
f104a567 863 if (rinfo->prefix_len == 0)
afb1d4b5 864 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
865 else
866 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 867 gwaddr, dev);
70ceb4f5
YH
868
869 if (rt && !lifetime) {
afb1d4b5 870 ip6_del_rt(net, rt);
70ceb4f5
YH
871 rt = NULL;
872 }
873
874 if (!rt && lifetime)
830218c1
DA
875 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
876 dev, pref);
70ceb4f5
YH
877 else if (rt)
878 rt->rt6i_flags = RTF_ROUTEINFO |
879 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
880
881 if (rt) {
1716a961 882 if (!addrconf_finite_timeout(lifetime))
14895687 883 fib6_clean_expires(rt);
1716a961 884 else
14895687 885 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 886
94e187c0 887 ip6_rt_put(rt);
70ceb4f5
YH
888 }
889 return 0;
890}
891#endif
892
ae90d867
DA
893/*
894 * Misc support functions
895 */
896
897/* called with rcu_lock held */
898static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
899{
5e670d84 900 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867
DA
901
902 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
903 /* for copies of local routes, dst->dev needs to be the
904 * device if it is a master device, the master device if
905 * device is enslaved, and the loopback as the default
906 */
907 if (netif_is_l3_slave(dev) &&
908 !rt6_need_strict(&rt->rt6i_dst.addr))
909 dev = l3mdev_master_dev_rcu(dev);
910 else if (!netif_is_l3_master(dev))
911 dev = dev_net(dev)->loopback_dev;
912 /* last case is netif_is_l3_master(dev) is true in which
913 * case we want dev returned to be dev
914 */
915 }
916
917 return dev;
918}
919
6edb3c96
DA
920static const int fib6_prop[RTN_MAX + 1] = {
921 [RTN_UNSPEC] = 0,
922 [RTN_UNICAST] = 0,
923 [RTN_LOCAL] = 0,
924 [RTN_BROADCAST] = 0,
925 [RTN_ANYCAST] = 0,
926 [RTN_MULTICAST] = 0,
927 [RTN_BLACKHOLE] = -EINVAL,
928 [RTN_UNREACHABLE] = -EHOSTUNREACH,
929 [RTN_PROHIBIT] = -EACCES,
930 [RTN_THROW] = -EAGAIN,
931 [RTN_NAT] = -EINVAL,
932 [RTN_XRESOLVE] = -EINVAL,
933};
934
935static int ip6_rt_type_to_error(u8 fib6_type)
936{
937 return fib6_prop[fib6_type];
938}
939
3b6761d1
DA
940static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
941{
942 unsigned short flags = 0;
943
944 if (rt->dst_nocount)
945 flags |= DST_NOCOUNT;
946 if (rt->dst_nopolicy)
947 flags |= DST_NOPOLICY;
948 if (rt->dst_host)
949 flags |= DST_HOST;
950
951 return flags;
952}
953
6edb3c96
DA
954static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
955{
956 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
957
958 switch (ort->fib6_type) {
959 case RTN_BLACKHOLE:
960 rt->dst.output = dst_discard_out;
961 rt->dst.input = dst_discard;
962 break;
963 case RTN_PROHIBIT:
964 rt->dst.output = ip6_pkt_prohibit_out;
965 rt->dst.input = ip6_pkt_prohibit;
966 break;
967 case RTN_THROW:
968 case RTN_UNREACHABLE:
969 default:
970 rt->dst.output = ip6_pkt_discard_out;
971 rt->dst.input = ip6_pkt_discard;
972 break;
973 }
974}
975
976static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
977{
3b6761d1
DA
978 rt->dst.flags |= fib6_info_dst_flags(ort);
979
6edb3c96
DA
980 if (ort->rt6i_flags & RTF_REJECT) {
981 ip6_rt_init_dst_reject(rt, ort);
982 return;
983 }
984
985 rt->dst.error = 0;
986 rt->dst.output = ip6_output;
987
988 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96
DA
989 rt->dst.input = ip6_input;
990 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
991 rt->dst.input = ip6_mc_input;
992 } else {
993 rt->dst.input = ip6_forward;
994 }
995
996 if (ort->fib6_nh.nh_lwtstate) {
997 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
998 lwtunnel_set_redirect(&rt->dst);
999 }
1000
1001 rt->dst.lastuse = jiffies;
1002}
1003
ae90d867
DA
1004static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1005{
1006 BUG_ON(from->from);
1007
1008 rt->rt6i_flags &= ~RTF_EXPIRES;
1009 dst_hold(&from->dst);
1010 rt->from = from;
d4ead6b3
DA
1011 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
1012 if (from->fib6_metrics != &dst_default_metrics) {
1013 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1014 refcount_inc(&from->fib6_metrics->refcnt);
1015 }
ae90d867
DA
1016}
1017
1018static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
1019{
6edb3c96
DA
1020 ip6_rt_init_dst(rt, ort);
1021
ae90d867 1022 rt->rt6i_dst = ort->rt6i_dst;
ae90d867
DA
1023 rt->rt6i_idev = ort->rt6i_idev;
1024 if (rt->rt6i_idev)
1025 in6_dev_hold(rt->rt6i_idev);
5e670d84 1026 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
ae90d867
DA
1027 rt->rt6i_flags = ort->rt6i_flags;
1028 rt6_set_from(rt, ort);
1029 rt->rt6i_metric = ort->rt6i_metric;
1030#ifdef CONFIG_IPV6_SUBTREES
1031 rt->rt6i_src = ort->rt6i_src;
1032#endif
1033 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1034 rt->rt6i_table = ort->rt6i_table;
5e670d84 1035 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1036}
1037
a3c00e46
MKL
1038static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1039 struct in6_addr *saddr)
1040{
66f5d6ce 1041 struct fib6_node *pn, *sn;
a3c00e46
MKL
1042 while (1) {
1043 if (fn->fn_flags & RTN_TL_ROOT)
1044 return NULL;
66f5d6ce
WW
1045 pn = rcu_dereference(fn->parent);
1046 sn = FIB6_SUBTREE(pn);
1047 if (sn && sn != fn)
1048 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1049 else
1050 fn = pn;
1051 if (fn->fn_flags & RTN_RTINFO)
1052 return fn;
1053 }
1054}
c71099ac 1055
d3843fe5
WW
1056static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1057 bool null_fallback)
1058{
1059 struct rt6_info *rt = *prt;
1060
1061 if (dst_hold_safe(&rt->dst))
1062 return true;
1063 if (null_fallback) {
1064 rt = net->ipv6.ip6_null_entry;
1065 dst_hold(&rt->dst);
1066 } else {
1067 rt = NULL;
1068 }
1069 *prt = rt;
1070 return false;
1071}
1072
dec9b0e2
DA
1073/* called with rcu_lock held */
1074static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
1075{
3b6761d1 1076 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1077 struct net_device *dev = rt->fib6_nh.nh_dev;
1078 struct rt6_info *nrt;
1079
3b6761d1 1080 nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1081 if (nrt)
1082 ip6_rt_copy_init(nrt, rt);
1083
1084 return nrt;
1085}
1086
8ed67789
DL
1087static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1088 struct fib6_table *table,
b75cc8f9
DA
1089 struct flowi6 *fl6,
1090 const struct sk_buff *skb,
1091 int flags)
1da177e4 1092{
2b760fcf 1093 struct rt6_info *rt, *rt_cache;
1da177e4 1094 struct fib6_node *fn;
1da177e4 1095
b6cdbc85
DA
1096 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1097 flags &= ~RT6_LOOKUP_F_IFACE;
1098
66f5d6ce 1099 rcu_read_lock();
4c9483b2 1100 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1101restart:
66f5d6ce
WW
1102 rt = rcu_dereference(fn->leaf);
1103 if (!rt) {
421842ed 1104 rt = net->ipv6.fib6_null_entry;
66f5d6ce
WW
1105 } else {
1106 rt = rt6_device_match(net, rt, &fl6->saddr,
1107 fl6->flowi6_oif, flags);
1108 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 1109 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 1110 skb, flags);
66f5d6ce 1111 }
421842ed 1112 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1113 fn = fib6_backtrack(fn, &fl6->saddr);
1114 if (fn)
1115 goto restart;
1116 }
2b760fcf
WW
1117 /* Search through exception table */
1118 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
dec9b0e2 1119 if (rt_cache) {
2b760fcf 1120 rt = rt_cache;
dec9b0e2
DA
1121 if (ip6_hold_safe(net, &rt, true))
1122 dst_use_noref(&rt->dst, jiffies);
1123 } else if (dst_hold_safe(&rt->dst)) {
1124 struct rt6_info *nrt;
1125
1126 nrt = ip6_create_rt_rcu(rt);
1127 dst_release(&rt->dst);
1128 rt = nrt;
1129 } else {
1130 rt = net->ipv6.ip6_null_entry;
1131 dst_hold(&rt->dst);
1132 }
d3843fe5 1133
66f5d6ce 1134 rcu_read_unlock();
b811580d 1135
b65f164d 1136 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1137
c71099ac 1138 return rt;
c71099ac
TG
1139}
1140
67ba4152 1141struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1142 const struct sk_buff *skb, int flags)
ea6e574e 1143{
b75cc8f9 1144 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1145}
1146EXPORT_SYMBOL_GPL(ip6_route_lookup);
1147
9acd9f3a 1148struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1149 const struct in6_addr *saddr, int oif,
1150 const struct sk_buff *skb, int strict)
c71099ac 1151{
4c9483b2
DM
1152 struct flowi6 fl6 = {
1153 .flowi6_oif = oif,
1154 .daddr = *daddr,
c71099ac
TG
1155 };
1156 struct dst_entry *dst;
77d16f45 1157 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1158
adaa70bb 1159 if (saddr) {
4c9483b2 1160 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1161 flags |= RT6_LOOKUP_F_HAS_SADDR;
1162 }
1163
b75cc8f9 1164 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1165 if (dst->error == 0)
1166 return (struct rt6_info *) dst;
1167
1168 dst_release(dst);
1169
1da177e4
LT
1170 return NULL;
1171}
7159039a
YH
1172EXPORT_SYMBOL(rt6_lookup);
1173
c71099ac 1174/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1175 * It takes new route entry, the addition fails by any reason the
1176 * route is released.
1177 * Caller must hold dst before calling it.
1da177e4
LT
1178 */
1179
e5fd387a 1180static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301 1181 struct netlink_ext_ack *extack)
1da177e4
LT
1182{
1183 int err;
c71099ac 1184 struct fib6_table *table;
1da177e4 1185
c71099ac 1186 table = rt->rt6i_table;
66f5d6ce 1187 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1188 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1189 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1190
1191 return err;
1192}
1193
afb1d4b5 1194int ip6_ins_rt(struct net *net, struct rt6_info *rt)
40e22e8f 1195{
afb1d4b5 1196 struct nl_info info = { .nl_net = net, };
e715b6d3 1197
1cfb71ee
WW
1198 /* Hold dst to account for the reference from the fib6 tree */
1199 dst_hold(&rt->dst);
d4ead6b3 1200 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1201}
1202
8b9df265
MKL
1203static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1204 const struct in6_addr *daddr,
1205 const struct in6_addr *saddr)
1da177e4 1206{
4832c30d 1207 struct net_device *dev;
1da177e4
LT
1208 struct rt6_info *rt;
1209
1210 /*
1211 * Clone the route.
1212 */
1213
d52d3997 1214 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1215 ort = ort->from;
1da177e4 1216
4832c30d
DA
1217 rcu_read_lock();
1218 dev = ip6_rt_get_dev_rcu(ort);
1219 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1220 rcu_read_unlock();
83a09abd
MKL
1221 if (!rt)
1222 return NULL;
1223
1224 ip6_rt_copy_init(rt, ort);
1225 rt->rt6i_flags |= RTF_CACHE;
1226 rt->rt6i_metric = 0;
1227 rt->dst.flags |= DST_HOST;
1228 rt->rt6i_dst.addr = *daddr;
1229 rt->rt6i_dst.plen = 128;
1da177e4 1230
83a09abd
MKL
1231 if (!rt6_is_gw_or_nonexthop(ort)) {
1232 if (ort->rt6i_dst.plen != 128 &&
1233 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1234 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1235#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1236 if (rt->rt6i_src.plen && saddr) {
1237 rt->rt6i_src.addr = *saddr;
1238 rt->rt6i_src.plen = 128;
8b9df265 1239 }
83a09abd 1240#endif
95a9a5ba 1241 }
1da177e4 1242
95a9a5ba
YH
1243 return rt;
1244}
1da177e4 1245
d52d3997
MKL
1246static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1247{
3b6761d1 1248 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1249 struct net_device *dev;
d52d3997
MKL
1250 struct rt6_info *pcpu_rt;
1251
4832c30d
DA
1252 rcu_read_lock();
1253 dev = ip6_rt_get_dev_rcu(rt);
3b6761d1 1254 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1255 rcu_read_unlock();
d52d3997
MKL
1256 if (!pcpu_rt)
1257 return NULL;
1258 ip6_rt_copy_init(pcpu_rt, rt);
1259 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1260 pcpu_rt->rt6i_flags |= RTF_PCPU;
1261 return pcpu_rt;
1262}
1263
66f5d6ce 1264/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1265static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1266{
a73e4195 1267 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1268
1269 p = this_cpu_ptr(rt->rt6i_pcpu);
1270 pcpu_rt = *p;
1271
d4ead6b3
DA
1272 if (pcpu_rt)
1273 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1274
a73e4195
MKL
1275 return pcpu_rt;
1276}
1277
afb1d4b5
DA
1278static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1279 struct rt6_info *rt)
a73e4195
MKL
1280{
1281 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1282
1283 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1284 if (!pcpu_rt) {
9c7370a1
MKL
1285 dst_hold(&net->ipv6.ip6_null_entry->dst);
1286 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1287 }
1288
a94b9367
WW
1289 dst_hold(&pcpu_rt->dst);
1290 p = this_cpu_ptr(rt->rt6i_pcpu);
1291 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1292 BUG_ON(prev);
a94b9367 1293
d52d3997
MKL
1294 return pcpu_rt;
1295}
1296
35732d01
WW
1297/* exception hash table implementation
1298 */
1299static DEFINE_SPINLOCK(rt6_exception_lock);
1300
1301/* Remove rt6_ex from hash table and free the memory
1302 * Caller must hold rt6_exception_lock
1303 */
1304static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1305 struct rt6_exception *rt6_ex)
1306{
b2427e67 1307 struct net *net;
81eb8447 1308
35732d01
WW
1309 if (!bucket || !rt6_ex)
1310 return;
b2427e67
CIK
1311
1312 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1313 rt6_ex->rt6i->rt6i_node = NULL;
1314 hlist_del_rcu(&rt6_ex->hlist);
1315 rt6_release(rt6_ex->rt6i);
1316 kfree_rcu(rt6_ex, rcu);
1317 WARN_ON_ONCE(!bucket->depth);
1318 bucket->depth--;
81eb8447 1319 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1320}
1321
1322/* Remove oldest rt6_ex in bucket and free the memory
1323 * Caller must hold rt6_exception_lock
1324 */
1325static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1326{
1327 struct rt6_exception *rt6_ex, *oldest = NULL;
1328
1329 if (!bucket)
1330 return;
1331
1332 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1333 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1334 oldest = rt6_ex;
1335 }
1336 rt6_remove_exception(bucket, oldest);
1337}
1338
1339static u32 rt6_exception_hash(const struct in6_addr *dst,
1340 const struct in6_addr *src)
1341{
1342 static u32 seed __read_mostly;
1343 u32 val;
1344
1345 net_get_random_once(&seed, sizeof(seed));
1346 val = jhash(dst, sizeof(*dst), seed);
1347
1348#ifdef CONFIG_IPV6_SUBTREES
1349 if (src)
1350 val = jhash(src, sizeof(*src), val);
1351#endif
1352 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1353}
1354
1355/* Helper function to find the cached rt in the hash table
1356 * and update bucket pointer to point to the bucket for this
1357 * (daddr, saddr) pair
1358 * Caller must hold rt6_exception_lock
1359 */
1360static struct rt6_exception *
1361__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1362 const struct in6_addr *daddr,
1363 const struct in6_addr *saddr)
1364{
1365 struct rt6_exception *rt6_ex;
1366 u32 hval;
1367
1368 if (!(*bucket) || !daddr)
1369 return NULL;
1370
1371 hval = rt6_exception_hash(daddr, saddr);
1372 *bucket += hval;
1373
1374 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1375 struct rt6_info *rt6 = rt6_ex->rt6i;
1376 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1377
1378#ifdef CONFIG_IPV6_SUBTREES
1379 if (matched && saddr)
1380 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1381#endif
1382 if (matched)
1383 return rt6_ex;
1384 }
1385 return NULL;
1386}
1387
1388/* Helper function to find the cached rt in the hash table
1389 * and update bucket pointer to point to the bucket for this
1390 * (daddr, saddr) pair
1391 * Caller must hold rcu_read_lock()
1392 */
1393static struct rt6_exception *
1394__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1395 const struct in6_addr *daddr,
1396 const struct in6_addr *saddr)
1397{
1398 struct rt6_exception *rt6_ex;
1399 u32 hval;
1400
1401 WARN_ON_ONCE(!rcu_read_lock_held());
1402
1403 if (!(*bucket) || !daddr)
1404 return NULL;
1405
1406 hval = rt6_exception_hash(daddr, saddr);
1407 *bucket += hval;
1408
1409 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1410 struct rt6_info *rt6 = rt6_ex->rt6i;
1411 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1412
1413#ifdef CONFIG_IPV6_SUBTREES
1414 if (matched && saddr)
1415 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1416#endif
1417 if (matched)
1418 return rt6_ex;
1419 }
1420 return NULL;
1421}
1422
d4ead6b3
DA
1423static unsigned int fib6_mtu(const struct rt6_info *rt)
1424{
1425 unsigned int mtu;
1426
1427 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1428 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1429
1430 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1431}
1432
35732d01
WW
1433static int rt6_insert_exception(struct rt6_info *nrt,
1434 struct rt6_info *ort)
1435{
5e670d84 1436 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1437 struct rt6_exception_bucket *bucket;
1438 struct in6_addr *src_key = NULL;
1439 struct rt6_exception *rt6_ex;
1440 int err = 0;
1441
1442 /* ort can't be a cache or pcpu route */
1443 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1444 ort = ort->from;
35732d01
WW
1445 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1446
1447 spin_lock_bh(&rt6_exception_lock);
1448
1449 if (ort->exception_bucket_flushed) {
1450 err = -EINVAL;
1451 goto out;
1452 }
1453
1454 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1455 lockdep_is_held(&rt6_exception_lock));
1456 if (!bucket) {
1457 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1458 GFP_ATOMIC);
1459 if (!bucket) {
1460 err = -ENOMEM;
1461 goto out;
1462 }
1463 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1464 }
1465
1466#ifdef CONFIG_IPV6_SUBTREES
1467 /* rt6i_src.plen != 0 indicates ort is in subtree
1468 * and exception table is indexed by a hash of
1469 * both rt6i_dst and rt6i_src.
1470 * Otherwise, the exception table is indexed by
1471 * a hash of only rt6i_dst.
1472 */
1473 if (ort->rt6i_src.plen)
1474 src_key = &nrt->rt6i_src.addr;
1475#endif
60006a48
WW
1476
1477 /* Update rt6i_prefsrc as it could be changed
1478 * in rt6_remove_prefsrc()
1479 */
1480 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1481 /* rt6_mtu_change() might lower mtu on ort.
1482 * Only insert this exception route if its mtu
1483 * is less than ort's mtu value.
1484 */
d4ead6b3 1485 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1486 err = -EINVAL;
1487 goto out;
1488 }
60006a48 1489
35732d01
WW
1490 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1491 src_key);
1492 if (rt6_ex)
1493 rt6_remove_exception(bucket, rt6_ex);
1494
1495 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1496 if (!rt6_ex) {
1497 err = -ENOMEM;
1498 goto out;
1499 }
1500 rt6_ex->rt6i = nrt;
1501 rt6_ex->stamp = jiffies;
1502 atomic_inc(&nrt->rt6i_ref);
1503 nrt->rt6i_node = ort->rt6i_node;
1504 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1505 bucket->depth++;
81eb8447 1506 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1507
1508 if (bucket->depth > FIB6_MAX_DEPTH)
1509 rt6_exception_remove_oldest(bucket);
1510
1511out:
1512 spin_unlock_bh(&rt6_exception_lock);
1513
1514 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1515 if (!err) {
922c2ac8 1516 spin_lock_bh(&ort->rt6i_table->tb6_lock);
7aef6859 1517 fib6_update_sernum(net, ort);
922c2ac8 1518 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1519 fib6_force_start_gc(net);
1520 }
35732d01
WW
1521
1522 return err;
1523}
1524
1525void rt6_flush_exceptions(struct rt6_info *rt)
1526{
1527 struct rt6_exception_bucket *bucket;
1528 struct rt6_exception *rt6_ex;
1529 struct hlist_node *tmp;
1530 int i;
1531
1532 spin_lock_bh(&rt6_exception_lock);
1533 /* Prevent rt6_insert_exception() to recreate the bucket list */
1534 rt->exception_bucket_flushed = 1;
1535
1536 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1537 lockdep_is_held(&rt6_exception_lock));
1538 if (!bucket)
1539 goto out;
1540
1541 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1542 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1543 rt6_remove_exception(bucket, rt6_ex);
1544 WARN_ON_ONCE(bucket->depth);
1545 bucket++;
1546 }
1547
1548out:
1549 spin_unlock_bh(&rt6_exception_lock);
1550}
1551
1552/* Find cached rt in the hash table inside passed in rt
1553 * Caller has to hold rcu_read_lock()
1554 */
1555static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1556 struct in6_addr *daddr,
1557 struct in6_addr *saddr)
1558{
1559 struct rt6_exception_bucket *bucket;
1560 struct in6_addr *src_key = NULL;
1561 struct rt6_exception *rt6_ex;
1562 struct rt6_info *res = NULL;
1563
1564 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1565
1566#ifdef CONFIG_IPV6_SUBTREES
1567 /* rt6i_src.plen != 0 indicates rt is in subtree
1568 * and exception table is indexed by a hash of
1569 * both rt6i_dst and rt6i_src.
1570 * Otherwise, the exception table is indexed by
1571 * a hash of only rt6i_dst.
1572 */
1573 if (rt->rt6i_src.plen)
1574 src_key = saddr;
1575#endif
1576 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1577
1578 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1579 res = rt6_ex->rt6i;
1580
1581 return res;
1582}
1583
1584/* Remove the passed in cached rt from the hash table that contains it */
1585int rt6_remove_exception_rt(struct rt6_info *rt)
1586{
35732d01 1587 struct rt6_exception_bucket *bucket;
3a2232e9 1588 struct rt6_info *from = rt->from;
35732d01
WW
1589 struct in6_addr *src_key = NULL;
1590 struct rt6_exception *rt6_ex;
1591 int err;
1592
1593 if (!from ||
442d713b 1594 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1595 return -EINVAL;
1596
1597 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1598 return -ENOENT;
1599
1600 spin_lock_bh(&rt6_exception_lock);
1601 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1602 lockdep_is_held(&rt6_exception_lock));
1603#ifdef CONFIG_IPV6_SUBTREES
1604 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1605 * and exception table is indexed by a hash of
1606 * both rt6i_dst and rt6i_src.
1607 * Otherwise, the exception table is indexed by
1608 * a hash of only rt6i_dst.
1609 */
1610 if (from->rt6i_src.plen)
1611 src_key = &rt->rt6i_src.addr;
1612#endif
1613 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1614 &rt->rt6i_dst.addr,
1615 src_key);
1616 if (rt6_ex) {
1617 rt6_remove_exception(bucket, rt6_ex);
1618 err = 0;
1619 } else {
1620 err = -ENOENT;
1621 }
1622
1623 spin_unlock_bh(&rt6_exception_lock);
1624 return err;
1625}
1626
1627/* Find rt6_ex which contains the passed in rt cache and
1628 * refresh its stamp
1629 */
1630static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1631{
35732d01 1632 struct rt6_exception_bucket *bucket;
3a2232e9 1633 struct rt6_info *from = rt->from;
35732d01
WW
1634 struct in6_addr *src_key = NULL;
1635 struct rt6_exception *rt6_ex;
1636
1637 if (!from ||
442d713b 1638 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1639 return;
1640
1641 rcu_read_lock();
1642 bucket = rcu_dereference(from->rt6i_exception_bucket);
1643
1644#ifdef CONFIG_IPV6_SUBTREES
1645 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1646 * and exception table is indexed by a hash of
1647 * both rt6i_dst and rt6i_src.
1648 * Otherwise, the exception table is indexed by
1649 * a hash of only rt6i_dst.
1650 */
1651 if (from->rt6i_src.plen)
1652 src_key = &rt->rt6i_src.addr;
1653#endif
1654 rt6_ex = __rt6_find_exception_rcu(&bucket,
1655 &rt->rt6i_dst.addr,
1656 src_key);
1657 if (rt6_ex)
1658 rt6_ex->stamp = jiffies;
1659
1660 rcu_read_unlock();
1661}
1662
60006a48
WW
1663static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1664{
1665 struct rt6_exception_bucket *bucket;
1666 struct rt6_exception *rt6_ex;
1667 int i;
1668
1669 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1670 lockdep_is_held(&rt6_exception_lock));
1671
1672 if (bucket) {
1673 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1674 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1675 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1676 }
1677 bucket++;
1678 }
1679 }
1680}
1681
e9fa1495
SB
1682static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1683 struct rt6_info *rt, int mtu)
1684{
1685 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1686 * lowest MTU in the path: always allow updating the route PMTU to
1687 * reflect PMTU decreases.
1688 *
1689 * If the new MTU is higher, and the route PMTU is equal to the local
1690 * MTU, this means the old MTU is the lowest in the path, so allow
1691 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1692 * handle this.
1693 */
1694
1695 if (dst_mtu(&rt->dst) >= mtu)
1696 return true;
1697
1698 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1699 return true;
1700
1701 return false;
1702}
1703
1704static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1705 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1706{
1707 struct rt6_exception_bucket *bucket;
1708 struct rt6_exception *rt6_ex;
1709 int i;
1710
1711 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1712 lockdep_is_held(&rt6_exception_lock));
1713
e9fa1495
SB
1714 if (!bucket)
1715 return;
1716
1717 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1718 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1719 struct rt6_info *entry = rt6_ex->rt6i;
1720
1721 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1722 * route), the metrics of its rt->from have already
e9fa1495
SB
1723 * been updated.
1724 */
d4ead6b3 1725 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1726 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1727 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1728 }
e9fa1495 1729 bucket++;
f5bbe7ee
WW
1730 }
1731}
1732
b16cb459
WW
1733#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1734
1735static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1736 struct in6_addr *gateway)
1737{
1738 struct rt6_exception_bucket *bucket;
1739 struct rt6_exception *rt6_ex;
1740 struct hlist_node *tmp;
1741 int i;
1742
1743 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1744 return;
1745
1746 spin_lock_bh(&rt6_exception_lock);
1747 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1748 lockdep_is_held(&rt6_exception_lock));
1749
1750 if (bucket) {
1751 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1752 hlist_for_each_entry_safe(rt6_ex, tmp,
1753 &bucket->chain, hlist) {
1754 struct rt6_info *entry = rt6_ex->rt6i;
1755
1756 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1757 RTF_CACHE_GATEWAY &&
1758 ipv6_addr_equal(gateway,
1759 &entry->rt6i_gateway)) {
1760 rt6_remove_exception(bucket, rt6_ex);
1761 }
1762 }
1763 bucket++;
1764 }
1765 }
1766
1767 spin_unlock_bh(&rt6_exception_lock);
1768}
1769
c757faa8
WW
1770static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1771 struct rt6_exception *rt6_ex,
1772 struct fib6_gc_args *gc_args,
1773 unsigned long now)
1774{
1775 struct rt6_info *rt = rt6_ex->rt6i;
1776
1859bac0
PA
1777 /* we are pruning and obsoleting aged-out and non gateway exceptions
1778 * even if others have still references to them, so that on next
1779 * dst_check() such references can be dropped.
1780 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1781 * expired, independently from their aging, as per RFC 8201 section 4
1782 */
31afeb42
WW
1783 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1784 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1785 RT6_TRACE("aging clone %p\n", rt);
1786 rt6_remove_exception(bucket, rt6_ex);
1787 return;
1788 }
1789 } else if (time_after(jiffies, rt->dst.expires)) {
1790 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1791 rt6_remove_exception(bucket, rt6_ex);
1792 return;
31afeb42
WW
1793 }
1794
1795 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1796 struct neighbour *neigh;
1797 __u8 neigh_flags = 0;
1798
1bfa26ff
ED
1799 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1800 if (neigh)
c757faa8 1801 neigh_flags = neigh->flags;
1bfa26ff 1802
c757faa8
WW
1803 if (!(neigh_flags & NTF_ROUTER)) {
1804 RT6_TRACE("purging route %p via non-router but gateway\n",
1805 rt);
1806 rt6_remove_exception(bucket, rt6_ex);
1807 return;
1808 }
1809 }
31afeb42 1810
c757faa8
WW
1811 gc_args->more++;
1812}
1813
1814void rt6_age_exceptions(struct rt6_info *rt,
1815 struct fib6_gc_args *gc_args,
1816 unsigned long now)
1817{
1818 struct rt6_exception_bucket *bucket;
1819 struct rt6_exception *rt6_ex;
1820 struct hlist_node *tmp;
1821 int i;
1822
1823 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1824 return;
1825
1bfa26ff
ED
1826 rcu_read_lock_bh();
1827 spin_lock(&rt6_exception_lock);
c757faa8
WW
1828 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1829 lockdep_is_held(&rt6_exception_lock));
1830
1831 if (bucket) {
1832 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1833 hlist_for_each_entry_safe(rt6_ex, tmp,
1834 &bucket->chain, hlist) {
1835 rt6_age_examine_exception(bucket, rt6_ex,
1836 gc_args, now);
1837 }
1838 bucket++;
1839 }
1840 }
1bfa26ff
ED
1841 spin_unlock(&rt6_exception_lock);
1842 rcu_read_unlock_bh();
c757faa8
WW
1843}
1844
9ff74384 1845struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1846 int oif, struct flowi6 *fl6,
1847 const struct sk_buff *skb, int flags)
1da177e4 1848{
367efcb9 1849 struct fib6_node *fn, *saved_fn;
2b760fcf 1850 struct rt6_info *rt, *rt_cache;
c71099ac 1851 int strict = 0;
1da177e4 1852
77d16f45 1853 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1854 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1855 if (net->ipv6.devconf_all->forwarding == 0)
1856 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1857
66f5d6ce 1858 rcu_read_lock();
1da177e4 1859
4c9483b2 1860 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1861 saved_fn = fn;
1da177e4 1862
ca254490
DA
1863 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1864 oif = 0;
1865
a3c00e46 1866redo_rt6_select:
8d1040e8 1867 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1868 if (rt->rt6i_nsiblings)
b4bac172 1869 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
421842ed 1870 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1871 fn = fib6_backtrack(fn, &fl6->saddr);
1872 if (fn)
1873 goto redo_rt6_select;
367efcb9
MKL
1874 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1875 /* also consider unreachable route */
1876 strict &= ~RT6_LOOKUP_F_REACHABLE;
1877 fn = saved_fn;
1878 goto redo_rt6_select;
367efcb9 1879 }
a3c00e46
MKL
1880 }
1881
2b760fcf
WW
1882 /*Search through exception table */
1883 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1884 if (rt_cache)
1885 rt = rt_cache;
fb9de91e 1886
421842ed
DA
1887 if (rt == net->ipv6.fib6_null_entry) {
1888 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1889 rcu_read_unlock();
d3843fe5 1890 dst_hold(&rt->dst);
b65f164d 1891 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1892 return rt;
1893 } else if (rt->rt6i_flags & RTF_CACHE) {
d4ead6b3 1894 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1895 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1896
66f5d6ce 1897 rcu_read_unlock();
b65f164d 1898 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1899 return rt;
3da59bd9
MKL
1900 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1901 !(rt->rt6i_flags & RTF_GATEWAY))) {
1902 /* Create a RTF_CACHE clone which will not be
1903 * owned by the fib6 tree. It is for the special case where
1904 * the daddr in the skb during the neighbor look-up is different
1905 * from the fl6->daddr used to look-up route here.
1906 */
1907
1908 struct rt6_info *uncached_rt;
1909
d3843fe5
WW
1910 if (ip6_hold_safe(net, &rt, true)) {
1911 dst_use_noref(&rt->dst, jiffies);
1912 } else {
66f5d6ce 1913 rcu_read_unlock();
d3843fe5
WW
1914 uncached_rt = rt;
1915 goto uncached_rt_out;
1916 }
66f5d6ce 1917 rcu_read_unlock();
d52d3997 1918
3da59bd9
MKL
1919 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1920 dst_release(&rt->dst);
c71099ac 1921
1cfb71ee
WW
1922 if (uncached_rt) {
1923 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1924 * No need for another dst_hold()
1925 */
8d0b94af 1926 rt6_uncached_list_add(uncached_rt);
81eb8447 1927 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1928 } else {
3da59bd9 1929 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1930 dst_hold(&uncached_rt->dst);
1931 }
b811580d 1932
d3843fe5 1933uncached_rt_out:
b65f164d 1934 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1935 return uncached_rt;
3da59bd9 1936
d52d3997
MKL
1937 } else {
1938 /* Get a percpu copy */
1939
1940 struct rt6_info *pcpu_rt;
1941
d3843fe5 1942 dst_use_noref(&rt->dst, jiffies);
951f788a 1943 local_bh_disable();
d52d3997 1944 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1945
951f788a 1946 if (!pcpu_rt) {
a94b9367
WW
1947 /* atomic_inc_not_zero() is needed when using rcu */
1948 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1949 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1950 * rt->rt6i_ref makes sure rt can't be released.
1951 */
afb1d4b5 1952 pcpu_rt = rt6_make_pcpu_route(net, rt);
a94b9367
WW
1953 rt6_release(rt);
1954 } else {
1955 /* rt is already removed from tree */
a94b9367
WW
1956 pcpu_rt = net->ipv6.ip6_null_entry;
1957 dst_hold(&pcpu_rt->dst);
1958 }
9c7370a1 1959 }
951f788a
ED
1960 local_bh_enable();
1961 rcu_read_unlock();
b65f164d 1962 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1963 return pcpu_rt;
1964 }
1da177e4 1965}
9ff74384 1966EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1967
b75cc8f9
DA
1968static struct rt6_info *ip6_pol_route_input(struct net *net,
1969 struct fib6_table *table,
1970 struct flowi6 *fl6,
1971 const struct sk_buff *skb,
1972 int flags)
4acad72d 1973{
b75cc8f9 1974 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1975}
1976
d409b847
MB
1977struct dst_entry *ip6_route_input_lookup(struct net *net,
1978 struct net_device *dev,
b75cc8f9
DA
1979 struct flowi6 *fl6,
1980 const struct sk_buff *skb,
1981 int flags)
72331bc0
SL
1982{
1983 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1984 flags |= RT6_LOOKUP_F_IFACE;
1985
b75cc8f9 1986 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1987}
d409b847 1988EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1989
23aebdac 1990static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1991 struct flow_keys *keys,
1992 struct flow_keys *flkeys)
23aebdac
JS
1993{
1994 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1995 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1996 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1997 const struct ipv6hdr *inner_iph;
1998 const struct icmp6hdr *icmph;
1999 struct ipv6hdr _inner_iph;
2000
2001 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2002 goto out;
2003
2004 icmph = icmp6_hdr(skb);
2005 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2006 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2007 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2008 icmph->icmp6_type != ICMPV6_PARAMPROB)
2009 goto out;
2010
2011 inner_iph = skb_header_pointer(skb,
2012 skb_transport_offset(skb) + sizeof(*icmph),
2013 sizeof(_inner_iph), &_inner_iph);
2014 if (!inner_iph)
2015 goto out;
2016
2017 key_iph = inner_iph;
5e5d6fed 2018 _flkeys = NULL;
23aebdac 2019out:
5e5d6fed
RP
2020 if (_flkeys) {
2021 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2022 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2023 keys->tags.flow_label = _flkeys->tags.flow_label;
2024 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2025 } else {
2026 keys->addrs.v6addrs.src = key_iph->saddr;
2027 keys->addrs.v6addrs.dst = key_iph->daddr;
2028 keys->tags.flow_label = ip6_flowinfo(key_iph);
2029 keys->basic.ip_proto = key_iph->nexthdr;
2030 }
23aebdac
JS
2031}
2032
2033/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2034u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2035 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2036{
2037 struct flow_keys hash_keys;
9a2a537a 2038 u32 mhash;
23aebdac 2039
bbfa047a 2040 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2041 case 0:
2042 memset(&hash_keys, 0, sizeof(hash_keys));
2043 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2044 if (skb) {
2045 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2046 } else {
2047 hash_keys.addrs.v6addrs.src = fl6->saddr;
2048 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2049 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2050 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2051 }
2052 break;
2053 case 1:
2054 if (skb) {
2055 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2056 struct flow_keys keys;
2057
2058 /* short-circuit if we already have L4 hash present */
2059 if (skb->l4_hash)
2060 return skb_get_hash_raw(skb) >> 1;
2061
2062 memset(&hash_keys, 0, sizeof(hash_keys));
2063
2064 if (!flkeys) {
2065 skb_flow_dissect_flow_keys(skb, &keys, flag);
2066 flkeys = &keys;
2067 }
2068 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2069 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2070 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2071 hash_keys.ports.src = flkeys->ports.src;
2072 hash_keys.ports.dst = flkeys->ports.dst;
2073 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2074 } else {
2075 memset(&hash_keys, 0, sizeof(hash_keys));
2076 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2077 hash_keys.addrs.v6addrs.src = fl6->saddr;
2078 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2079 hash_keys.ports.src = fl6->fl6_sport;
2080 hash_keys.ports.dst = fl6->fl6_dport;
2081 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2082 }
2083 break;
23aebdac 2084 }
9a2a537a 2085 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2086
9a2a537a 2087 return mhash >> 1;
23aebdac
JS
2088}
2089
c71099ac
TG
2090void ip6_route_input(struct sk_buff *skb)
2091{
b71d1d42 2092 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2093 struct net *net = dev_net(skb->dev);
adaa70bb 2094 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2095 struct ip_tunnel_info *tun_info;
4c9483b2 2096 struct flowi6 fl6 = {
e0d56fdd 2097 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2098 .daddr = iph->daddr,
2099 .saddr = iph->saddr,
6502ca52 2100 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2101 .flowi6_mark = skb->mark,
2102 .flowi6_proto = iph->nexthdr,
c71099ac 2103 };
5e5d6fed 2104 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2105
904af04d 2106 tun_info = skb_tunnel_info(skb);
46fa062a 2107 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2108 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2109
2110 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2111 flkeys = &_flkeys;
2112
23aebdac 2113 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2114 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2115 skb_dst_drop(skb);
b75cc8f9
DA
2116 skb_dst_set(skb,
2117 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2118}
2119
b75cc8f9
DA
2120static struct rt6_info *ip6_pol_route_output(struct net *net,
2121 struct fib6_table *table,
2122 struct flowi6 *fl6,
2123 const struct sk_buff *skb,
2124 int flags)
1da177e4 2125{
b75cc8f9 2126 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2127}
2128
6f21c96a
PA
2129struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2130 struct flowi6 *fl6, int flags)
c71099ac 2131{
d46a9d67 2132 bool any_src;
c71099ac 2133
4c1feac5
DA
2134 if (rt6_need_strict(&fl6->daddr)) {
2135 struct dst_entry *dst;
2136
2137 dst = l3mdev_link_scope_lookup(net, fl6);
2138 if (dst)
2139 return dst;
2140 }
ca254490 2141
1fb9489b 2142 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2143
d46a9d67 2144 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2145 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2146 (fl6->flowi6_oif && any_src))
77d16f45 2147 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2148
d46a9d67 2149 if (!any_src)
adaa70bb 2150 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2151 else if (sk)
2152 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2153
b75cc8f9 2154 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2155}
6f21c96a 2156EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2157
2774c131 2158struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2159{
5c1e6aa3 2160 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2161 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2162 struct dst_entry *new = NULL;
2163
1dbe3252 2164 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2165 DST_OBSOLETE_DEAD, 0);
14e50e57 2166 if (rt) {
0a1f5962 2167 rt6_info_init(rt);
81eb8447 2168 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2169
0a1f5962 2170 new = &rt->dst;
14e50e57 2171 new->__use = 1;
352e512c 2172 new->input = dst_discard;
ede2059d 2173 new->output = dst_discard_out;
14e50e57 2174
0a1f5962 2175 dst_copy_metrics(new, &ort->dst);
14e50e57 2176
1dbe3252 2177 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2178 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2179 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2180 rt->rt6i_metric = 0;
2181
2182 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2183#ifdef CONFIG_IPV6_SUBTREES
2184 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2185#endif
14e50e57
DM
2186 }
2187
69ead7af
DM
2188 dst_release(dst_orig);
2189 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2190}
14e50e57 2191
1da177e4
LT
2192/*
2193 * Destination cache support functions
2194 */
2195
3da59bd9
MKL
2196static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2197{
36143645 2198 u32 rt_cookie = 0;
c5cff856
WW
2199
2200 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2201 return NULL;
2202
2203 if (rt6_check_expired(rt))
2204 return NULL;
2205
2206 return &rt->dst;
2207}
2208
2209static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2210{
5973fb1e
MKL
2211 if (!__rt6_check_expired(rt) &&
2212 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2213 rt6_check(rt->from, cookie))
3da59bd9
MKL
2214 return &rt->dst;
2215 else
2216 return NULL;
2217}
2218
1da177e4
LT
2219static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2220{
2221 struct rt6_info *rt;
2222
2223 rt = (struct rt6_info *) dst;
2224
6f3118b5
ND
2225 /* All IPV6 dsts are created with ->obsolete set to the value
2226 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2227 * into this function always.
2228 */
e3bc10bd 2229
02bcf4e0 2230 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2231 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2232 return rt6_dst_from_check(rt, cookie);
2233 else
2234 return rt6_check(rt, cookie);
1da177e4
LT
2235}
2236
2237static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2238{
2239 struct rt6_info *rt = (struct rt6_info *) dst;
2240
2241 if (rt) {
54c1a859
YH
2242 if (rt->rt6i_flags & RTF_CACHE) {
2243 if (rt6_check_expired(rt)) {
afb1d4b5 2244 ip6_del_rt(dev_net(dst->dev), rt);
54c1a859
YH
2245 dst = NULL;
2246 }
2247 } else {
1da177e4 2248 dst_release(dst);
54c1a859
YH
2249 dst = NULL;
2250 }
1da177e4 2251 }
54c1a859 2252 return dst;
1da177e4
LT
2253}
2254
2255static void ip6_link_failure(struct sk_buff *skb)
2256{
2257 struct rt6_info *rt;
2258
3ffe533c 2259 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2260
adf30907 2261 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2262 if (rt) {
1eb4f758 2263 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2264 if (dst_hold_safe(&rt->dst))
afb1d4b5 2265 ip6_del_rt(dev_net(rt->dst.dev), rt);
c5cff856
WW
2266 } else {
2267 struct fib6_node *fn;
2268
2269 rcu_read_lock();
2270 fn = rcu_dereference(rt->rt6i_node);
2271 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2272 fn->fn_sernum = -1;
2273 rcu_read_unlock();
1eb4f758 2274 }
1da177e4
LT
2275 }
2276}
2277
45e4fd26
MKL
2278static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2279{
2280 struct net *net = dev_net(rt->dst.dev);
2281
d4ead6b3 2282 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2283 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2284 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2285}
2286
0d3f6d29
MKL
2287static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2288{
2289 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2290 (rt->rt6i_flags & RTF_PCPU ||
2291 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2292}
2293
45e4fd26
MKL
2294static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2295 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2296{
0dec879f 2297 const struct in6_addr *daddr, *saddr;
67ba4152 2298 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2299
45e4fd26
MKL
2300 if (rt6->rt6i_flags & RTF_LOCAL)
2301 return;
81aded24 2302
19bda36c
XL
2303 if (dst_metric_locked(dst, RTAX_MTU))
2304 return;
2305
0dec879f
JA
2306 if (iph) {
2307 daddr = &iph->daddr;
2308 saddr = &iph->saddr;
2309 } else if (sk) {
2310 daddr = &sk->sk_v6_daddr;
2311 saddr = &inet6_sk(sk)->saddr;
2312 } else {
2313 daddr = NULL;
2314 saddr = NULL;
2315 }
2316 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2317 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2318 if (mtu >= dst_mtu(dst))
2319 return;
9d289715 2320
0d3f6d29 2321 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2322 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2323 /* update rt6_ex->stamp for cache */
2324 if (rt6->rt6i_flags & RTF_CACHE)
2325 rt6_update_exception_stamp_rt(rt6);
0dec879f 2326 } else if (daddr) {
45e4fd26
MKL
2327 struct rt6_info *nrt6;
2328
d4ead6b3 2329 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
45e4fd26
MKL
2330 if (nrt6) {
2331 rt6_do_update_pmtu(nrt6, mtu);
d4ead6b3 2332 if (rt6_insert_exception(nrt6, rt6->from))
2b760fcf 2333 dst_release_immediate(&nrt6->dst);
45e4fd26 2334 }
1da177e4
LT
2335 }
2336}
2337
45e4fd26
MKL
2338static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2339 struct sk_buff *skb, u32 mtu)
2340{
2341 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2342}
2343
42ae66c8 2344void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2345 int oif, u32 mark, kuid_t uid)
81aded24
DM
2346{
2347 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2348 struct dst_entry *dst;
2349 struct flowi6 fl6;
2350
2351 memset(&fl6, 0, sizeof(fl6));
2352 fl6.flowi6_oif = oif;
1b3c61dc 2353 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2354 fl6.daddr = iph->daddr;
2355 fl6.saddr = iph->saddr;
6502ca52 2356 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2357 fl6.flowi6_uid = uid;
81aded24
DM
2358
2359 dst = ip6_route_output(net, NULL, &fl6);
2360 if (!dst->error)
45e4fd26 2361 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2362 dst_release(dst);
2363}
2364EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2365
2366void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2367{
33c162a9
MKL
2368 struct dst_entry *dst;
2369
81aded24 2370 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2371 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2372
2373 dst = __sk_dst_get(sk);
2374 if (!dst || !dst->obsolete ||
2375 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2376 return;
2377
2378 bh_lock_sock(sk);
2379 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2380 ip6_datagram_dst_update(sk, false);
2381 bh_unlock_sock(sk);
81aded24
DM
2382}
2383EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2384
7d6850f7
AK
2385void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2386 const struct flowi6 *fl6)
2387{
2388#ifdef CONFIG_IPV6_SUBTREES
2389 struct ipv6_pinfo *np = inet6_sk(sk);
2390#endif
2391
2392 ip6_dst_store(sk, dst,
2393 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2394 &sk->sk_v6_daddr : NULL,
2395#ifdef CONFIG_IPV6_SUBTREES
2396 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2397 &np->saddr :
2398#endif
2399 NULL);
2400}
2401
b55b76b2
DJ
2402/* Handle redirects */
2403struct ip6rd_flowi {
2404 struct flowi6 fl6;
2405 struct in6_addr gateway;
2406};
2407
2408static struct rt6_info *__ip6_route_redirect(struct net *net,
2409 struct fib6_table *table,
2410 struct flowi6 *fl6,
b75cc8f9 2411 const struct sk_buff *skb,
b55b76b2
DJ
2412 int flags)
2413{
2414 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2415 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2416 struct fib6_node *fn;
2417
2418 /* Get the "current" route for this destination and
67c408cf 2419 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2420 *
2421 * RFC 4861 specifies that redirects should only be
2422 * accepted if they come from the nexthop to the target.
2423 * Due to the way the routes are chosen, this notion
2424 * is a bit fuzzy and one might need to check all possible
2425 * routes.
2426 */
2427
66f5d6ce 2428 rcu_read_lock();
b55b76b2
DJ
2429 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2430restart:
66f5d6ce 2431 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2432 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2433 continue;
14895687 2434 if (fib6_check_expired(rt))
b55b76b2 2435 continue;
6edb3c96 2436 if (rt->rt6i_flags & RTF_REJECT)
b55b76b2
DJ
2437 break;
2438 if (!(rt->rt6i_flags & RTF_GATEWAY))
2439 continue;
5e670d84 2440 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2441 continue;
2b760fcf
WW
2442 /* rt_cache's gateway might be different from its 'parent'
2443 * in the case of an ip redirect.
2444 * So we keep searching in the exception table if the gateway
2445 * is different.
2446 */
5e670d84 2447 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2448 rt_cache = rt6_find_cached_rt(rt,
2449 &fl6->daddr,
2450 &fl6->saddr);
2451 if (rt_cache &&
2452 ipv6_addr_equal(&rdfl->gateway,
2453 &rt_cache->rt6i_gateway)) {
2454 rt = rt_cache;
2455 break;
2456 }
b55b76b2 2457 continue;
2b760fcf 2458 }
b55b76b2
DJ
2459 break;
2460 }
2461
2462 if (!rt)
421842ed 2463 rt = net->ipv6.fib6_null_entry;
6edb3c96 2464 else if (rt->rt6i_flags & RTF_REJECT) {
b55b76b2 2465 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2466 goto out;
2467 }
2468
421842ed 2469 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2470 fn = fib6_backtrack(fn, &fl6->saddr);
2471 if (fn)
2472 goto restart;
b55b76b2 2473 }
a3c00e46 2474
b0a1ba59 2475out:
d3843fe5 2476 ip6_hold_safe(net, &rt, true);
b55b76b2 2477
66f5d6ce 2478 rcu_read_unlock();
b55b76b2 2479
b65f164d 2480 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2481 return rt;
2482};
2483
2484static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2485 const struct flowi6 *fl6,
2486 const struct sk_buff *skb,
2487 const struct in6_addr *gateway)
b55b76b2
DJ
2488{
2489 int flags = RT6_LOOKUP_F_HAS_SADDR;
2490 struct ip6rd_flowi rdfl;
2491
2492 rdfl.fl6 = *fl6;
2493 rdfl.gateway = *gateway;
2494
b75cc8f9 2495 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2496 flags, __ip6_route_redirect);
2497}
2498
e2d118a1
LC
2499void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2500 kuid_t uid)
3a5ad2ee
DM
2501{
2502 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2503 struct dst_entry *dst;
2504 struct flowi6 fl6;
2505
2506 memset(&fl6, 0, sizeof(fl6));
e374c618 2507 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2508 fl6.flowi6_oif = oif;
2509 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2510 fl6.daddr = iph->daddr;
2511 fl6.saddr = iph->saddr;
6502ca52 2512 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2513 fl6.flowi6_uid = uid;
3a5ad2ee 2514
b75cc8f9 2515 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2516 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2517 dst_release(dst);
2518}
2519EXPORT_SYMBOL_GPL(ip6_redirect);
2520
c92a59ec
DJ
2521void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2522 u32 mark)
2523{
2524 const struct ipv6hdr *iph = ipv6_hdr(skb);
2525 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2526 struct dst_entry *dst;
2527 struct flowi6 fl6;
2528
2529 memset(&fl6, 0, sizeof(fl6));
e374c618 2530 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2531 fl6.flowi6_oif = oif;
2532 fl6.flowi6_mark = mark;
c92a59ec
DJ
2533 fl6.daddr = msg->dest;
2534 fl6.saddr = iph->daddr;
e2d118a1 2535 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2536
b75cc8f9 2537 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2538 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2539 dst_release(dst);
2540}
2541
3a5ad2ee
DM
2542void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2543{
e2d118a1
LC
2544 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2545 sk->sk_uid);
3a5ad2ee
DM
2546}
2547EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2548
0dbaee3b 2549static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2550{
0dbaee3b
DM
2551 struct net_device *dev = dst->dev;
2552 unsigned int mtu = dst_mtu(dst);
2553 struct net *net = dev_net(dev);
2554
1da177e4
LT
2555 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2556
5578689a
DL
2557 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2558 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2559
2560 /*
1ab1457c
YH
2561 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2562 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2563 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2564 * rely only on pmtu discovery"
2565 */
2566 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2567 mtu = IPV6_MAXPLEN;
2568 return mtu;
2569}
2570
ebb762f2 2571static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2572{
d33e4553 2573 struct inet6_dev *idev;
d4ead6b3 2574 unsigned int mtu;
4b32b5ad
MKL
2575
2576 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2577 if (mtu)
30f78d8e 2578 goto out;
618f9bc7
SK
2579
2580 mtu = IPV6_MIN_MTU;
d33e4553
DM
2581
2582 rcu_read_lock();
2583 idev = __in6_dev_get(dst->dev);
2584 if (idev)
2585 mtu = idev->cnf.mtu6;
2586 rcu_read_unlock();
2587
30f78d8e 2588out:
14972cbd
RP
2589 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2590
2591 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2592}
2593
3b00944c 2594struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2595 struct flowi6 *fl6)
1da177e4 2596{
87a11578 2597 struct dst_entry *dst;
1da177e4
LT
2598 struct rt6_info *rt;
2599 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2600 struct net *net = dev_net(dev);
1da177e4 2601
38308473 2602 if (unlikely(!idev))
122bdf67 2603 return ERR_PTR(-ENODEV);
1da177e4 2604
ad706862 2605 rt = ip6_dst_alloc(net, dev, 0);
38308473 2606 if (unlikely(!rt)) {
1da177e4 2607 in6_dev_put(idev);
87a11578 2608 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2609 goto out;
2610 }
2611
8e2ec639 2612 rt->dst.flags |= DST_HOST;
588753f1 2613 rt->dst.input = ip6_input;
8e2ec639 2614 rt->dst.output = ip6_output;
550bab42 2615 rt->rt6i_gateway = fl6->daddr;
87a11578 2616 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2617 rt->rt6i_dst.plen = 128;
2618 rt->rt6i_idev = idev;
14edd87d 2619 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2620
4c981e28 2621 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2622 * do proper release of the net_device
2623 */
2624 rt6_uncached_list_add(rt);
81eb8447 2625 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2626
87a11578
DM
2627 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2628
1da177e4 2629out:
87a11578 2630 return dst;
1da177e4
LT
2631}
2632
569d3645 2633static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2634{
86393e52 2635 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2636 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2637 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2638 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2639 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2640 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2641 int entries;
7019b78e 2642
fc66f95c 2643 entries = dst_entries_get_fast(ops);
49a18d86 2644 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2645 entries <= rt_max_size)
1da177e4
LT
2646 goto out;
2647
6891a346 2648 net->ipv6.ip6_rt_gc_expire++;
14956643 2649 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2650 entries = dst_entries_get_slow(ops);
2651 if (entries < ops->gc_thresh)
7019b78e 2652 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2653out:
7019b78e 2654 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2655 return entries > rt_max_size;
1da177e4
LT
2656}
2657
d4ead6b3
DA
2658static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2659 struct fib6_config *cfg)
e715b6d3 2660{
d4ead6b3 2661 int err = 0;
e715b6d3 2662
d4ead6b3
DA
2663 if (cfg->fc_mx) {
2664 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2665 GFP_KERNEL);
2666 if (unlikely(!rt->fib6_metrics))
2667 return -ENOMEM;
ea697639 2668
d4ead6b3 2669 refcount_set(&rt->fib6_metrics->refcnt, 1);
e715b6d3 2670
d4ead6b3
DA
2671 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2672 rt->fib6_metrics->metrics);
c3a8d947 2673 }
e715b6d3 2674
d4ead6b3 2675 return err;
e715b6d3 2676}
1da177e4 2677
8c14586f
DA
2678static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2679 struct fib6_config *cfg,
f4797b33
DA
2680 const struct in6_addr *gw_addr,
2681 u32 tbid, int flags)
8c14586f
DA
2682{
2683 struct flowi6 fl6 = {
2684 .flowi6_oif = cfg->fc_ifindex,
2685 .daddr = *gw_addr,
2686 .saddr = cfg->fc_prefsrc,
2687 };
2688 struct fib6_table *table;
2689 struct rt6_info *rt;
8c14586f 2690
f4797b33 2691 table = fib6_get_table(net, tbid);
8c14586f
DA
2692 if (!table)
2693 return NULL;
2694
2695 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2696 flags |= RT6_LOOKUP_F_HAS_SADDR;
2697
f4797b33 2698 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2699 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2700
2701 /* if table lookup failed, fall back to full lookup */
2702 if (rt == net->ipv6.ip6_null_entry) {
2703 ip6_rt_put(rt);
2704 rt = NULL;
2705 }
2706
2707 return rt;
2708}
2709
fc1e64e1
DA
2710static int ip6_route_check_nh_onlink(struct net *net,
2711 struct fib6_config *cfg,
9fbb704c 2712 const struct net_device *dev,
fc1e64e1
DA
2713 struct netlink_ext_ack *extack)
2714{
44750f84 2715 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2716 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2717 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2718 struct rt6_info *grt;
2719 int err;
2720
2721 err = 0;
2722 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2723 if (grt) {
58e354c0
DA
2724 if (!grt->dst.error &&
2725 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2726 NL_SET_ERR_MSG(extack,
2727 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2728 err = -EINVAL;
2729 }
2730
2731 ip6_rt_put(grt);
2732 }
2733
2734 return err;
2735}
2736
1edce99f
DA
2737static int ip6_route_check_nh(struct net *net,
2738 struct fib6_config *cfg,
2739 struct net_device **_dev,
2740 struct inet6_dev **idev)
2741{
2742 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2743 struct net_device *dev = _dev ? *_dev : NULL;
2744 struct rt6_info *grt = NULL;
2745 int err = -EHOSTUNREACH;
2746
2747 if (cfg->fc_table) {
f4797b33
DA
2748 int flags = RT6_LOOKUP_F_IFACE;
2749
2750 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2751 cfg->fc_table, flags);
1edce99f
DA
2752 if (grt) {
2753 if (grt->rt6i_flags & RTF_GATEWAY ||
2754 (dev && dev != grt->dst.dev)) {
2755 ip6_rt_put(grt);
2756 grt = NULL;
2757 }
2758 }
2759 }
2760
2761 if (!grt)
b75cc8f9 2762 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2763
2764 if (!grt)
2765 goto out;
2766
2767 if (dev) {
2768 if (dev != grt->dst.dev) {
2769 ip6_rt_put(grt);
2770 goto out;
2771 }
2772 } else {
2773 *_dev = dev = grt->dst.dev;
2774 *idev = grt->rt6i_idev;
2775 dev_hold(dev);
2776 in6_dev_hold(grt->rt6i_idev);
2777 }
2778
2779 if (!(grt->rt6i_flags & RTF_GATEWAY))
2780 err = 0;
2781
2782 ip6_rt_put(grt);
2783
2784out:
2785 return err;
2786}
2787
9fbb704c
DA
2788static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2789 struct net_device **_dev, struct inet6_dev **idev,
2790 struct netlink_ext_ack *extack)
2791{
2792 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2793 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2794 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2795 const struct net_device *dev = *_dev;
232378e8 2796 bool need_addr_check = !dev;
9fbb704c
DA
2797 int err = -EINVAL;
2798
2799 /* if gw_addr is local we will fail to detect this in case
2800 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2801 * will return already-added prefix route via interface that
2802 * prefix route was assigned to, which might be non-loopback.
2803 */
232378e8
DA
2804 if (dev &&
2805 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2806 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2807 goto out;
2808 }
2809
2810 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2811 /* IPv6 strictly inhibits using not link-local
2812 * addresses as nexthop address.
2813 * Otherwise, router will not able to send redirects.
2814 * It is very good, but in some (rare!) circumstances
2815 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2816 * some exceptions. --ANK
2817 * We allow IPv4-mapped nexthops to support RFC4798-type
2818 * addressing
2819 */
2820 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2821 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2822 goto out;
2823 }
2824
2825 if (cfg->fc_flags & RTNH_F_ONLINK)
2826 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2827 else
2828 err = ip6_route_check_nh(net, cfg, _dev, idev);
2829
2830 if (err)
2831 goto out;
2832 }
2833
2834 /* reload in case device was changed */
2835 dev = *_dev;
2836
2837 err = -EINVAL;
2838 if (!dev) {
2839 NL_SET_ERR_MSG(extack, "Egress device not specified");
2840 goto out;
2841 } else if (dev->flags & IFF_LOOPBACK) {
2842 NL_SET_ERR_MSG(extack,
2843 "Egress device can not be loopback device for this route");
2844 goto out;
2845 }
232378e8
DA
2846
2847 /* if we did not check gw_addr above, do so now that the
2848 * egress device has been resolved.
2849 */
2850 if (need_addr_check &&
2851 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2852 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2853 goto out;
2854 }
2855
9fbb704c
DA
2856 err = 0;
2857out:
2858 return err;
2859}
2860
333c4301
DA
2861static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2862 struct netlink_ext_ack *extack)
1da177e4 2863{
5578689a 2864 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2865 struct rt6_info *rt = NULL;
2866 struct net_device *dev = NULL;
2867 struct inet6_dev *idev = NULL;
c71099ac 2868 struct fib6_table *table;
1da177e4 2869 int addr_type;
8c5b83f0 2870 int err = -EINVAL;
1da177e4 2871
557c44be 2872 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2873 if (cfg->fc_flags & RTF_PCPU) {
2874 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2875 goto out;
d5d531cb 2876 }
557c44be 2877
2ea2352e
WW
2878 /* RTF_CACHE is an internal flag; can not be set by userspace */
2879 if (cfg->fc_flags & RTF_CACHE) {
2880 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2881 goto out;
2882 }
2883
e8478e80
DA
2884 if (cfg->fc_type > RTN_MAX) {
2885 NL_SET_ERR_MSG(extack, "Invalid route type");
2886 goto out;
2887 }
2888
d5d531cb
DA
2889 if (cfg->fc_dst_len > 128) {
2890 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2891 goto out;
2892 }
2893 if (cfg->fc_src_len > 128) {
2894 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2895 goto out;
d5d531cb 2896 }
1da177e4 2897#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2898 if (cfg->fc_src_len) {
2899 NL_SET_ERR_MSG(extack,
2900 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2901 goto out;
d5d531cb 2902 }
1da177e4 2903#endif
86872cb5 2904 if (cfg->fc_ifindex) {
1da177e4 2905 err = -ENODEV;
5578689a 2906 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2907 if (!dev)
2908 goto out;
2909 idev = in6_dev_get(dev);
2910 if (!idev)
2911 goto out;
2912 }
2913
86872cb5
TG
2914 if (cfg->fc_metric == 0)
2915 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2916
fc1e64e1
DA
2917 if (cfg->fc_flags & RTNH_F_ONLINK) {
2918 if (!dev) {
2919 NL_SET_ERR_MSG(extack,
2920 "Nexthop device required for onlink");
2921 err = -ENODEV;
2922 goto out;
2923 }
2924
2925 if (!(dev->flags & IFF_UP)) {
2926 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2927 err = -ENETDOWN;
2928 goto out;
2929 }
2930 }
2931
d71314b4 2932 err = -ENOBUFS;
38308473
DM
2933 if (cfg->fc_nlinfo.nlh &&
2934 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2935 table = fib6_get_table(net, cfg->fc_table);
38308473 2936 if (!table) {
f3213831 2937 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2938 table = fib6_new_table(net, cfg->fc_table);
2939 }
2940 } else {
2941 table = fib6_new_table(net, cfg->fc_table);
2942 }
38308473
DM
2943
2944 if (!table)
c71099ac 2945 goto out;
c71099ac 2946
ad706862
MKL
2947 rt = ip6_dst_alloc(net, NULL,
2948 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2949
38308473 2950 if (!rt) {
1da177e4
LT
2951 err = -ENOMEM;
2952 goto out;
2953 }
2954
d4ead6b3
DA
2955 err = ip6_convert_metrics(net, rt, cfg);
2956 if (err < 0)
2957 goto out;
2958
1716a961 2959 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2960 fib6_set_expires(rt, jiffies +
1716a961
G
2961 clock_t_to_jiffies(cfg->fc_expires));
2962 else
14895687 2963 fib6_clean_expires(rt);
1da177e4 2964
86872cb5
TG
2965 if (cfg->fc_protocol == RTPROT_UNSPEC)
2966 cfg->fc_protocol = RTPROT_BOOT;
2967 rt->rt6i_protocol = cfg->fc_protocol;
2968
2969 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2970
19e42e45
RP
2971 if (cfg->fc_encap) {
2972 struct lwtunnel_state *lwtstate;
2973
30357d7d 2974 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2975 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2976 &lwtstate, extack);
19e42e45
RP
2977 if (err)
2978 goto out;
5e670d84 2979 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2980 }
2981
86872cb5
TG
2982 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2983 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2984 if (rt->rt6i_dst.plen == 128)
3b6761d1 2985 rt->dst_host = true;
e5fd387a 2986
1da177e4 2987#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2988 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2989 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2990#endif
2991
86872cb5 2992 rt->rt6i_metric = cfg->fc_metric;
5e670d84 2993 rt->fib6_nh.nh_weight = 1;
1da177e4 2994
e8478e80
DA
2995 rt->fib6_type = cfg->fc_type;
2996
1da177e4
LT
2997 /* We cannot add true routes via loopback here,
2998 they would result in kernel looping; promote them to reject routes
2999 */
86872cb5 3000 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3001 (dev && (dev->flags & IFF_LOOPBACK) &&
3002 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3003 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3004 /* hold loopback dev/idev if we haven't done so. */
5578689a 3005 if (dev != net->loopback_dev) {
1da177e4
LT
3006 if (dev) {
3007 dev_put(dev);
3008 in6_dev_put(idev);
3009 }
5578689a 3010 dev = net->loopback_dev;
1da177e4
LT
3011 dev_hold(dev);
3012 idev = in6_dev_get(dev);
3013 if (!idev) {
3014 err = -ENODEV;
3015 goto out;
3016 }
3017 }
1da177e4
LT
3018 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
3019 goto install_route;
3020 }
3021
86872cb5 3022 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3023 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3024 if (err)
48ed7b26 3025 goto out;
1da177e4 3026
5e670d84 3027 rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
3028 }
3029
3030 err = -ENODEV;
38308473 3031 if (!dev)
1da177e4
LT
3032 goto out;
3033
428604fb
LB
3034 if (idev->cnf.disable_ipv6) {
3035 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3036 err = -EACCES;
3037 goto out;
3038 }
3039
955ec4cb
DA
3040 if (!(dev->flags & IFF_UP)) {
3041 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3042 err = -ENETDOWN;
3043 goto out;
3044 }
3045
c3968a85
DW
3046 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3047 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3048 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3049 err = -EINVAL;
3050 goto out;
3051 }
4e3fd7a0 3052 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
3053 rt->rt6i_prefsrc.plen = 128;
3054 } else
3055 rt->rt6i_prefsrc.plen = 0;
3056
86872cb5 3057 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
3058
3059install_route:
5609b80a
IS
3060 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3061 !netif_carrier_ok(dev))
5e670d84
DA
3062 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3063 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3064 rt->fib6_nh.nh_dev = rt->dst.dev = dev;
1da177e4 3065 rt->rt6i_idev = idev;
c71099ac 3066 rt->rt6i_table = table;
63152fc0 3067
c346dca1 3068 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3069
8c5b83f0 3070 return rt;
6b9ea5a6
RP
3071out:
3072 if (dev)
3073 dev_put(dev);
3074 if (idev)
3075 in6_dev_put(idev);
587fea74
WW
3076 if (rt)
3077 dst_release_immediate(&rt->dst);
6b9ea5a6 3078
8c5b83f0 3079 return ERR_PTR(err);
6b9ea5a6
RP
3080}
3081
d4ead6b3 3082int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack)
6b9ea5a6 3083{
8c5b83f0 3084 struct rt6_info *rt;
6b9ea5a6
RP
3085 int err;
3086
333c4301 3087 rt = ip6_route_info_create(cfg, extack);
d4ead6b3
DA
3088 if (IS_ERR(rt))
3089 return PTR_ERR(rt);
6b9ea5a6 3090
d4ead6b3 3091 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
6b9ea5a6 3092
1da177e4
LT
3093 return err;
3094}
3095
86872cb5 3096static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4 3097{
afb1d4b5 3098 struct net *net = info->nl_net;
c71099ac 3099 struct fib6_table *table;
afb1d4b5 3100 int err;
1da177e4 3101
421842ed 3102 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3103 err = -ENOENT;
3104 goto out;
3105 }
6c813a72 3106
c71099ac 3107 table = rt->rt6i_table;
66f5d6ce 3108 spin_lock_bh(&table->tb6_lock);
86872cb5 3109 err = fib6_del(rt, info);
66f5d6ce 3110 spin_unlock_bh(&table->tb6_lock);
1da177e4 3111
6825a26c 3112out:
94e187c0 3113 ip6_rt_put(rt);
1da177e4
LT
3114 return err;
3115}
3116
afb1d4b5 3117int ip6_del_rt(struct net *net, struct rt6_info *rt)
e0a1ad73 3118{
afb1d4b5
DA
3119 struct nl_info info = { .nl_net = net };
3120
528c4ceb 3121 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3122}
3123
0ae81335
DA
3124static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3125{
3126 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3127 struct net *net = info->nl_net;
16a16cd3 3128 struct sk_buff *skb = NULL;
0ae81335 3129 struct fib6_table *table;
e3330039 3130 int err = -ENOENT;
0ae81335 3131
421842ed 3132 if (rt == net->ipv6.fib6_null_entry)
e3330039 3133 goto out_put;
0ae81335 3134 table = rt->rt6i_table;
66f5d6ce 3135 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3136
3137 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3138 struct rt6_info *sibling, *next_sibling;
3139
16a16cd3
DA
3140 /* prefer to send a single notification with all hops */
3141 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3142 if (skb) {
3143 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3144
d4ead6b3 3145 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3146 NULL, NULL, 0, RTM_DELROUTE,
3147 info->portid, seq, 0) < 0) {
3148 kfree_skb(skb);
3149 skb = NULL;
3150 } else
3151 info->skip_notify = 1;
3152 }
3153
0ae81335
DA
3154 list_for_each_entry_safe(sibling, next_sibling,
3155 &rt->rt6i_siblings,
3156 rt6i_siblings) {
3157 err = fib6_del(sibling, info);
3158 if (err)
e3330039 3159 goto out_unlock;
0ae81335
DA
3160 }
3161 }
3162
3163 err = fib6_del(rt, info);
e3330039 3164out_unlock:
66f5d6ce 3165 spin_unlock_bh(&table->tb6_lock);
e3330039 3166out_put:
0ae81335 3167 ip6_rt_put(rt);
16a16cd3
DA
3168
3169 if (skb) {
e3330039 3170 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3171 info->nlh, gfp_any());
3172 }
0ae81335
DA
3173 return err;
3174}
3175
333c4301
DA
3176static int ip6_route_del(struct fib6_config *cfg,
3177 struct netlink_ext_ack *extack)
1da177e4 3178{
2b760fcf 3179 struct rt6_info *rt, *rt_cache;
c71099ac 3180 struct fib6_table *table;
1da177e4 3181 struct fib6_node *fn;
1da177e4
LT
3182 int err = -ESRCH;
3183
5578689a 3184 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3185 if (!table) {
3186 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3187 return err;
d5d531cb 3188 }
c71099ac 3189
66f5d6ce 3190 rcu_read_lock();
1da177e4 3191
c71099ac 3192 fn = fib6_locate(&table->tb6_root,
86872cb5 3193 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3194 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3195 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3196
1da177e4 3197 if (fn) {
66f5d6ce 3198 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3199 if (cfg->fc_flags & RTF_CACHE) {
3200 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3201 &cfg->fc_src);
3202 if (!rt_cache)
3203 continue;
3204 rt = rt_cache;
3205 }
86872cb5 3206 if (cfg->fc_ifindex &&
5e670d84
DA
3207 (!rt->fib6_nh.nh_dev ||
3208 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3209 continue;
86872cb5 3210 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3211 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3212 continue;
86872cb5 3213 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3214 continue;
c2ed1880
M
3215 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3216 continue;
d3843fe5
WW
3217 if (!dst_hold_safe(&rt->dst))
3218 break;
66f5d6ce 3219 rcu_read_unlock();
1da177e4 3220
0ae81335
DA
3221 /* if gateway was specified only delete the one hop */
3222 if (cfg->fc_flags & RTF_GATEWAY)
3223 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3224
3225 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3226 }
3227 }
66f5d6ce 3228 rcu_read_unlock();
1da177e4
LT
3229
3230 return err;
3231}
3232
6700c270 3233static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3234{
a6279458 3235 struct netevent_redirect netevent;
e8599ff4 3236 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3237 struct ndisc_options ndopts;
3238 struct inet6_dev *in6_dev;
3239 struct neighbour *neigh;
71bcdba0 3240 struct rd_msg *msg;
6e157b6a
DM
3241 int optlen, on_link;
3242 u8 *lladdr;
e8599ff4 3243
29a3cad5 3244 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3245 optlen -= sizeof(*msg);
e8599ff4
DM
3246
3247 if (optlen < 0) {
6e157b6a 3248 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3249 return;
3250 }
3251
71bcdba0 3252 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3253
71bcdba0 3254 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3255 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3256 return;
3257 }
3258
6e157b6a 3259 on_link = 0;
71bcdba0 3260 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3261 on_link = 1;
71bcdba0 3262 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3263 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3264 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3265 return;
3266 }
3267
3268 in6_dev = __in6_dev_get(skb->dev);
3269 if (!in6_dev)
3270 return;
3271 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3272 return;
3273
3274 /* RFC2461 8.1:
3275 * The IP source address of the Redirect MUST be the same as the current
3276 * first-hop router for the specified ICMP Destination Address.
3277 */
3278
f997c55c 3279 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3280 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3281 return;
3282 }
6e157b6a
DM
3283
3284 lladdr = NULL;
e8599ff4
DM
3285 if (ndopts.nd_opts_tgt_lladdr) {
3286 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3287 skb->dev);
3288 if (!lladdr) {
3289 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3290 return;
3291 }
3292 }
3293
6e157b6a 3294 rt = (struct rt6_info *) dst;
ec13ad1d 3295 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3296 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3297 return;
6e157b6a 3298 }
e8599ff4 3299
6e157b6a
DM
3300 /* Redirect received -> path was valid.
3301 * Look, redirects are sent only in response to data packets,
3302 * so that this nexthop apparently is reachable. --ANK
3303 */
0dec879f 3304 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3305
71bcdba0 3306 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3307 if (!neigh)
3308 return;
a6279458 3309
1da177e4
LT
3310 /*
3311 * We have finally decided to accept it.
3312 */
3313
f997c55c 3314 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3315 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3316 NEIGH_UPDATE_F_OVERRIDE|
3317 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3318 NEIGH_UPDATE_F_ISROUTER)),
3319 NDISC_REDIRECT, &ndopts);
1da177e4 3320
83a09abd 3321 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3322 if (!nrt)
1da177e4
LT
3323 goto out;
3324
3325 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3326 if (on_link)
3327 nrt->rt6i_flags &= ~RTF_GATEWAY;
3328
b91d5329 3329 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3330 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3331
2b760fcf
WW
3332 /* No need to remove rt from the exception table if rt is
3333 * a cached route because rt6_insert_exception() will
3334 * takes care of it
3335 */
d4ead6b3 3336 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3337 dst_release_immediate(&nrt->dst);
3338 goto out;
3339 }
1da177e4 3340
d8d1f30b
CG
3341 netevent.old = &rt->dst;
3342 netevent.new = &nrt->dst;
71bcdba0 3343 netevent.daddr = &msg->dest;
60592833 3344 netevent.neigh = neigh;
8d71740c
TT
3345 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3346
1da177e4 3347out:
e8599ff4 3348 neigh_release(neigh);
6e157b6a
DM
3349}
3350
70ceb4f5 3351#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3352static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3353 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3354 const struct in6_addr *gwaddr,
3355 struct net_device *dev)
70ceb4f5 3356{
830218c1
DA
3357 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3358 int ifindex = dev->ifindex;
70ceb4f5
YH
3359 struct fib6_node *fn;
3360 struct rt6_info *rt = NULL;
c71099ac
TG
3361 struct fib6_table *table;
3362
830218c1 3363 table = fib6_get_table(net, tb_id);
38308473 3364 if (!table)
c71099ac 3365 return NULL;
70ceb4f5 3366
66f5d6ce 3367 rcu_read_lock();
38fbeeee 3368 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3369 if (!fn)
3370 goto out;
3371
66f5d6ce 3372 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3373 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5
YH
3374 continue;
3375 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3376 continue;
5e670d84 3377 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3378 continue;
d3843fe5 3379 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3380 break;
3381 }
3382out:
66f5d6ce 3383 rcu_read_unlock();
70ceb4f5
YH
3384 return rt;
3385}
3386
efa2cea0 3387static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3388 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3389 const struct in6_addr *gwaddr,
3390 struct net_device *dev,
95c96174 3391 unsigned int pref)
70ceb4f5 3392{
86872cb5 3393 struct fib6_config cfg = {
238fc7ea 3394 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3395 .fc_ifindex = dev->ifindex,
86872cb5
TG
3396 .fc_dst_len = prefixlen,
3397 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3398 RTF_UP | RTF_PREF(pref),
b91d5329 3399 .fc_protocol = RTPROT_RA,
e8478e80 3400 .fc_type = RTN_UNICAST,
15e47304 3401 .fc_nlinfo.portid = 0,
efa2cea0
DL
3402 .fc_nlinfo.nlh = NULL,
3403 .fc_nlinfo.nl_net = net,
86872cb5
TG
3404 };
3405
830218c1 3406 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3407 cfg.fc_dst = *prefix;
3408 cfg.fc_gateway = *gwaddr;
70ceb4f5 3409
e317da96
YH
3410 /* We should treat it as a default route if prefix length is 0. */
3411 if (!prefixlen)
86872cb5 3412 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3413
333c4301 3414 ip6_route_add(&cfg, NULL);
70ceb4f5 3415
830218c1 3416 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3417}
3418#endif
3419
afb1d4b5
DA
3420struct rt6_info *rt6_get_dflt_router(struct net *net,
3421 const struct in6_addr *addr,
3422 struct net_device *dev)
1ab1457c 3423{
830218c1 3424 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3425 struct rt6_info *rt;
c71099ac 3426 struct fib6_table *table;
1da177e4 3427
afb1d4b5 3428 table = fib6_get_table(net, tb_id);
38308473 3429 if (!table)
c71099ac 3430 return NULL;
1da177e4 3431
66f5d6ce
WW
3432 rcu_read_lock();
3433 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3434 if (dev == rt->fib6_nh.nh_dev &&
045927ff 3435 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3436 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3437 break;
3438 }
3439 if (rt)
d3843fe5 3440 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3441 rcu_read_unlock();
1da177e4
LT
3442 return rt;
3443}
3444
afb1d4b5
DA
3445struct rt6_info *rt6_add_dflt_router(struct net *net,
3446 const struct in6_addr *gwaddr,
ebacaaa0
YH
3447 struct net_device *dev,
3448 unsigned int pref)
1da177e4 3449{
86872cb5 3450 struct fib6_config cfg = {
ca254490 3451 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3452 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3453 .fc_ifindex = dev->ifindex,
3454 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3455 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3456 .fc_protocol = RTPROT_RA,
e8478e80 3457 .fc_type = RTN_UNICAST,
15e47304 3458 .fc_nlinfo.portid = 0,
5578689a 3459 .fc_nlinfo.nlh = NULL,
afb1d4b5 3460 .fc_nlinfo.nl_net = net,
86872cb5 3461 };
1da177e4 3462
4e3fd7a0 3463 cfg.fc_gateway = *gwaddr;
1da177e4 3464
333c4301 3465 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3466 struct fib6_table *table;
3467
3468 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3469 if (table)
3470 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3471 }
1da177e4 3472
afb1d4b5 3473 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3474}
3475
afb1d4b5
DA
3476static void __rt6_purge_dflt_routers(struct net *net,
3477 struct fib6_table *table)
1da177e4
LT
3478{
3479 struct rt6_info *rt;
3480
3481restart:
66f5d6ce
WW
3482 rcu_read_lock();
3483 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3484 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3485 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3486 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3487 rcu_read_unlock();
afb1d4b5 3488 ip6_del_rt(net, rt);
d3843fe5 3489 } else {
66f5d6ce 3490 rcu_read_unlock();
d3843fe5 3491 }
1da177e4
LT
3492 goto restart;
3493 }
3494 }
66f5d6ce 3495 rcu_read_unlock();
830218c1
DA
3496
3497 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3498}
3499
3500void rt6_purge_dflt_routers(struct net *net)
3501{
3502 struct fib6_table *table;
3503 struct hlist_head *head;
3504 unsigned int h;
3505
3506 rcu_read_lock();
3507
3508 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3509 head = &net->ipv6.fib_table_hash[h];
3510 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3511 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3512 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3513 }
3514 }
3515
3516 rcu_read_unlock();
1da177e4
LT
3517}
3518
5578689a
DL
3519static void rtmsg_to_fib6_config(struct net *net,
3520 struct in6_rtmsg *rtmsg,
86872cb5
TG
3521 struct fib6_config *cfg)
3522{
3523 memset(cfg, 0, sizeof(*cfg));
3524
ca254490
DA
3525 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3526 : RT6_TABLE_MAIN;
86872cb5
TG
3527 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3528 cfg->fc_metric = rtmsg->rtmsg_metric;
3529 cfg->fc_expires = rtmsg->rtmsg_info;
3530 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3531 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3532 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3533 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3534
5578689a 3535 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3536
4e3fd7a0
AD
3537 cfg->fc_dst = rtmsg->rtmsg_dst;
3538 cfg->fc_src = rtmsg->rtmsg_src;
3539 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3540}
3541
5578689a 3542int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3543{
86872cb5 3544 struct fib6_config cfg;
1da177e4
LT
3545 struct in6_rtmsg rtmsg;
3546 int err;
3547
67ba4152 3548 switch (cmd) {
1da177e4
LT
3549 case SIOCADDRT: /* Add a route */
3550 case SIOCDELRT: /* Delete a route */
af31f412 3551 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3552 return -EPERM;
3553 err = copy_from_user(&rtmsg, arg,
3554 sizeof(struct in6_rtmsg));
3555 if (err)
3556 return -EFAULT;
86872cb5 3557
5578689a 3558 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3559
1da177e4
LT
3560 rtnl_lock();
3561 switch (cmd) {
3562 case SIOCADDRT:
333c4301 3563 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3564 break;
3565 case SIOCDELRT:
333c4301 3566 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3567 break;
3568 default:
3569 err = -EINVAL;
3570 }
3571 rtnl_unlock();
3572
3573 return err;
3ff50b79 3574 }
1da177e4
LT
3575
3576 return -EINVAL;
3577}
3578
3579/*
3580 * Drop the packet on the floor
3581 */
3582
d5fdd6ba 3583static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3584{
612f09e8 3585 int type;
adf30907 3586 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3587 switch (ipstats_mib_noroutes) {
3588 case IPSTATS_MIB_INNOROUTES:
0660e03f 3589 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3590 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3591 IP6_INC_STATS(dev_net(dst->dev),
3592 __in6_dev_get_safely(skb->dev),
3bd653c8 3593 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3594 break;
3595 }
3596 /* FALLTHROUGH */
3597 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3598 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3599 ipstats_mib_noroutes);
612f09e8
YH
3600 break;
3601 }
3ffe533c 3602 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3603 kfree_skb(skb);
3604 return 0;
3605}
3606
9ce8ade0
TG
3607static int ip6_pkt_discard(struct sk_buff *skb)
3608{
612f09e8 3609 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3610}
3611
ede2059d 3612static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3613{
adf30907 3614 skb->dev = skb_dst(skb)->dev;
612f09e8 3615 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3616}
3617
9ce8ade0
TG
3618static int ip6_pkt_prohibit(struct sk_buff *skb)
3619{
612f09e8 3620 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3621}
3622
ede2059d 3623static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3624{
adf30907 3625 skb->dev = skb_dst(skb)->dev;
612f09e8 3626 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3627}
3628
1da177e4
LT
3629/*
3630 * Allocate a dst for local (unicast / anycast) address.
3631 */
3632
afb1d4b5
DA
3633struct rt6_info *addrconf_dst_alloc(struct net *net,
3634 struct inet6_dev *idev,
1da177e4 3635 const struct in6_addr *addr,
8f031519 3636 bool anycast)
1da177e4 3637{
ca254490 3638 u32 tb_id;
4832c30d 3639 struct net_device *dev = idev->dev;
5f02ce24
DA
3640 struct rt6_info *rt;
3641
5f02ce24 3642 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3643 if (!rt)
1da177e4
LT
3644 return ERR_PTR(-ENOMEM);
3645
3b6761d1
DA
3646 rt->dst_nocount = true;
3647
1da177e4 3648 in6_dev_hold(idev);
1da177e4 3649 rt->rt6i_idev = idev;
1da177e4 3650
3b6761d1 3651 rt->dst_host = true;
94b5e0f9 3652 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3653 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80
DA
3654 if (anycast) {
3655 rt->fib6_type = RTN_ANYCAST;
58c4fb86 3656 rt->rt6i_flags |= RTF_ANYCAST;
e8478e80
DA
3657 } else {
3658 rt->fib6_type = RTN_LOCAL;
1da177e4 3659 rt->rt6i_flags |= RTF_LOCAL;
e8478e80 3660 }
1da177e4 3661
5e670d84
DA
3662 rt->fib6_nh.nh_gw = *addr;
3663 rt->fib6_nh.nh_dev = dev;
550bab42 3664 rt->rt6i_gateway = *addr;
4e3fd7a0 3665 rt->rt6i_dst.addr = *addr;
1da177e4 3666 rt->rt6i_dst.plen = 128;
ca254490
DA
3667 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3668 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3669
1da177e4
LT
3670 return rt;
3671}
3672
/*
 * Argument bundle for fib6_remove_prefsrc(): remove a deleted address
 * from the preferred-source entries of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this nexthop device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* the address being removed */
};
3679
3680static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3681{
3682 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3683 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3684 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3685
5e670d84 3686 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3687 rt != net->ipv6.fib6_null_entry &&
c3968a85 3688 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3689 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3690 /* remove prefsrc entry */
3691 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3692 /* need to update cache as well */
3693 rt6_exceptions_remove_prefsrc(rt);
3694 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3695 }
3696 return 0;
3697}
3698
3699void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3700{
3701 struct net *net = dev_net(ifp->idev->dev);
3702 struct arg_dev_net_ip adni = {
3703 .dev = ifp->idev->dev,
3704 .net = net,
3705 .addr = &ifp->addr,
3706 };
0c3584d5 3707 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3708}
3709
be7a010d 3710#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3711
3712/* Remove routers and update dst entries when gateway turn into host. */
3713static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3714{
3715 struct in6_addr *gateway = (struct in6_addr *)arg;
3716
2b760fcf 3717 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3718 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3719 return -1;
3720 }
b16cb459
WW
3721
3722 /* Further clean up cached routes in exception table.
3723 * This is needed because cached route may have a different
3724 * gateway than its 'parent' in the case of an ip redirect.
3725 */
3726 rt6_exceptions_clean_tohost(rt, gateway);
3727
be7a010d
DJ
3728 return 0;
3729}
3730
3731void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3732{
3733 fib6_clean_all(net, fib6_clean_tohost, gateway);
3734}
3735
/*
 * Argument bundle for the fib6_ifup()/fib6_ifdown() walkers.  The union
 * member in use depends on the walker: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear */
		unsigned long event;	/* NETDEV_* event being handled */
	};
};
3743
d7dedee1
IS
3744static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3745{
3746 struct rt6_info *iter;
3747 struct fib6_node *fn;
3748
3749 fn = rcu_dereference_protected(rt->rt6i_node,
3750 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3751 iter = rcu_dereference_protected(fn->leaf,
3752 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3753 while (iter) {
3754 if (iter->rt6i_metric == rt->rt6i_metric &&
3755 rt6_qualify_for_ecmp(iter))
3756 return iter;
3757 iter = rcu_dereference_protected(iter->rt6_next,
3758 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3759 }
3760
3761 return NULL;
3762}
3763
3764static bool rt6_is_dead(const struct rt6_info *rt)
3765{
5e670d84
DA
3766 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3767 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d7dedee1
IS
3768 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3769 return true;
3770
3771 return false;
3772}
3773
3774static int rt6_multipath_total_weight(const struct rt6_info *rt)
3775{
3776 struct rt6_info *iter;
3777 int total = 0;
3778
3779 if (!rt6_is_dead(rt))
5e670d84 3780 total += rt->fib6_nh.nh_weight;
d7dedee1
IS
3781
3782 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3783 if (!rt6_is_dead(iter))
5e670d84 3784 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3785 }
3786
3787 return total;
3788}
3789
3790static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3791{
3792 int upper_bound = -1;
3793
3794 if (!rt6_is_dead(rt)) {
5e670d84 3795 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3796 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3797 total) - 1;
3798 }
5e670d84 3799 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3800}
3801
3802static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3803{
3804 struct rt6_info *iter;
3805 int weight = 0;
3806
3807 rt6_upper_bound_set(rt, &weight, total);
3808
3809 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3810 rt6_upper_bound_set(iter, &weight, total);
3811}
3812
3813void rt6_multipath_rebalance(struct rt6_info *rt)
3814{
3815 struct rt6_info *first;
3816 int total;
3817
3818 /* In case the entire multipath route was marked for flushing,
3819 * then there is no need to rebalance upon the removal of every
3820 * sibling route.
3821 */
3822 if (!rt->rt6i_nsiblings || rt->should_flush)
3823 return;
3824
3825 /* During lookup routes are evaluated in order, so we need to
3826 * make sure upper bounds are assigned from the first sibling
3827 * onwards.
3828 */
3829 first = rt6_multipath_first_sibling(rt);
3830 if (WARN_ON_ONCE(!first))
3831 return;
3832
3833 total = rt6_multipath_total_weight(first);
3834 rt6_multipath_upper_bound_set(first, total);
3835}
3836
2127d95a
IS
3837static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3838{
3839 const struct arg_netdev_event *arg = p_arg;
7aef6859 3840 struct net *net = dev_net(arg->dev);
2127d95a 3841
421842ed 3842 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3843 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3844 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3845 rt6_multipath_rebalance(rt);
1de178ed 3846 }
2127d95a
IS
3847
3848 return 0;
3849}
3850
3851void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3852{
3853 struct arg_netdev_event arg = {
3854 .dev = dev,
6802f3ad
IS
3855 {
3856 .nh_flags = nh_flags,
3857 },
2127d95a
IS
3858 };
3859
3860 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3861 arg.nh_flags |= RTNH_F_LINKDOWN;
3862
3863 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3864}
3865
1de178ed
IS
3866static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3867 const struct net_device *dev)
3868{
3869 struct rt6_info *iter;
3870
5e670d84 3871 if (rt->fib6_nh.nh_dev == dev)
1de178ed
IS
3872 return true;
3873 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84 3874 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3875 return true;
3876
3877 return false;
3878}
3879
3880static void rt6_multipath_flush(struct rt6_info *rt)
3881{
3882 struct rt6_info *iter;
3883
3884 rt->should_flush = 1;
3885 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3886 iter->should_flush = 1;
3887}
3888
3889static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3890 const struct net_device *down_dev)
3891{
3892 struct rt6_info *iter;
3893 unsigned int dead = 0;
3894
5e670d84
DA
3895 if (rt->fib6_nh.nh_dev == down_dev ||
3896 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3897 dead++;
3898 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3899 if (iter->fib6_nh.nh_dev == down_dev ||
3900 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3901 dead++;
3902
3903 return dead;
3904}
3905
3906static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3907 const struct net_device *dev,
3908 unsigned int nh_flags)
3909{
3910 struct rt6_info *iter;
3911
5e670d84
DA
3912 if (rt->fib6_nh.nh_dev == dev)
3913 rt->fib6_nh.nh_flags |= nh_flags;
1de178ed 3914 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3915 if (iter->fib6_nh.nh_dev == dev)
3916 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3917}
3918
a1a22c12 3919/* called with write lock held for table with rt */
4c981e28 3920static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3921{
4c981e28
IS
3922 const struct arg_netdev_event *arg = p_arg;
3923 const struct net_device *dev = arg->dev;
7aef6859 3924 struct net *net = dev_net(dev);
8ed67789 3925
421842ed 3926 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3927 return 0;
3928
3929 switch (arg->event) {
3930 case NETDEV_UNREGISTER:
5e670d84 3931 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3932 case NETDEV_DOWN:
1de178ed 3933 if (rt->should_flush)
27c6fa73 3934 return -1;
1de178ed 3935 if (!rt->rt6i_nsiblings)
5e670d84 3936 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3937 if (rt6_multipath_uses_dev(rt, dev)) {
3938 unsigned int count;
3939
3940 count = rt6_multipath_dead_count(rt, dev);
3941 if (rt->rt6i_nsiblings + 1 == count) {
3942 rt6_multipath_flush(rt);
3943 return -1;
3944 }
3945 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3946 RTNH_F_LINKDOWN);
7aef6859 3947 fib6_update_sernum(net, rt);
d7dedee1 3948 rt6_multipath_rebalance(rt);
1de178ed
IS
3949 }
3950 return -2;
27c6fa73 3951 case NETDEV_CHANGE:
5e670d84 3952 if (rt->fib6_nh.nh_dev != dev ||
1de178ed 3953 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3954 break;
5e670d84 3955 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3956 rt6_multipath_rebalance(rt);
27c6fa73 3957 break;
2b241361 3958 }
c159d30c 3959
1da177e4
LT
3960 return 0;
3961}
3962
27c6fa73 3963void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3964{
4c981e28 3965 struct arg_netdev_event arg = {
8ed67789 3966 .dev = dev,
6802f3ad
IS
3967 {
3968 .event = event,
3969 },
8ed67789
DL
3970 };
3971
4c981e28
IS
3972 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3973}
3974
3975void rt6_disable_ip(struct net_device *dev, unsigned long event)
3976{
3977 rt6_sync_down_dev(dev, event);
3978 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3979 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3980}
3981
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3986
3987static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3988{
3989 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3990 struct inet6_dev *idev;
3991
3992 /* In IPv6 pmtu discovery is not optional,
3993 so that RTAX_MTU lock cannot disable it.
3994 We still use this lock to block changes
3995 caused by addrconf/ndisc.
3996 */
3997
3998 idev = __in6_dev_get(arg->dev);
38308473 3999 if (!idev)
1da177e4
LT
4000 return 0;
4001
4002 /* For administrative MTU increase, there is no way to discover
4003 IPv6 PMTU increase, so PMTU increase should be updated here.
4004 Since RFC 1981 doesn't include administrative MTU increase
4005 update PMTU increase is a MUST. (i.e. jumbo frame)
4006 */
5e670d84 4007 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4008 !fib6_metric_locked(rt, RTAX_MTU)) {
4009 u32 mtu = rt->fib6_pmtu;
4010
4011 if (mtu >= arg->mtu ||
4012 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4013 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4014
f5bbe7ee 4015 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4016 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4017 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4018 }
1da177e4
LT
4019 return 0;
4020}
4021
95c96174 4022void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4023{
c71099ac
TG
4024 struct rt6_mtu_change_arg arg = {
4025 .dev = dev,
4026 .mtu = mtu,
4027 };
1da177e4 4028
0c3584d5 4029 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4030}
4031
ef7c79ed 4032static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4033 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4034 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4035 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4036 [RTA_PRIORITY] = { .type = NLA_U32 },
4037 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4038 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4039 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4040 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4041 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4042 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4043 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4044 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4045};
4046
4047static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4048 struct fib6_config *cfg,
4049 struct netlink_ext_ack *extack)
1da177e4 4050{
86872cb5
TG
4051 struct rtmsg *rtm;
4052 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4053 unsigned int pref;
86872cb5 4054 int err;
1da177e4 4055
fceb6435
JB
4056 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4057 NULL);
86872cb5
TG
4058 if (err < 0)
4059 goto errout;
1da177e4 4060
86872cb5
TG
4061 err = -EINVAL;
4062 rtm = nlmsg_data(nlh);
4063 memset(cfg, 0, sizeof(*cfg));
4064
4065 cfg->fc_table = rtm->rtm_table;
4066 cfg->fc_dst_len = rtm->rtm_dst_len;
4067 cfg->fc_src_len = rtm->rtm_src_len;
4068 cfg->fc_flags = RTF_UP;
4069 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4070 cfg->fc_type = rtm->rtm_type;
86872cb5 4071
ef2c7d7b
ND
4072 if (rtm->rtm_type == RTN_UNREACHABLE ||
4073 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4074 rtm->rtm_type == RTN_PROHIBIT ||
4075 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4076 cfg->fc_flags |= RTF_REJECT;
4077
ab79ad14
4078 if (rtm->rtm_type == RTN_LOCAL)
4079 cfg->fc_flags |= RTF_LOCAL;
4080
1f56a01f
MKL
4081 if (rtm->rtm_flags & RTM_F_CLONED)
4082 cfg->fc_flags |= RTF_CACHE;
4083
fc1e64e1
DA
4084 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4085
15e47304 4086 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4087 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4088 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4089
4090 if (tb[RTA_GATEWAY]) {
67b61f6c 4091 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4092 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4093 }
86872cb5
TG
4094
4095 if (tb[RTA_DST]) {
4096 int plen = (rtm->rtm_dst_len + 7) >> 3;
4097
4098 if (nla_len(tb[RTA_DST]) < plen)
4099 goto errout;
4100
4101 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4102 }
86872cb5
TG
4103
4104 if (tb[RTA_SRC]) {
4105 int plen = (rtm->rtm_src_len + 7) >> 3;
4106
4107 if (nla_len(tb[RTA_SRC]) < plen)
4108 goto errout;
4109
4110 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4111 }
86872cb5 4112
c3968a85 4113 if (tb[RTA_PREFSRC])
67b61f6c 4114 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4115
86872cb5
TG
4116 if (tb[RTA_OIF])
4117 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4118
4119 if (tb[RTA_PRIORITY])
4120 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4121
4122 if (tb[RTA_METRICS]) {
4123 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4124 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4125 }
86872cb5
TG
4126
4127 if (tb[RTA_TABLE])
4128 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4129
51ebd318
ND
4130 if (tb[RTA_MULTIPATH]) {
4131 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4132 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4133
4134 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4135 cfg->fc_mp_len, extack);
9ed59592
DA
4136 if (err < 0)
4137 goto errout;
51ebd318
ND
4138 }
4139
c78ba6d6
LR
4140 if (tb[RTA_PREF]) {
4141 pref = nla_get_u8(tb[RTA_PREF]);
4142 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4143 pref != ICMPV6_ROUTER_PREF_HIGH)
4144 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4145 cfg->fc_flags |= RTF_PREF(pref);
4146 }
4147
19e42e45
RP
4148 if (tb[RTA_ENCAP])
4149 cfg->fc_encap = tb[RTA_ENCAP];
4150
9ed59592 4151 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4152 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4153
c255bd68 4154 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4155 if (err < 0)
4156 goto errout;
4157 }
4158
32bc201e
XL
4159 if (tb[RTA_EXPIRES]) {
4160 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4161
4162 if (addrconf_finite_timeout(timeout)) {
4163 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4164 cfg->fc_flags |= RTF_EXPIRES;
4165 }
4166 }
4167
86872cb5
TG
4168 err = 0;
4169errout:
4170 return err;
1da177e4
LT
4171}
4172
6b9ea5a6
RP
4173struct rt6_nh {
4174 struct rt6_info *rt6_info;
4175 struct fib6_config r_cfg;
6b9ea5a6
RP
4176 struct list_head next;
4177};
4178
4179static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4180{
4181 struct rt6_nh *nh;
4182
4183 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4184 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4185 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4186 nh->r_cfg.fc_ifindex);
4187 }
4188}
4189
d4ead6b3
DA
4190static int ip6_route_info_append(struct net *net,
4191 struct list_head *rt6_nh_list,
6b9ea5a6
RP
4192 struct rt6_info *rt, struct fib6_config *r_cfg)
4193{
4194 struct rt6_nh *nh;
6b9ea5a6
RP
4195 int err = -EEXIST;
4196
4197 list_for_each_entry(nh, rt6_nh_list, next) {
4198 /* check if rt6_info already exists */
f06b7549 4199 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4200 return err;
4201 }
4202
4203 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4204 if (!nh)
4205 return -ENOMEM;
4206 nh->rt6_info = rt;
d4ead6b3 4207 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4208 if (err) {
4209 kfree(nh);
4210 return err;
4211 }
4212 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4213 list_add_tail(&nh->next, rt6_nh_list);
4214
4215 return 0;
4216}
4217
3b1137fe
DA
4218static void ip6_route_mpath_notify(struct rt6_info *rt,
4219 struct rt6_info *rt_last,
4220 struct nl_info *info,
4221 __u16 nlflags)
4222{
4223 /* if this is an APPEND route, then rt points to the first route
4224 * inserted and rt_last points to last route inserted. Userspace
4225 * wants a consistent dump of the route which starts at the first
4226 * nexthop. Since sibling routes are always added at the end of
4227 * the list, find the first sibling of the last route appended
4228 */
4229 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4230 rt = list_first_entry(&rt_last->rt6i_siblings,
4231 struct rt6_info,
4232 rt6i_siblings);
4233 }
4234
4235 if (rt)
4236 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4237}
4238
333c4301
DA
4239static int ip6_route_multipath_add(struct fib6_config *cfg,
4240 struct netlink_ext_ack *extack)
51ebd318 4241{
3b1137fe
DA
4242 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4243 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4244 struct fib6_config r_cfg;
4245 struct rtnexthop *rtnh;
6b9ea5a6
RP
4246 struct rt6_info *rt;
4247 struct rt6_nh *err_nh;
4248 struct rt6_nh *nh, *nh_safe;
3b1137fe 4249 __u16 nlflags;
51ebd318
ND
4250 int remaining;
4251 int attrlen;
6b9ea5a6
RP
4252 int err = 1;
4253 int nhn = 0;
4254 int replace = (cfg->fc_nlinfo.nlh &&
4255 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4256 LIST_HEAD(rt6_nh_list);
51ebd318 4257
3b1137fe
DA
4258 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4259 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4260 nlflags |= NLM_F_APPEND;
4261
35f1b4e9 4262 remaining = cfg->fc_mp_len;
51ebd318 4263 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4264
6b9ea5a6
RP
4265 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4266 * rt6_info structs per nexthop
4267 */
51ebd318
ND
4268 while (rtnh_ok(rtnh, remaining)) {
4269 memcpy(&r_cfg, cfg, sizeof(*cfg));
4270 if (rtnh->rtnh_ifindex)
4271 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4272
4273 attrlen = rtnh_attrlen(rtnh);
4274 if (attrlen > 0) {
4275 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4276
4277 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4278 if (nla) {
67b61f6c 4279 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4280 r_cfg.fc_flags |= RTF_GATEWAY;
4281 }
19e42e45
RP
4282 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4283 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4284 if (nla)
4285 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4286 }
6b9ea5a6 4287
68e2ffde 4288 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4289 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4290 if (IS_ERR(rt)) {
4291 err = PTR_ERR(rt);
4292 rt = NULL;
6b9ea5a6 4293 goto cleanup;
8c5b83f0 4294 }
6b9ea5a6 4295
5e670d84 4296 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4297
d4ead6b3
DA
4298 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4299 rt, &r_cfg);
51ebd318 4300 if (err) {
587fea74 4301 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4302 goto cleanup;
4303 }
4304
4305 rtnh = rtnh_next(rtnh, &remaining);
4306 }
4307
3b1137fe
DA
4308 /* for add and replace send one notification with all nexthops.
4309 * Skip the notification in fib6_add_rt2node and send one with
4310 * the full route when done
4311 */
4312 info->skip_notify = 1;
4313
6b9ea5a6
RP
4314 err_nh = NULL;
4315 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4316 rt_last = nh->rt6_info;
d4ead6b3 4317 err = __ip6_ins_rt(nh->rt6_info, info, extack);
3b1137fe
DA
4318 /* save reference to first route for notification */
4319 if (!rt_notif && !err)
4320 rt_notif = nh->rt6_info;
4321
6b9ea5a6
RP
4322 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4323 nh->rt6_info = NULL;
4324 if (err) {
4325 if (replace && nhn)
4326 ip6_print_replace_route_err(&rt6_nh_list);
4327 err_nh = nh;
4328 goto add_errout;
51ebd318 4329 }
6b9ea5a6 4330
1a72418b 4331 /* Because each route is added like a single route we remove
27596472
MK
4332 * these flags after the first nexthop: if there is a collision,
4333 * we have already failed to add the first nexthop:
4334 * fib6_add_rt2node() has rejected it; when replacing, old
4335 * nexthops have been replaced by first new, the rest should
4336 * be added to it.
1a72418b 4337 */
27596472
MK
4338 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4339 NLM_F_REPLACE);
6b9ea5a6
RP
4340 nhn++;
4341 }
4342
3b1137fe
DA
4343 /* success ... tell user about new route */
4344 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4345 goto cleanup;
4346
4347add_errout:
3b1137fe
DA
4348 /* send notification for routes that were added so that
4349 * the delete notifications sent by ip6_route_del are
4350 * coherent
4351 */
4352 if (rt_notif)
4353 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4354
6b9ea5a6
RP
4355 /* Delete routes that were already added */
4356 list_for_each_entry(nh, &rt6_nh_list, next) {
4357 if (err_nh == nh)
4358 break;
333c4301 4359 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4360 }
4361
4362cleanup:
4363 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4364 if (nh->rt6_info)
4365 dst_release_immediate(&nh->rt6_info->dst);
6b9ea5a6
RP
4366 list_del(&nh->next);
4367 kfree(nh);
4368 }
4369
4370 return err;
4371}
4372
333c4301
DA
4373static int ip6_route_multipath_del(struct fib6_config *cfg,
4374 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4375{
4376 struct fib6_config r_cfg;
4377 struct rtnexthop *rtnh;
4378 int remaining;
4379 int attrlen;
4380 int err = 1, last_err = 0;
4381
4382 remaining = cfg->fc_mp_len;
4383 rtnh = (struct rtnexthop *)cfg->fc_mp;
4384
4385 /* Parse a Multipath Entry */
4386 while (rtnh_ok(rtnh, remaining)) {
4387 memcpy(&r_cfg, cfg, sizeof(*cfg));
4388 if (rtnh->rtnh_ifindex)
4389 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4390
4391 attrlen = rtnh_attrlen(rtnh);
4392 if (attrlen > 0) {
4393 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4394
4395 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4396 if (nla) {
4397 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4398 r_cfg.fc_flags |= RTF_GATEWAY;
4399 }
4400 }
333c4301 4401 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4402 if (err)
4403 last_err = err;
4404
51ebd318
ND
4405 rtnh = rtnh_next(rtnh, &remaining);
4406 }
4407
4408 return last_err;
4409}
4410
c21ef3e3
DA
4411static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4412 struct netlink_ext_ack *extack)
1da177e4 4413{
86872cb5
TG
4414 struct fib6_config cfg;
4415 int err;
1da177e4 4416
333c4301 4417 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4418 if (err < 0)
4419 return err;
4420
51ebd318 4421 if (cfg.fc_mp)
333c4301 4422 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4423 else {
4424 cfg.fc_delete_all_nh = 1;
333c4301 4425 return ip6_route_del(&cfg, extack);
0ae81335 4426 }
1da177e4
LT
4427}
4428
c21ef3e3
DA
4429static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4430 struct netlink_ext_ack *extack)
1da177e4 4431{
86872cb5
TG
4432 struct fib6_config cfg;
4433 int err;
1da177e4 4434
333c4301 4435 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4436 if (err < 0)
4437 return err;
4438
51ebd318 4439 if (cfg.fc_mp)
333c4301 4440 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4441 else
333c4301 4442 return ip6_route_add(&cfg, extack);
1da177e4
LT
4443}
4444
beb1afac 4445static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4446{
beb1afac
DA
4447 int nexthop_len = 0;
4448
4449 if (rt->rt6i_nsiblings) {
4450 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4451 + NLA_ALIGN(sizeof(struct rtnexthop))
4452 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4453 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac
DA
4454
4455 nexthop_len *= rt->rt6i_nsiblings;
4456 }
4457
339bf98f
TG
4458 return NLMSG_ALIGN(sizeof(struct rtmsg))
4459 + nla_total_size(16) /* RTA_SRC */
4460 + nla_total_size(16) /* RTA_DST */
4461 + nla_total_size(16) /* RTA_GATEWAY */
4462 + nla_total_size(16) /* RTA_PREFSRC */
4463 + nla_total_size(4) /* RTA_TABLE */
4464 + nla_total_size(4) /* RTA_IIF */
4465 + nla_total_size(4) /* RTA_OIF */
4466 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4467 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4468 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4469 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4470 + nla_total_size(1) /* RTA_PREF */
5e670d84 4471 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4472 + nexthop_len;
4473}
4474
4475static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4476 unsigned int *flags, bool skip_oif)
beb1afac 4477{
5e670d84 4478 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4479 *flags |= RTNH_F_DEAD;
4480
5e670d84 4481 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4482 *flags |= RTNH_F_LINKDOWN;
4483 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4484 *flags |= RTNH_F_DEAD;
4485 }
4486
4487 if (rt->rt6i_flags & RTF_GATEWAY) {
5e670d84 4488 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4489 goto nla_put_failure;
4490 }
4491
5e670d84
DA
4492 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4493 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4494 *flags |= RTNH_F_OFFLOAD;
4495
5be083ce 4496 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4497 if (!skip_oif && rt->fib6_nh.nh_dev &&
4498 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4499 goto nla_put_failure;
4500
5e670d84
DA
4501 if (rt->fib6_nh.nh_lwtstate &&
4502 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4503 goto nla_put_failure;
4504
4505 return 0;
4506
4507nla_put_failure:
4508 return -EMSGSIZE;
4509}
4510
5be083ce 4511/* add multipath next hop */
beb1afac
DA
4512static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4513{
5e670d84 4514 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4515 struct rtnexthop *rtnh;
4516 unsigned int flags = 0;
4517
4518 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4519 if (!rtnh)
4520 goto nla_put_failure;
4521
5e670d84
DA
4522 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4523 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4524
5be083ce 4525 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4526 goto nla_put_failure;
4527
4528 rtnh->rtnh_flags = flags;
4529
4530 /* length of rtnetlink header + attributes */
4531 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4532
4533 return 0;
4534
4535nla_put_failure:
4536 return -EMSGSIZE;
339bf98f
TG
4537}
4538
d4ead6b3
DA
4539static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4540 struct rt6_info *rt, struct dst_entry *dst,
4541 struct in6_addr *dest, struct in6_addr *src,
15e47304 4542 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4543 unsigned int flags)
1da177e4
LT
4544{
4545 struct rtmsg *rtm;
2d7202bf 4546 struct nlmsghdr *nlh;
d4ead6b3
DA
4547 long expires = 0;
4548 u32 *pmetrics;
9e762a4a 4549 u32 table;
1da177e4 4550
15e47304 4551 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4552 if (!nlh)
26932566 4553 return -EMSGSIZE;
2d7202bf
TG
4554
4555 rtm = nlmsg_data(nlh);
1da177e4
LT
4556 rtm->rtm_family = AF_INET6;
4557 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4558 rtm->rtm_src_len = rt->rt6i_src.plen;
4559 rtm->rtm_tos = 0;
c71099ac 4560 if (rt->rt6i_table)
9e762a4a 4561 table = rt->rt6i_table->tb6_id;
c71099ac 4562 else
9e762a4a
PM
4563 table = RT6_TABLE_UNSPEC;
4564 rtm->rtm_table = table;
c78679e8
DM
4565 if (nla_put_u32(skb, RTA_TABLE, table))
4566 goto nla_put_failure;
e8478e80
DA
4567
4568 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4569 rtm->rtm_flags = 0;
4570 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4571 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4572
38308473 4573 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4574 rtm->rtm_flags |= RTM_F_CLONED;
4575
d4ead6b3
DA
4576 if (dest) {
4577 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4578 goto nla_put_failure;
1ab1457c 4579 rtm->rtm_dst_len = 128;
1da177e4 4580 } else if (rtm->rtm_dst_len)
930345ea 4581 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4582 goto nla_put_failure;
1da177e4
LT
4583#ifdef CONFIG_IPV6_SUBTREES
4584 if (src) {
930345ea 4585 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4586 goto nla_put_failure;
1ab1457c 4587 rtm->rtm_src_len = 128;
c78679e8 4588 } else if (rtm->rtm_src_len &&
930345ea 4589 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4590 goto nla_put_failure;
1da177e4 4591#endif
7bc570c8
YH
4592 if (iif) {
4593#ifdef CONFIG_IPV6_MROUTE
4594 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4595 int err = ip6mr_get_route(net, skb, rtm, portid);
4596
4597 if (err == 0)
4598 return 0;
4599 if (err < 0)
4600 goto nla_put_failure;
7bc570c8
YH
4601 } else
4602#endif
c78679e8
DM
4603 if (nla_put_u32(skb, RTA_IIF, iif))
4604 goto nla_put_failure;
d4ead6b3 4605 } else if (dest) {
1da177e4 4606 struct in6_addr saddr_buf;
d4ead6b3 4607 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4608 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4609 goto nla_put_failure;
1da177e4 4610 }
2d7202bf 4611
c3968a85
DW
4612 if (rt->rt6i_prefsrc.plen) {
4613 struct in6_addr saddr_buf;
4e3fd7a0 4614 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4615 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4616 goto nla_put_failure;
c3968a85
DW
4617 }
4618
d4ead6b3
DA
4619 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4620 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4621 goto nla_put_failure;
4622
c78679e8
DM
4623 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4624 goto nla_put_failure;
8253947e 4625
beb1afac
DA
4626 /* For multipath routes, walk the siblings list and add
4627 * each as a nexthop within RTA_MULTIPATH.
4628 */
4629 if (rt->rt6i_nsiblings) {
4630 struct rt6_info *sibling, *next_sibling;
4631 struct nlattr *mp;
4632
4633 mp = nla_nest_start(skb, RTA_MULTIPATH);
4634 if (!mp)
4635 goto nla_put_failure;
4636
4637 if (rt6_add_nexthop(skb, rt) < 0)
4638 goto nla_put_failure;
4639
4640 list_for_each_entry_safe(sibling, next_sibling,
4641 &rt->rt6i_siblings, rt6i_siblings) {
4642 if (rt6_add_nexthop(skb, sibling) < 0)
4643 goto nla_put_failure;
4644 }
4645
4646 nla_nest_end(skb, mp);
4647 } else {
5be083ce 4648 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4649 goto nla_put_failure;
4650 }
4651
14895687
DA
4652 if (rt->rt6i_flags & RTF_EXPIRES) {
4653 expires = dst ? dst->expires : rt->expires;
4654 expires -= jiffies;
4655 }
69cdf8f9 4656
d4ead6b3 4657 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4658 goto nla_put_failure;
2d7202bf 4659
c78ba6d6
LR
4660 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4661 goto nla_put_failure;
4662
19e42e45 4663
053c095a
JB
4664 nlmsg_end(skb, nlh);
4665 return 0;
2d7202bf
TG
4666
4667nla_put_failure:
26932566
PM
4668 nlmsg_cancel(skb, nlh);
4669 return -EMSGSIZE;
1da177e4
LT
4670}
4671
1b43af54 4672int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4673{
4674 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4675 struct net *net = arg->net;
4676
421842ed 4677 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4678 return 0;
1da177e4 4679
2d7202bf
TG
4680 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4681 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4682
4683 /* user wants prefix routes only */
4684 if (rtm->rtm_flags & RTM_F_PREFIX &&
4685 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4686 /* success since this is not a prefix route */
4687 return 1;
4688 }
4689 }
1da177e4 4690
d4ead6b3
DA
4691 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4692 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4693 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4694}
4695
c21ef3e3
DA
4696static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4697 struct netlink_ext_ack *extack)
1da177e4 4698{
3b1e0a65 4699 struct net *net = sock_net(in_skb->sk);
ab364a6f 4700 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4701 int err, iif = 0, oif = 0;
4702 struct dst_entry *dst;
ab364a6f 4703 struct rt6_info *rt;
1da177e4 4704 struct sk_buff *skb;
ab364a6f 4705 struct rtmsg *rtm;
4c9483b2 4706 struct flowi6 fl6;
18c3a61c 4707 bool fibmatch;
1da177e4 4708
fceb6435 4709 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4710 extack);
ab364a6f
TG
4711 if (err < 0)
4712 goto errout;
1da177e4 4713
ab364a6f 4714 err = -EINVAL;
4c9483b2 4715 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4716 rtm = nlmsg_data(nlh);
4717 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4718 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4719
ab364a6f
TG
4720 if (tb[RTA_SRC]) {
4721 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4722 goto errout;
4723
4e3fd7a0 4724 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4725 }
4726
4727 if (tb[RTA_DST]) {
4728 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4729 goto errout;
4730
4e3fd7a0 4731 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4732 }
4733
4734 if (tb[RTA_IIF])
4735 iif = nla_get_u32(tb[RTA_IIF]);
4736
4737 if (tb[RTA_OIF])
72331bc0 4738 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4739
2e47b291
LC
4740 if (tb[RTA_MARK])
4741 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4742
622ec2c9
LC
4743 if (tb[RTA_UID])
4744 fl6.flowi6_uid = make_kuid(current_user_ns(),
4745 nla_get_u32(tb[RTA_UID]));
4746 else
4747 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4748
1da177e4
LT
4749 if (iif) {
4750 struct net_device *dev;
72331bc0
SL
4751 int flags = 0;
4752
121622db
FW
4753 rcu_read_lock();
4754
4755 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4756 if (!dev) {
121622db 4757 rcu_read_unlock();
1da177e4 4758 err = -ENODEV;
ab364a6f 4759 goto errout;
1da177e4 4760 }
72331bc0
SL
4761
4762 fl6.flowi6_iif = iif;
4763
4764 if (!ipv6_addr_any(&fl6.saddr))
4765 flags |= RT6_LOOKUP_F_HAS_SADDR;
4766
b75cc8f9 4767 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4768
4769 rcu_read_unlock();
72331bc0
SL
4770 } else {
4771 fl6.flowi6_oif = oif;
4772
58acfd71 4773 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4774 }
4775
18c3a61c
RP
4776
4777 rt = container_of(dst, struct rt6_info, dst);
4778 if (rt->dst.error) {
4779 err = rt->dst.error;
4780 ip6_rt_put(rt);
4781 goto errout;
1da177e4
LT
4782 }
4783
9d6acb3b
WC
4784 if (rt == net->ipv6.ip6_null_entry) {
4785 err = rt->dst.error;
4786 ip6_rt_put(rt);
4787 goto errout;
4788 }
4789
fba961ab
DM
4790 if (fibmatch && rt->from) {
4791 struct rt6_info *ort = rt->from;
58acfd71
IS
4792
4793 dst_hold(&ort->dst);
4794 ip6_rt_put(rt);
4795 rt = ort;
4796 }
4797
ab364a6f 4798 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4799 if (!skb) {
94e187c0 4800 ip6_rt_put(rt);
ab364a6f
TG
4801 err = -ENOBUFS;
4802 goto errout;
4803 }
1da177e4 4804
d8d1f30b 4805 skb_dst_set(skb, &rt->dst);
18c3a61c 4806 if (fibmatch)
d4ead6b3 4807 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
18c3a61c
RP
4808 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4809 nlh->nlmsg_seq, 0);
4810 else
d4ead6b3
DA
4811 err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
4812 iif, RTM_NEWROUTE,
4813 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4814 0);
1da177e4 4815 if (err < 0) {
ab364a6f
TG
4816 kfree_skb(skb);
4817 goto errout;
1da177e4
LT
4818 }
4819
15e47304 4820 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4821errout:
1da177e4 4822 return err;
1da177e4
LT
4823}
4824
37a1d361
RP
4825void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4826 unsigned int nlm_flags)
1da177e4
LT
4827{
4828 struct sk_buff *skb;
5578689a 4829 struct net *net = info->nl_net;
528c4ceb
DL
4830 u32 seq;
4831 int err;
4832
4833 err = -ENOBUFS;
38308473 4834 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4835
19e42e45 4836 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4837 if (!skb)
21713ebc
TG
4838 goto errout;
4839
d4ead6b3
DA
4840 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4841 event, info->portid, seq, nlm_flags);
26932566
PM
4842 if (err < 0) {
4843 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4844 WARN_ON(err == -EMSGSIZE);
4845 kfree_skb(skb);
4846 goto errout;
4847 }
15e47304 4848 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4849 info->nlh, gfp_any());
4850 return;
21713ebc
TG
4851errout:
4852 if (err < 0)
5578689a 4853 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4854}
4855
8ed67789 4856static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4857 unsigned long event, void *ptr)
8ed67789 4858{
351638e7 4859 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4860 struct net *net = dev_net(dev);
8ed67789 4861
242d3a49
WC
4862 if (!(dev->flags & IFF_LOOPBACK))
4863 return NOTIFY_OK;
4864
4865 if (event == NETDEV_REGISTER) {
421842ed
DA
4866 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4867 net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4868 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4869 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4870#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4871 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4872 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4873 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4874 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4875#endif
76da0704
WC
4876 } else if (event == NETDEV_UNREGISTER &&
4877 dev->reg_state != NETREG_UNREGISTERED) {
4878 /* NETDEV_UNREGISTER could be fired for multiple times by
4879 * netdev_wait_allrefs(). Make sure we only call this once.
4880 */
421842ed 4881 in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
12d94a80 4882 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4884 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4885 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4886#endif
4887 }
4888
4889 return NOTIFY_OK;
4890}
4891
1da177e4
LT
4892/*
4893 * /proc
4894 */
4895
4896#ifdef CONFIG_PROC_FS
4897
33120b30 4898static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4899 .open = ipv6_route_open,
4900 .read = seq_read,
4901 .llseek = seq_lseek,
8d2ca1d7 4902 .release = seq_release_net,
33120b30
AD
4903};
4904
1da177e4
LT
4905static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4906{
69ddb805 4907 struct net *net = (struct net *)seq->private;
1da177e4 4908 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4909 net->ipv6.rt6_stats->fib_nodes,
4910 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4911 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4912 net->ipv6.rt6_stats->fib_rt_entries,
4913 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4914 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4915 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4916
4917 return 0;
4918}
4919
4920static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4921{
de05c557 4922 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4923}
4924
9a32144e 4925static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4926 .open = rt6_stats_seq_open,
4927 .read = seq_read,
4928 .llseek = seq_lseek,
b6fcbdb4 4929 .release = single_release_net,
1da177e4
LT
4930};
4931#endif /* CONFIG_PROC_FS */
4932
4933#ifdef CONFIG_SYSCTL
4934
1da177e4 4935static
fe2c6338 4936int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4937 void __user *buffer, size_t *lenp, loff_t *ppos)
4938{
c486da34
LAG
4939 struct net *net;
4940 int delay;
4941 if (!write)
1da177e4 4942 return -EINVAL;
c486da34
LAG
4943
4944 net = (struct net *)ctl->extra1;
4945 delay = net->ipv6.sysctl.flush_delay;
4946 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4947 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4948 return 0;
1da177e4
LT
4949}
4950
fe2c6338 4951struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4952 {
1da177e4 4953 .procname = "flush",
4990509f 4954 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4955 .maxlen = sizeof(int),
89c8b3a1 4956 .mode = 0200,
6d9f239a 4957 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4958 },
4959 {
1da177e4 4960 .procname = "gc_thresh",
9a7ec3a9 4961 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4962 .maxlen = sizeof(int),
4963 .mode = 0644,
6d9f239a 4964 .proc_handler = proc_dointvec,
1da177e4
LT
4965 },
4966 {
1da177e4 4967 .procname = "max_size",
4990509f 4968 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4969 .maxlen = sizeof(int),
4970 .mode = 0644,
6d9f239a 4971 .proc_handler = proc_dointvec,
1da177e4
LT
4972 },
4973 {
1da177e4 4974 .procname = "gc_min_interval",
4990509f 4975 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4976 .maxlen = sizeof(int),
4977 .mode = 0644,
6d9f239a 4978 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4979 },
4980 {
1da177e4 4981 .procname = "gc_timeout",
4990509f 4982 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4983 .maxlen = sizeof(int),
4984 .mode = 0644,
6d9f239a 4985 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4986 },
4987 {
1da177e4 4988 .procname = "gc_interval",
4990509f 4989 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4990 .maxlen = sizeof(int),
4991 .mode = 0644,
6d9f239a 4992 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4993 },
4994 {
1da177e4 4995 .procname = "gc_elasticity",
4990509f 4996 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4997 .maxlen = sizeof(int),
4998 .mode = 0644,
f3d3f616 4999 .proc_handler = proc_dointvec,
1da177e4
LT
5000 },
5001 {
1da177e4 5002 .procname = "mtu_expires",
4990509f 5003 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5004 .maxlen = sizeof(int),
5005 .mode = 0644,
6d9f239a 5006 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5007 },
5008 {
1da177e4 5009 .procname = "min_adv_mss",
4990509f 5010 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5011 .maxlen = sizeof(int),
5012 .mode = 0644,
f3d3f616 5013 .proc_handler = proc_dointvec,
1da177e4
LT
5014 },
5015 {
1da177e4 5016 .procname = "gc_min_interval_ms",
4990509f 5017 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5018 .maxlen = sizeof(int),
5019 .mode = 0644,
6d9f239a 5020 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5021 },
f8572d8f 5022 { }
1da177e4
LT
5023};
5024
2c8c1e72 5025struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5026{
5027 struct ctl_table *table;
5028
5029 table = kmemdup(ipv6_route_table_template,
5030 sizeof(ipv6_route_table_template),
5031 GFP_KERNEL);
5ee09105
YH
5032
5033 if (table) {
5034 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5035 table[0].extra1 = net;
86393e52 5036 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5037 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5038 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5039 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5040 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5041 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5042 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5043 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5044 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5045
5046 /* Don't export sysctls to unprivileged users */
5047 if (net->user_ns != &init_user_ns)
5048 table[0].procname = NULL;
5ee09105
YH
5049 }
5050
760f2d01
DL
5051 return table;
5052}
1da177e4
LT
5053#endif
5054
2c8c1e72 5055static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5056{
633d424b 5057 int ret = -ENOMEM;
8ed67789 5058
86393e52
AD
5059 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5060 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5061
fc66f95c
ED
5062 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5063 goto out_ip6_dst_ops;
5064
421842ed
DA
5065 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5066 sizeof(*net->ipv6.fib6_null_entry),
5067 GFP_KERNEL);
5068 if (!net->ipv6.fib6_null_entry)
5069 goto out_ip6_dst_entries;
5070
8ed67789
DL
5071 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5072 sizeof(*net->ipv6.ip6_null_entry),
5073 GFP_KERNEL);
5074 if (!net->ipv6.ip6_null_entry)
421842ed 5075 goto out_fib6_null_entry;
d8d1f30b 5076 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5077 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5078 ip6_template_metrics, true);
8ed67789
DL
5079
5080#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5081 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5082 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5083 sizeof(*net->ipv6.ip6_prohibit_entry),
5084 GFP_KERNEL);
68fffc67
PZ
5085 if (!net->ipv6.ip6_prohibit_entry)
5086 goto out_ip6_null_entry;
d8d1f30b 5087 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5088 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5089 ip6_template_metrics, true);
8ed67789
DL
5090
5091 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5092 sizeof(*net->ipv6.ip6_blk_hole_entry),
5093 GFP_KERNEL);
68fffc67
PZ
5094 if (!net->ipv6.ip6_blk_hole_entry)
5095 goto out_ip6_prohibit_entry;
d8d1f30b 5096 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5097 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5098 ip6_template_metrics, true);
8ed67789
DL
5099#endif
5100
b339a47c
PZ
5101 net->ipv6.sysctl.flush_delay = 0;
5102 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5103 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5104 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5105 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5106 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5107 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5108 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5109
6891a346
BT
5110 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5111
8ed67789
DL
5112 ret = 0;
5113out:
5114 return ret;
f2fc6a54 5115
68fffc67
PZ
5116#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5117out_ip6_prohibit_entry:
5118 kfree(net->ipv6.ip6_prohibit_entry);
5119out_ip6_null_entry:
5120 kfree(net->ipv6.ip6_null_entry);
5121#endif
421842ed
DA
5122out_fib6_null_entry:
5123 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5124out_ip6_dst_entries:
5125 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5126out_ip6_dst_ops:
f2fc6a54 5127 goto out;
cdb18761
DL
5128}
5129
2c8c1e72 5130static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5131{
421842ed 5132 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5133 kfree(net->ipv6.ip6_null_entry);
5134#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5135 kfree(net->ipv6.ip6_prohibit_entry);
5136 kfree(net->ipv6.ip6_blk_hole_entry);
5137#endif
41bb78b4 5138 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5139}
5140
d189634e
TG
5141static int __net_init ip6_route_net_init_late(struct net *net)
5142{
5143#ifdef CONFIG_PROC_FS
d4beaa66 5144 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5145 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5146#endif
5147 return 0;
5148}
5149
5150static void __net_exit ip6_route_net_exit_late(struct net *net)
5151{
5152#ifdef CONFIG_PROC_FS
ece31ffd
G
5153 remove_proc_entry("ipv6_route", net->proc_net);
5154 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5155#endif
5156}
5157
cdb18761
DL
5158static struct pernet_operations ip6_route_net_ops = {
5159 .init = ip6_route_net_init,
5160 .exit = ip6_route_net_exit,
5161};
5162
c3426b47
DM
5163static int __net_init ipv6_inetpeer_init(struct net *net)
5164{
5165 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5166
5167 if (!bp)
5168 return -ENOMEM;
5169 inet_peer_base_init(bp);
5170 net->ipv6.peers = bp;
5171 return 0;
5172}
5173
5174static void __net_exit ipv6_inetpeer_exit(struct net *net)
5175{
5176 struct inet_peer_base *bp = net->ipv6.peers;
5177
5178 net->ipv6.peers = NULL;
56a6b248 5179 inetpeer_invalidate_tree(bp);
c3426b47
DM
5180 kfree(bp);
5181}
5182
2b823f72 5183static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5184 .init = ipv6_inetpeer_init,
5185 .exit = ipv6_inetpeer_exit,
5186};
5187
d189634e
TG
5188static struct pernet_operations ip6_route_net_late_ops = {
5189 .init = ip6_route_net_init_late,
5190 .exit = ip6_route_net_exit_late,
5191};
5192
8ed67789
DL
5193static struct notifier_block ip6_route_dev_notifier = {
5194 .notifier_call = ip6_route_dev_notify,
242d3a49 5195 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5196};
5197
2f460933
WC
5198void __init ip6_route_init_special_entries(void)
5199{
5200 /* Registering of the loopback is done before this portion of code,
5201 * the loopback reference in rt6_info will not be taken, do it
5202 * manually for init_net */
421842ed
DA
5203 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5204 init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2f460933
WC
5205 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5206 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5207 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5208 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5209 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5210 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5211 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5212 #endif
5213}
5214
433d49c3 5215int __init ip6_route_init(void)
1da177e4 5216{
433d49c3 5217 int ret;
8d0b94af 5218 int cpu;
433d49c3 5219
9a7ec3a9
DL
5220 ret = -ENOMEM;
5221 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5222 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5223 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5224 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5225 goto out;
14e50e57 5226
fc66f95c 5227 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5228 if (ret)
bdb3289f 5229 goto out_kmem_cache;
bdb3289f 5230
c3426b47
DM
5231 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5232 if (ret)
e8803b6c 5233 goto out_dst_entries;
2a0c451a 5234
7e52b33b
DM
5235 ret = register_pernet_subsys(&ip6_route_net_ops);
5236 if (ret)
5237 goto out_register_inetpeer;
c3426b47 5238
5dc121e9
AE
5239 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5240
e8803b6c 5241 ret = fib6_init();
433d49c3 5242 if (ret)
8ed67789 5243 goto out_register_subsys;
433d49c3 5244
433d49c3
DL
5245 ret = xfrm6_init();
5246 if (ret)
e8803b6c 5247 goto out_fib6_init;
c35b7e72 5248
433d49c3
DL
5249 ret = fib6_rules_init();
5250 if (ret)
5251 goto xfrm6_init;
7e5449c2 5252
d189634e
TG
5253 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5254 if (ret)
5255 goto fib6_rules_init;
5256
16feebcf
FW
5257 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5258 inet6_rtm_newroute, NULL, 0);
5259 if (ret < 0)
5260 goto out_register_late_subsys;
5261
5262 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5263 inet6_rtm_delroute, NULL, 0);
5264 if (ret < 0)
5265 goto out_register_late_subsys;
5266
5267 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5268 inet6_rtm_getroute, NULL,
5269 RTNL_FLAG_DOIT_UNLOCKED);
5270 if (ret < 0)
d189634e 5271 goto out_register_late_subsys;
c127ea2c 5272
8ed67789 5273 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5274 if (ret)
d189634e 5275 goto out_register_late_subsys;
8ed67789 5276
8d0b94af
MKL
5277 for_each_possible_cpu(cpu) {
5278 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5279
5280 INIT_LIST_HEAD(&ul->head);
5281 spin_lock_init(&ul->lock);
5282 }
5283
433d49c3
DL
5284out:
5285 return ret;
5286
d189634e 5287out_register_late_subsys:
16feebcf 5288 rtnl_unregister_all(PF_INET6);
d189634e 5289 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5290fib6_rules_init:
433d49c3
DL
5291 fib6_rules_cleanup();
5292xfrm6_init:
433d49c3 5293 xfrm6_fini();
2a0c451a
TG
5294out_fib6_init:
5295 fib6_gc_cleanup();
8ed67789
DL
5296out_register_subsys:
5297 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5298out_register_inetpeer:
5299 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5300out_dst_entries:
5301 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5302out_kmem_cache:
f2fc6a54 5303 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5304 goto out;
1da177e4
LT
5305}
5306
5307void ip6_route_cleanup(void)
5308{
8ed67789 5309 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5310 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5311 fib6_rules_cleanup();
1da177e4 5312 xfrm6_fini();
1da177e4 5313 fib6_gc_cleanup();
c3426b47 5314 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5315 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5316 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5317 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5318}