net/ipv6: Move release of fib6_info from pcpu routes to helper
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result of the neighbour reachability check performed while scoring a
 * route.  Every failure code is negative; rt6_score_route() hands a
 * negative result straight back to its caller.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
8d1c802b
DA
99static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 102 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
8d1c802b 106static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 111static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
8d1c802b 116static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* dst_ops->redirect callback for blackhole dsts: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
8d1c802b 286static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
287 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .fib6_protocol = RTPROT_KERNEL,
289 .fib6_metric = ~(u32)0,
290 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
305};
306
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the "prohibit" rt6_info: the dst reports -EACCES and uses
 * the ip6_pkt_prohibit handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/* Template for the "blackhole" rt6_info: the dst reports -EINVAL and uses
 * the generic dst_discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
334
ebfa45f0
MKL
335static void rt6_info_init(struct rt6_info *rt)
336{
337 struct dst_entry *dst = &rt->dst;
338
339 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
340 INIT_LIST_HEAD(&rt->rt6i_uncached);
341}
342
1da177e4 343/* allocate dst with ip6_dst_ops */
93531c67
DA
344struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 int flags)
1da177e4 346{
97bab73f 347 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 348 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 349
81eb8447 350 if (rt) {
ebfa45f0 351 rt6_info_init(rt);
81eb8447
WW
352 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 }
8104891b 354
cf911662 355 return rt;
1da177e4 356}
9ab179d8 357EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 358
1da177e4
LT
359static void ip6_dst_destroy(struct dst_entry *dst)
360{
361 struct rt6_info *rt = (struct rt6_info *)dst;
8d1c802b 362 struct fib6_info *from = rt->from;
8d0b94af 363 struct inet6_dev *idev;
1da177e4 364
4b32b5ad 365 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
366 rt6_uncached_list_del(rt);
367
368 idev = rt->rt6i_idev;
38308473 369 if (idev) {
1da177e4
LT
370 rt->rt6i_idev = NULL;
371 in6_dev_put(idev);
1ab1457c 372 }
d4ead6b3 373
3a2232e9 374 rt->from = NULL;
93531c67 375 fib6_info_release(from);
b3419363
DM
376}
377
1da177e4
LT
378static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
379 int how)
380{
381 struct rt6_info *rt = (struct rt6_info *)dst;
382 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 383 struct net_device *loopback_dev =
c346dca1 384 dev_net(dev)->loopback_dev;
1da177e4 385
e5645f51
WW
386 if (idev && idev->dev != loopback_dev) {
387 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
388 if (loopback_idev) {
389 rt->rt6i_idev = loopback_idev;
390 in6_dev_put(idev);
97cac082 391 }
1da177e4
LT
392 }
393}
394
5973fb1e
MKL
395static bool __rt6_check_expired(const struct rt6_info *rt)
396{
397 if (rt->rt6i_flags & RTF_EXPIRES)
398 return time_after(jiffies, rt->dst.expires);
399 else
400 return false;
401}
402
a50feda5 403static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 404{
1716a961
G
405 if (rt->rt6i_flags & RTF_EXPIRES) {
406 if (time_after(jiffies, rt->dst.expires))
a50feda5 407 return true;
3a2232e9 408 } else if (rt->from) {
1e2ea8ad 409 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
14895687 410 fib6_check_expired(rt->from);
1716a961 411 }
a50feda5 412 return false;
1da177e4
LT
413}
414
8d1c802b
DA
415static struct fib6_info *rt6_multipath_select(const struct net *net,
416 struct fib6_info *match,
52bd4c0c 417 struct flowi6 *fl6, int oif,
b75cc8f9 418 const struct sk_buff *skb,
52bd4c0c 419 int strict)
51ebd318 420{
8d1c802b 421 struct fib6_info *sibling, *next_sibling;
51ebd318 422
b673d6cc
JS
423 /* We might have already computed the hash for ICMPv6 errors. In such
424 * case it will always be non-zero. Otherwise now is the time to do it.
425 */
426 if (!fl6->mp_hash)
b4bac172 427 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 428
5e670d84 429 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
430 return match;
431
93c2fb25
DA
432 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
433 fib6_siblings) {
5e670d84
DA
434 int nh_upper_bound;
435
436 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
437 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
438 continue;
439 if (rt6_score_route(sibling, oif, strict) < 0)
440 break;
441 match = sibling;
442 break;
443 }
444
51ebd318
ND
445 return match;
446}
447
1da177e4 448/*
66f5d6ce 449 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
450 */
451
8d1c802b
DA
452static inline struct fib6_info *rt6_device_match(struct net *net,
453 struct fib6_info *rt,
b71d1d42 454 const struct in6_addr *saddr,
1da177e4 455 int oif,
d420895e 456 int flags)
1da177e4 457{
8d1c802b 458 struct fib6_info *sprt;
1da177e4 459
5e670d84
DA
460 if (!oif && ipv6_addr_any(saddr) &&
461 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 462 return rt;
dd3abc4e 463
071fb37e 464 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 465 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 466
5e670d84 467 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
468 continue;
469
dd3abc4e 470 if (oif) {
1da177e4
LT
471 if (dev->ifindex == oif)
472 return sprt;
dd3abc4e
YH
473 } else {
474 if (ipv6_chk_addr(net, saddr, dev,
475 flags & RT6_LOOKUP_F_IFACE))
476 return sprt;
1da177e4 477 }
dd3abc4e 478 }
1da177e4 479
eea68cd3
DA
480 if (oif && flags & RT6_LOOKUP_F_IFACE)
481 return net->ipv6.fib6_null_entry;
8067bb8c 482
421842ed 483 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
484}
485
27097255 486#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
487struct __rt6_probe_work {
488 struct work_struct work;
489 struct in6_addr target;
490 struct net_device *dev;
491};
492
493static void rt6_probe_deferred(struct work_struct *w)
494{
495 struct in6_addr mcaddr;
496 struct __rt6_probe_work *work =
497 container_of(w, struct __rt6_probe_work, work);
498
499 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 500 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 501 dev_put(work->dev);
662f5533 502 kfree(work);
c2f17e82
HFS
503}
504
8d1c802b 505static void rt6_probe(struct fib6_info *rt)
27097255 506{
990edb42 507 struct __rt6_probe_work *work;
5e670d84 508 const struct in6_addr *nh_gw;
f2c31e32 509 struct neighbour *neigh;
5e670d84
DA
510 struct net_device *dev;
511
27097255
YH
512 /*
513 * Okay, this does not seem to be appropriate
514 * for now, however, we need to check if it
515 * is really so; aka Router Reachability Probing.
516 *
517 * Router Reachability Probe MUST be rate-limited
518 * to no more than one per minute.
519 */
93c2fb25 520 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 521 return;
5e670d84
DA
522
523 nh_gw = &rt->fib6_nh.nh_gw;
524 dev = rt->fib6_nh.nh_dev;
2152caea 525 rcu_read_lock_bh();
5e670d84 526 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 527 if (neigh) {
dcd1f572
DA
528 struct inet6_dev *idev;
529
8d6c31bf
MKL
530 if (neigh->nud_state & NUD_VALID)
531 goto out;
532
dcd1f572 533 idev = __in6_dev_get(dev);
990edb42 534 work = NULL;
2152caea 535 write_lock(&neigh->lock);
990edb42
MKL
536 if (!(neigh->nud_state & NUD_VALID) &&
537 time_after(jiffies,
dcd1f572 538 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
539 work = kmalloc(sizeof(*work), GFP_ATOMIC);
540 if (work)
541 __neigh_set_probe_once(neigh);
c2f17e82 542 }
2152caea 543 write_unlock(&neigh->lock);
990edb42
MKL
544 } else {
545 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 546 }
990edb42
MKL
547
548 if (work) {
549 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
550 work->target = *nh_gw;
551 dev_hold(dev);
552 work->dev = dev;
990edb42
MKL
553 schedule_work(&work->work);
554 }
555
8d6c31bf 556out:
2152caea 557 rcu_read_unlock_bh();
27097255
YH
558}
559#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
}
563#endif
564
1da177e4 565/*
554cfb7e 566 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 567 */
8d1c802b 568static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 569{
5e670d84
DA
570 const struct net_device *dev = rt->fib6_nh.nh_dev;
571
161980f4 572 if (!oif || dev->ifindex == oif)
554cfb7e 573 return 2;
161980f4 574 return 0;
554cfb7e 575}
1da177e4 576
8d1c802b 577static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 578{
afc154e9 579 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 580 struct neighbour *neigh;
f2c31e32 581
93c2fb25
DA
582 if (rt->fib6_flags & RTF_NONEXTHOP ||
583 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 584 return RT6_NUD_SUCCEED;
145a3621
YH
585
586 rcu_read_lock_bh();
5e670d84
DA
587 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
588 &rt->fib6_nh.nh_gw);
145a3621
YH
589 if (neigh) {
590 read_lock(&neigh->lock);
554cfb7e 591 if (neigh->nud_state & NUD_VALID)
afc154e9 592 ret = RT6_NUD_SUCCEED;
398bcbeb 593#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 594 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 595 ret = RT6_NUD_SUCCEED;
7e980569
JB
596 else
597 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 598#endif
145a3621 599 read_unlock(&neigh->lock);
afc154e9
HFS
600 } else {
601 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 602 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 603 }
145a3621
YH
604 rcu_read_unlock_bh();
605
a5a81f0b 606 return ret;
1da177e4
LT
607}
608
8d1c802b 609static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 610{
a5a81f0b 611 int m;
1ab1457c 612
4d0c5911 613 m = rt6_check_dev(rt, oif);
77d16f45 614 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 615 return RT6_NUD_FAIL_HARD;
ebacaaa0 616#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 617 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 618#endif
afc154e9
HFS
619 if (strict & RT6_LOOKUP_F_REACHABLE) {
620 int n = rt6_check_neigh(rt);
621 if (n < 0)
622 return n;
623 }
554cfb7e
YH
624 return m;
625}
626
dcd1f572
DA
627/* called with rc_read_lock held */
628static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
629{
630 const struct net_device *dev = fib6_info_nh_dev(f6i);
631 bool rc = false;
632
633 if (dev) {
634 const struct inet6_dev *idev = __in6_dev_get(dev);
635
636 rc = !!idev->cnf.ignore_routes_with_linkdown;
637 }
638
639 return rc;
640}
641
8d1c802b
DA
642static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
643 int *mpri, struct fib6_info *match,
afc154e9 644 bool *do_rr)
554cfb7e 645{
f11e6659 646 int m;
afc154e9 647 bool match_do_rr = false;
35103d11 648
5e670d84 649 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
650 goto out;
651
dcd1f572 652 if (fib6_ignore_linkdown(rt) &&
5e670d84 653 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 654 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 655 goto out;
f11e6659 656
14895687 657 if (fib6_check_expired(rt))
f11e6659
DM
658 goto out;
659
660 m = rt6_score_route(rt, oif, strict);
7e980569 661 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
662 match_do_rr = true;
663 m = 0; /* lowest valid score */
7e980569 664 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 665 goto out;
afc154e9
HFS
666 }
667
668 if (strict & RT6_LOOKUP_F_REACHABLE)
669 rt6_probe(rt);
f11e6659 670
7e980569 671 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 672 if (m > *mpri) {
afc154e9 673 *do_rr = match_do_rr;
f11e6659
DM
674 *mpri = m;
675 match = rt;
f11e6659 676 }
f11e6659
DM
677out:
678 return match;
679}
680
8d1c802b
DA
681static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
682 struct fib6_info *leaf,
683 struct fib6_info *rr_head,
afc154e9
HFS
684 u32 metric, int oif, int strict,
685 bool *do_rr)
f11e6659 686{
8d1c802b 687 struct fib6_info *rt, *match, *cont;
554cfb7e 688 int mpri = -1;
1da177e4 689
f11e6659 690 match = NULL;
9fbdcfaf 691 cont = NULL;
071fb37e 692 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
93c2fb25 693 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
694 cont = rt;
695 break;
696 }
697
698 match = find_match(rt, oif, strict, &mpri, match, do_rr);
699 }
700
66f5d6ce 701 for (rt = leaf; rt && rt != rr_head;
071fb37e 702 rt = rcu_dereference(rt->rt6_next)) {
93c2fb25 703 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
704 cont = rt;
705 break;
706 }
707
afc154e9 708 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
709 }
710
711 if (match || !cont)
712 return match;
713
071fb37e 714 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 716
f11e6659
DM
717 return match;
718}
1da177e4 719
8d1c802b 720static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 721 int oif, int strict)
f11e6659 722{
8d1c802b
DA
723 struct fib6_info *leaf = rcu_dereference(fn->leaf);
724 struct fib6_info *match, *rt0;
afc154e9 725 bool do_rr = false;
17ecf590 726 int key_plen;
1da177e4 727
421842ed
DA
728 if (!leaf || leaf == net->ipv6.fib6_null_entry)
729 return net->ipv6.fib6_null_entry;
8d1040e8 730
66f5d6ce 731 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 732 if (!rt0)
66f5d6ce 733 rt0 = leaf;
1da177e4 734
17ecf590
WW
735 /* Double check to make sure fn is not an intermediate node
736 * and fn->leaf does not points to its child's leaf
737 * (This might happen if all routes under fn are deleted from
738 * the tree and fib6_repair_tree() is called on the node.)
739 */
93c2fb25 740 key_plen = rt0->fib6_dst.plen;
17ecf590 741#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
742 if (rt0->fib6_src.plen)
743 key_plen = rt0->fib6_src.plen;
17ecf590
WW
744#endif
745 if (fn->fn_bit != key_plen)
421842ed 746 return net->ipv6.fib6_null_entry;
17ecf590 747
93c2fb25 748 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 749 &do_rr);
1da177e4 750
afc154e9 751 if (do_rr) {
8d1c802b 752 struct fib6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 753
554cfb7e 754 /* no entries matched; do round-robin */
93c2fb25 755 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 756 next = leaf;
f11e6659 757
66f5d6ce 758 if (next != rt0) {
93c2fb25 759 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 760 /* make sure next is not being deleted from the tree */
93c2fb25 761 if (next->fib6_node)
66f5d6ce 762 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 763 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 764 }
1da177e4 765 }
1da177e4 766
421842ed 767 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
768}
769
8d1c802b 770static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 771{
93c2fb25 772 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
773}
774
70ceb4f5
YH
775#ifdef CONFIG_IPV6_ROUTE_INFO
776int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 777 const struct in6_addr *gwaddr)
70ceb4f5 778{
c346dca1 779 struct net *net = dev_net(dev);
70ceb4f5
YH
780 struct route_info *rinfo = (struct route_info *) opt;
781 struct in6_addr prefix_buf, *prefix;
782 unsigned int pref;
4bed72e4 783 unsigned long lifetime;
8d1c802b 784 struct fib6_info *rt;
70ceb4f5
YH
785
786 if (len < sizeof(struct route_info)) {
787 return -EINVAL;
788 }
789
790 /* Sanity check for prefix_len and length */
791 if (rinfo->length > 3) {
792 return -EINVAL;
793 } else if (rinfo->prefix_len > 128) {
794 return -EINVAL;
795 } else if (rinfo->prefix_len > 64) {
796 if (rinfo->length < 2) {
797 return -EINVAL;
798 }
799 } else if (rinfo->prefix_len > 0) {
800 if (rinfo->length < 1) {
801 return -EINVAL;
802 }
803 }
804
805 pref = rinfo->route_pref;
806 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 807 return -EINVAL;
70ceb4f5 808
4bed72e4 809 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
810
811 if (rinfo->length == 3)
812 prefix = (struct in6_addr *)rinfo->prefix;
813 else {
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf,
816 (struct in6_addr *)rinfo->prefix,
817 rinfo->prefix_len);
818 prefix = &prefix_buf;
819 }
820
f104a567 821 if (rinfo->prefix_len == 0)
afb1d4b5 822 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
823 else
824 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 825 gwaddr, dev);
70ceb4f5
YH
826
827 if (rt && !lifetime) {
afb1d4b5 828 ip6_del_rt(net, rt);
70ceb4f5
YH
829 rt = NULL;
830 }
831
832 if (!rt && lifetime)
830218c1
DA
833 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
834 dev, pref);
70ceb4f5 835 else if (rt)
93c2fb25
DA
836 rt->fib6_flags = RTF_ROUTEINFO |
837 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
838
839 if (rt) {
1716a961 840 if (!addrconf_finite_timeout(lifetime))
14895687 841 fib6_clean_expires(rt);
1716a961 842 else
14895687 843 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 844
93531c67 845 fib6_info_release(rt);
70ceb4f5
YH
846 }
847 return 0;
848}
849#endif
850
ae90d867
DA
851/*
852 * Misc support functions
853 */
854
855/* called with rcu_lock held */
8d1c802b 856static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 857{
5e670d84 858 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 859
93c2fb25 860 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
861 /* for copies of local routes, dst->dev needs to be the
862 * device if it is a master device, the master device if
863 * device is enslaved, and the loopback as the default
864 */
865 if (netif_is_l3_slave(dev) &&
93c2fb25 866 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
867 dev = l3mdev_master_dev_rcu(dev);
868 else if (!netif_is_l3_master(dev))
869 dev = dev_net(dev)->loopback_dev;
870 /* last case is netif_is_l3_master(dev) is true in which
871 * case we want dev returned to be dev
872 */
873 }
874
875 return dev;
876}
877
6edb3c96
DA
878static const int fib6_prop[RTN_MAX + 1] = {
879 [RTN_UNSPEC] = 0,
880 [RTN_UNICAST] = 0,
881 [RTN_LOCAL] = 0,
882 [RTN_BROADCAST] = 0,
883 [RTN_ANYCAST] = 0,
884 [RTN_MULTICAST] = 0,
885 [RTN_BLACKHOLE] = -EINVAL,
886 [RTN_UNREACHABLE] = -EHOSTUNREACH,
887 [RTN_PROHIBIT] = -EACCES,
888 [RTN_THROW] = -EAGAIN,
889 [RTN_NAT] = -EINVAL,
890 [RTN_XRESOLVE] = -EINVAL,
891};
892
893static int ip6_rt_type_to_error(u8 fib6_type)
894{
895 return fib6_prop[fib6_type];
896}
897
8d1c802b 898static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
899{
900 unsigned short flags = 0;
901
902 if (rt->dst_nocount)
903 flags |= DST_NOCOUNT;
904 if (rt->dst_nopolicy)
905 flags |= DST_NOPOLICY;
906 if (rt->dst_host)
907 flags |= DST_HOST;
908
909 return flags;
910}
911
8d1c802b 912static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
913{
914 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
915
916 switch (ort->fib6_type) {
917 case RTN_BLACKHOLE:
918 rt->dst.output = dst_discard_out;
919 rt->dst.input = dst_discard;
920 break;
921 case RTN_PROHIBIT:
922 rt->dst.output = ip6_pkt_prohibit_out;
923 rt->dst.input = ip6_pkt_prohibit;
924 break;
925 case RTN_THROW:
926 case RTN_UNREACHABLE:
927 default:
928 rt->dst.output = ip6_pkt_discard_out;
929 rt->dst.input = ip6_pkt_discard;
930 break;
931 }
932}
933
8d1c802b 934static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 935{
3b6761d1
DA
936 rt->dst.flags |= fib6_info_dst_flags(ort);
937
93c2fb25 938 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
939 ip6_rt_init_dst_reject(rt, ort);
940 return;
941 }
942
943 rt->dst.error = 0;
944 rt->dst.output = ip6_output;
945
946 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96 947 rt->dst.input = ip6_input;
93c2fb25 948 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
949 rt->dst.input = ip6_mc_input;
950 } else {
951 rt->dst.input = ip6_forward;
952 }
953
954 if (ort->fib6_nh.nh_lwtstate) {
955 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
956 lwtunnel_set_redirect(&rt->dst);
957 }
958
959 rt->dst.lastuse = jiffies;
960}
961
8d1c802b 962static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 963{
ae90d867 964 rt->rt6i_flags &= ~RTF_EXPIRES;
93531c67
DA
965 fib6_info_hold(from);
966 rt->from = from;
d4ead6b3
DA
967 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
968 if (from->fib6_metrics != &dst_default_metrics) {
969 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
970 refcount_inc(&from->fib6_metrics->refcnt);
971 }
ae90d867
DA
972}
973
8d1c802b 974static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 975{
dcd1f572
DA
976 struct net_device *dev = fib6_info_nh_dev(ort);
977
6edb3c96
DA
978 ip6_rt_init_dst(rt, ort);
979
93c2fb25 980 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 981 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 982 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 983 rt->rt6i_flags = ort->fib6_flags;
ae90d867 984 rt6_set_from(rt, ort);
ae90d867 985#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 986 rt->rt6i_src = ort->fib6_src;
ae90d867 987#endif
93c2fb25 988 rt->rt6i_prefsrc = ort->fib6_prefsrc;
5e670d84 989 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
990}
991
a3c00e46
MKL
992static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
993 struct in6_addr *saddr)
994{
66f5d6ce 995 struct fib6_node *pn, *sn;
a3c00e46
MKL
996 while (1) {
997 if (fn->fn_flags & RTN_TL_ROOT)
998 return NULL;
66f5d6ce
WW
999 pn = rcu_dereference(fn->parent);
1000 sn = FIB6_SUBTREE(pn);
1001 if (sn && sn != fn)
1002 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1003 else
1004 fn = pn;
1005 if (fn->fn_flags & RTN_RTINFO)
1006 return fn;
1007 }
1008}
c71099ac 1009
d3843fe5
WW
1010static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1011 bool null_fallback)
1012{
1013 struct rt6_info *rt = *prt;
1014
1015 if (dst_hold_safe(&rt->dst))
1016 return true;
1017 if (null_fallback) {
1018 rt = net->ipv6.ip6_null_entry;
1019 dst_hold(&rt->dst);
1020 } else {
1021 rt = NULL;
1022 }
1023 *prt = rt;
1024 return false;
1025}
1026
dec9b0e2 1027/* called with rcu_lock held */
8d1c802b 1028static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1029{
3b6761d1 1030 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1031 struct net_device *dev = rt->fib6_nh.nh_dev;
1032 struct rt6_info *nrt;
1033
93531c67 1034 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1035 if (nrt)
1036 ip6_rt_copy_init(nrt, rt);
1037
1038 return nrt;
1039}
1040
8ed67789
DL
1041static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1042 struct fib6_table *table,
b75cc8f9
DA
1043 struct flowi6 *fl6,
1044 const struct sk_buff *skb,
1045 int flags)
1da177e4 1046{
8d1c802b 1047 struct fib6_info *f6i;
1da177e4 1048 struct fib6_node *fn;
23fb93a4 1049 struct rt6_info *rt;
1da177e4 1050
b6cdbc85
DA
1051 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1052 flags &= ~RT6_LOOKUP_F_IFACE;
1053
66f5d6ce 1054 rcu_read_lock();
4c9483b2 1055 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1056restart:
23fb93a4
DA
1057 f6i = rcu_dereference(fn->leaf);
1058 if (!f6i) {
1059 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1060 } else {
23fb93a4 1061 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1062 fl6->flowi6_oif, flags);
93c2fb25 1063 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
23fb93a4
DA
1064 f6i = rt6_multipath_select(net, f6i, fl6,
1065 fl6->flowi6_oif, skb, flags);
66f5d6ce 1066 }
23fb93a4 1067 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1068 fn = fib6_backtrack(fn, &fl6->saddr);
1069 if (fn)
1070 goto restart;
1071 }
23fb93a4 1072
2b760fcf 1073 /* Search through exception table */
23fb93a4
DA
1074 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1075 if (rt) {
dec9b0e2
DA
1076 if (ip6_hold_safe(net, &rt, true))
1077 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1078 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1079 rt = net->ipv6.ip6_null_entry;
1080 dst_hold(&rt->dst);
23fb93a4
DA
1081 } else {
1082 rt = ip6_create_rt_rcu(f6i);
1083 if (!rt) {
1084 rt = net->ipv6.ip6_null_entry;
1085 dst_hold(&rt->dst);
1086 }
dec9b0e2 1087 }
d3843fe5 1088
66f5d6ce 1089 rcu_read_unlock();
b811580d 1090
b65f164d 1091 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1092
c71099ac 1093 return rt;
c71099ac
TG
1094}
1095
67ba4152 1096struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1097 const struct sk_buff *skb, int flags)
ea6e574e 1098{
b75cc8f9 1099 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1100}
1101EXPORT_SYMBOL_GPL(ip6_route_lookup);
1102
9acd9f3a 1103struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1104 const struct in6_addr *saddr, int oif,
1105 const struct sk_buff *skb, int strict)
c71099ac 1106{
4c9483b2
DM
1107 struct flowi6 fl6 = {
1108 .flowi6_oif = oif,
1109 .daddr = *daddr,
c71099ac
TG
1110 };
1111 struct dst_entry *dst;
77d16f45 1112 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1113
adaa70bb 1114 if (saddr) {
4c9483b2 1115 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1116 flags |= RT6_LOOKUP_F_HAS_SADDR;
1117 }
1118
b75cc8f9 1119 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1120 if (dst->error == 0)
1121 return (struct rt6_info *) dst;
1122
1123 dst_release(dst);
1124
1da177e4
LT
1125 return NULL;
1126}
7159039a
YH
1127EXPORT_SYMBOL(rt6_lookup);
1128
c71099ac 1129/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1130 * It takes new route entry, the addition fails by any reason the
1131 * route is released.
1132 * Caller must hold dst before calling it.
1da177e4
LT
1133 */
1134
8d1c802b 1135static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1136 struct netlink_ext_ack *extack)
1da177e4
LT
1137{
1138 int err;
c71099ac 1139 struct fib6_table *table;
1da177e4 1140
93c2fb25 1141 table = rt->fib6_table;
66f5d6ce 1142 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1143 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1144 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1145
1146 return err;
1147}
1148
8d1c802b 1149int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1150{
afb1d4b5 1151 struct nl_info info = { .nl_net = net, };
e715b6d3 1152
d4ead6b3 1153 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1154}
1155
8d1c802b 1156static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1157 const struct in6_addr *daddr,
1158 const struct in6_addr *saddr)
1da177e4 1159{
4832c30d 1160 struct net_device *dev;
1da177e4
LT
1161 struct rt6_info *rt;
1162
1163 /*
1164 * Clone the route.
1165 */
1166
4832c30d 1167 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1168 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
83a09abd
MKL
1169 if (!rt)
1170 return NULL;
1171
1172 ip6_rt_copy_init(rt, ort);
1173 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1174 rt->dst.flags |= DST_HOST;
1175 rt->rt6i_dst.addr = *daddr;
1176 rt->rt6i_dst.plen = 128;
1da177e4 1177
83a09abd 1178 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1179 if (ort->fib6_dst.plen != 128 &&
1180 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1181 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1182#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1183 if (rt->rt6i_src.plen && saddr) {
1184 rt->rt6i_src.addr = *saddr;
1185 rt->rt6i_src.plen = 128;
8b9df265 1186 }
83a09abd 1187#endif
95a9a5ba 1188 }
1da177e4 1189
95a9a5ba
YH
1190 return rt;
1191}
1da177e4 1192
8d1c802b 1193static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1194{
3b6761d1 1195 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1196 struct net_device *dev;
d52d3997
MKL
1197 struct rt6_info *pcpu_rt;
1198
4832c30d
DA
1199 rcu_read_lock();
1200 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1201 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1202 rcu_read_unlock();
d52d3997
MKL
1203 if (!pcpu_rt)
1204 return NULL;
1205 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1206 pcpu_rt->rt6i_flags |= RTF_PCPU;
1207 return pcpu_rt;
1208}
1209
66f5d6ce 1210/* It should be called with rcu_read_lock() acquired */
8d1c802b 1211static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1212{
a73e4195 1213 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1214
1215 p = this_cpu_ptr(rt->rt6i_pcpu);
1216 pcpu_rt = *p;
1217
d4ead6b3
DA
1218 if (pcpu_rt)
1219 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1220
a73e4195
MKL
1221 return pcpu_rt;
1222}
1223
afb1d4b5 1224static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1225 struct fib6_info *rt)
a73e4195
MKL
1226{
1227 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1228
1229 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1230 if (!pcpu_rt) {
9c7370a1
MKL
1231 dst_hold(&net->ipv6.ip6_null_entry->dst);
1232 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1233 }
1234
a94b9367
WW
1235 dst_hold(&pcpu_rt->dst);
1236 p = this_cpu_ptr(rt->rt6i_pcpu);
1237 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1238 BUG_ON(prev);
a94b9367 1239
d52d3997
MKL
1240 return pcpu_rt;
1241}
1242
/* Exception hash table implementation.
 *
 * rt6_exception_lock is taken by the routines below that add, remove or
 * update exception entries.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1246
1247/* Remove rt6_ex from hash table and free the memory
1248 * Caller must hold rt6_exception_lock
1249 */
1250static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1251 struct rt6_exception *rt6_ex)
1252{
b2427e67 1253 struct net *net;
81eb8447 1254
35732d01
WW
1255 if (!bucket || !rt6_ex)
1256 return;
b2427e67
CIK
1257
1258 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1259 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1260 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1261 kfree_rcu(rt6_ex, rcu);
1262 WARN_ON_ONCE(!bucket->depth);
1263 bucket->depth--;
81eb8447 1264 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1265}
1266
1267/* Remove oldest rt6_ex in bucket and free the memory
1268 * Caller must hold rt6_exception_lock
1269 */
1270static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1271{
1272 struct rt6_exception *rt6_ex, *oldest = NULL;
1273
1274 if (!bucket)
1275 return;
1276
1277 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1278 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1279 oldest = rt6_ex;
1280 }
1281 rt6_remove_exception(bucket, oldest);
1282}
1283
1284static u32 rt6_exception_hash(const struct in6_addr *dst,
1285 const struct in6_addr *src)
1286{
1287 static u32 seed __read_mostly;
1288 u32 val;
1289
1290 net_get_random_once(&seed, sizeof(seed));
1291 val = jhash(dst, sizeof(*dst), seed);
1292
1293#ifdef CONFIG_IPV6_SUBTREES
1294 if (src)
1295 val = jhash(src, sizeof(*src), val);
1296#endif
1297 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1298}
1299
1300/* Helper function to find the cached rt in the hash table
1301 * and update bucket pointer to point to the bucket for this
1302 * (daddr, saddr) pair
1303 * Caller must hold rt6_exception_lock
1304 */
1305static struct rt6_exception *
1306__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1307 const struct in6_addr *daddr,
1308 const struct in6_addr *saddr)
1309{
1310 struct rt6_exception *rt6_ex;
1311 u32 hval;
1312
1313 if (!(*bucket) || !daddr)
1314 return NULL;
1315
1316 hval = rt6_exception_hash(daddr, saddr);
1317 *bucket += hval;
1318
1319 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1320 struct rt6_info *rt6 = rt6_ex->rt6i;
1321 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1322
1323#ifdef CONFIG_IPV6_SUBTREES
1324 if (matched && saddr)
1325 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1326#endif
1327 if (matched)
1328 return rt6_ex;
1329 }
1330 return NULL;
1331}
1332
1333/* Helper function to find the cached rt in the hash table
1334 * and update bucket pointer to point to the bucket for this
1335 * (daddr, saddr) pair
1336 * Caller must hold rcu_read_lock()
1337 */
1338static struct rt6_exception *
1339__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1340 const struct in6_addr *daddr,
1341 const struct in6_addr *saddr)
1342{
1343 struct rt6_exception *rt6_ex;
1344 u32 hval;
1345
1346 WARN_ON_ONCE(!rcu_read_lock_held());
1347
1348 if (!(*bucket) || !daddr)
1349 return NULL;
1350
1351 hval = rt6_exception_hash(daddr, saddr);
1352 *bucket += hval;
1353
1354 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1355 struct rt6_info *rt6 = rt6_ex->rt6i;
1356 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1357
1358#ifdef CONFIG_IPV6_SUBTREES
1359 if (matched && saddr)
1360 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1361#endif
1362 if (matched)
1363 return rt6_ex;
1364 }
1365 return NULL;
1366}
1367
8d1c802b 1368static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1369{
1370 unsigned int mtu;
1371
dcd1f572
DA
1372 if (rt->fib6_pmtu) {
1373 mtu = rt->fib6_pmtu;
1374 } else {
1375 struct net_device *dev = fib6_info_nh_dev(rt);
1376 struct inet6_dev *idev;
1377
1378 rcu_read_lock();
1379 idev = __in6_dev_get(dev);
1380 mtu = idev->cnf.mtu6;
1381 rcu_read_unlock();
1382 }
1383
d4ead6b3
DA
1384 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1385
1386 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1387}
1388
35732d01 1389static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1390 struct fib6_info *ort)
35732d01 1391{
5e670d84 1392 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1393 struct rt6_exception_bucket *bucket;
1394 struct in6_addr *src_key = NULL;
1395 struct rt6_exception *rt6_ex;
1396 int err = 0;
1397
35732d01
WW
1398 spin_lock_bh(&rt6_exception_lock);
1399
1400 if (ort->exception_bucket_flushed) {
1401 err = -EINVAL;
1402 goto out;
1403 }
1404
1405 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1406 lockdep_is_held(&rt6_exception_lock));
1407 if (!bucket) {
1408 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1409 GFP_ATOMIC);
1410 if (!bucket) {
1411 err = -ENOMEM;
1412 goto out;
1413 }
1414 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1415 }
1416
1417#ifdef CONFIG_IPV6_SUBTREES
1418 /* rt6i_src.plen != 0 indicates ort is in subtree
1419 * and exception table is indexed by a hash of
1420 * both rt6i_dst and rt6i_src.
1421 * Otherwise, the exception table is indexed by
1422 * a hash of only rt6i_dst.
1423 */
93c2fb25 1424 if (ort->fib6_src.plen)
35732d01
WW
1425 src_key = &nrt->rt6i_src.addr;
1426#endif
60006a48
WW
1427
1428 /* Update rt6i_prefsrc as it could be changed
1429 * in rt6_remove_prefsrc()
1430 */
93c2fb25 1431 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1432 /* rt6_mtu_change() might lower mtu on ort.
1433 * Only insert this exception route if its mtu
1434 * is less than ort's mtu value.
1435 */
d4ead6b3 1436 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1437 err = -EINVAL;
1438 goto out;
1439 }
60006a48 1440
35732d01
WW
1441 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1442 src_key);
1443 if (rt6_ex)
1444 rt6_remove_exception(bucket, rt6_ex);
1445
1446 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1447 if (!rt6_ex) {
1448 err = -ENOMEM;
1449 goto out;
1450 }
1451 rt6_ex->rt6i = nrt;
1452 rt6_ex->stamp = jiffies;
35732d01
WW
1453 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1454 bucket->depth++;
81eb8447 1455 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1456
1457 if (bucket->depth > FIB6_MAX_DEPTH)
1458 rt6_exception_remove_oldest(bucket);
1459
1460out:
1461 spin_unlock_bh(&rt6_exception_lock);
1462
1463 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1464 if (!err) {
93c2fb25 1465 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1466 fib6_update_sernum(net, ort);
93c2fb25 1467 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1468 fib6_force_start_gc(net);
1469 }
35732d01
WW
1470
1471 return err;
1472}
1473
8d1c802b 1474void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1475{
1476 struct rt6_exception_bucket *bucket;
1477 struct rt6_exception *rt6_ex;
1478 struct hlist_node *tmp;
1479 int i;
1480
1481 spin_lock_bh(&rt6_exception_lock);
1482 /* Prevent rt6_insert_exception() to recreate the bucket list */
1483 rt->exception_bucket_flushed = 1;
1484
1485 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1486 lockdep_is_held(&rt6_exception_lock));
1487 if (!bucket)
1488 goto out;
1489
1490 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1491 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1492 rt6_remove_exception(bucket, rt6_ex);
1493 WARN_ON_ONCE(bucket->depth);
1494 bucket++;
1495 }
1496
1497out:
1498 spin_unlock_bh(&rt6_exception_lock);
1499}
1500
1501/* Find cached rt in the hash table inside passed in rt
1502 * Caller has to hold rcu_read_lock()
1503 */
8d1c802b 1504static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1505 struct in6_addr *daddr,
1506 struct in6_addr *saddr)
1507{
1508 struct rt6_exception_bucket *bucket;
1509 struct in6_addr *src_key = NULL;
1510 struct rt6_exception *rt6_ex;
1511 struct rt6_info *res = NULL;
1512
1513 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1514
1515#ifdef CONFIG_IPV6_SUBTREES
1516 /* rt6i_src.plen != 0 indicates rt is in subtree
1517 * and exception table is indexed by a hash of
1518 * both rt6i_dst and rt6i_src.
1519 * Otherwise, the exception table is indexed by
1520 * a hash of only rt6i_dst.
1521 */
93c2fb25 1522 if (rt->fib6_src.plen)
35732d01
WW
1523 src_key = saddr;
1524#endif
1525 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1526
1527 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1528 res = rt6_ex->rt6i;
1529
1530 return res;
1531}
1532
1533/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1534static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1535{
35732d01 1536 struct rt6_exception_bucket *bucket;
8d1c802b 1537 struct fib6_info *from = rt->from;
35732d01
WW
1538 struct in6_addr *src_key = NULL;
1539 struct rt6_exception *rt6_ex;
1540 int err;
1541
1542 if (!from ||
442d713b 1543 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1544 return -EINVAL;
1545
1546 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1547 return -ENOENT;
1548
1549 spin_lock_bh(&rt6_exception_lock);
1550 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1551 lockdep_is_held(&rt6_exception_lock));
1552#ifdef CONFIG_IPV6_SUBTREES
1553 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1554 * and exception table is indexed by a hash of
1555 * both rt6i_dst and rt6i_src.
1556 * Otherwise, the exception table is indexed by
1557 * a hash of only rt6i_dst.
1558 */
93c2fb25 1559 if (from->fib6_src.plen)
35732d01
WW
1560 src_key = &rt->rt6i_src.addr;
1561#endif
1562 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1563 &rt->rt6i_dst.addr,
1564 src_key);
1565 if (rt6_ex) {
1566 rt6_remove_exception(bucket, rt6_ex);
1567 err = 0;
1568 } else {
1569 err = -ENOENT;
1570 }
1571
1572 spin_unlock_bh(&rt6_exception_lock);
1573 return err;
1574}
1575
1576/* Find rt6_ex which contains the passed in rt cache and
1577 * refresh its stamp
1578 */
1579static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1580{
35732d01 1581 struct rt6_exception_bucket *bucket;
8d1c802b 1582 struct fib6_info *from = rt->from;
35732d01
WW
1583 struct in6_addr *src_key = NULL;
1584 struct rt6_exception *rt6_ex;
1585
1586 if (!from ||
442d713b 1587 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1588 return;
1589
1590 rcu_read_lock();
1591 bucket = rcu_dereference(from->rt6i_exception_bucket);
1592
1593#ifdef CONFIG_IPV6_SUBTREES
1594 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1595 * and exception table is indexed by a hash of
1596 * both rt6i_dst and rt6i_src.
1597 * Otherwise, the exception table is indexed by
1598 * a hash of only rt6i_dst.
1599 */
93c2fb25 1600 if (from->fib6_src.plen)
35732d01
WW
1601 src_key = &rt->rt6i_src.addr;
1602#endif
1603 rt6_ex = __rt6_find_exception_rcu(&bucket,
1604 &rt->rt6i_dst.addr,
1605 src_key);
1606 if (rt6_ex)
1607 rt6_ex->stamp = jiffies;
1608
1609 rcu_read_unlock();
1610}
1611
8d1c802b 1612static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1613{
1614 struct rt6_exception_bucket *bucket;
1615 struct rt6_exception *rt6_ex;
1616 int i;
1617
1618 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1619 lockdep_is_held(&rt6_exception_lock));
1620
1621 if (bucket) {
1622 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1623 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1624 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1625 }
1626 bucket++;
1627 }
1628 }
1629}
1630
e9fa1495
SB
1631static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1632 struct rt6_info *rt, int mtu)
1633{
1634 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1635 * lowest MTU in the path: always allow updating the route PMTU to
1636 * reflect PMTU decreases.
1637 *
1638 * If the new MTU is higher, and the route PMTU is equal to the local
1639 * MTU, this means the old MTU is the lowest in the path, so allow
1640 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1641 * handle this.
1642 */
1643
1644 if (dst_mtu(&rt->dst) >= mtu)
1645 return true;
1646
1647 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1648 return true;
1649
1650 return false;
1651}
1652
1653static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1654 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1655{
1656 struct rt6_exception_bucket *bucket;
1657 struct rt6_exception *rt6_ex;
1658 int i;
1659
1660 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1661 lockdep_is_held(&rt6_exception_lock));
1662
e9fa1495
SB
1663 if (!bucket)
1664 return;
1665
1666 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1667 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1668 struct rt6_info *entry = rt6_ex->rt6i;
1669
1670 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1671 * route), the metrics of its rt->from have already
e9fa1495
SB
1672 * been updated.
1673 */
d4ead6b3 1674 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1675 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1676 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1677 }
e9fa1495 1678 bucket++;
f5bbe7ee
WW
1679 }
1680}
1681
b16cb459
WW
1682#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1683
8d1c802b 1684static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1685 struct in6_addr *gateway)
1686{
1687 struct rt6_exception_bucket *bucket;
1688 struct rt6_exception *rt6_ex;
1689 struct hlist_node *tmp;
1690 int i;
1691
1692 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1693 return;
1694
1695 spin_lock_bh(&rt6_exception_lock);
1696 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1697 lockdep_is_held(&rt6_exception_lock));
1698
1699 if (bucket) {
1700 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1701 hlist_for_each_entry_safe(rt6_ex, tmp,
1702 &bucket->chain, hlist) {
1703 struct rt6_info *entry = rt6_ex->rt6i;
1704
1705 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1706 RTF_CACHE_GATEWAY &&
1707 ipv6_addr_equal(gateway,
1708 &entry->rt6i_gateway)) {
1709 rt6_remove_exception(bucket, rt6_ex);
1710 }
1711 }
1712 bucket++;
1713 }
1714 }
1715
1716 spin_unlock_bh(&rt6_exception_lock);
1717}
1718
c757faa8
WW
1719static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1720 struct rt6_exception *rt6_ex,
1721 struct fib6_gc_args *gc_args,
1722 unsigned long now)
1723{
1724 struct rt6_info *rt = rt6_ex->rt6i;
1725
1859bac0
PA
1726 /* we are pruning and obsoleting aged-out and non gateway exceptions
1727 * even if others have still references to them, so that on next
1728 * dst_check() such references can be dropped.
1729 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1730 * expired, independently from their aging, as per RFC 8201 section 4
1731 */
31afeb42
WW
1732 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1733 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1734 RT6_TRACE("aging clone %p\n", rt);
1735 rt6_remove_exception(bucket, rt6_ex);
1736 return;
1737 }
1738 } else if (time_after(jiffies, rt->dst.expires)) {
1739 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1740 rt6_remove_exception(bucket, rt6_ex);
1741 return;
31afeb42
WW
1742 }
1743
1744 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1745 struct neighbour *neigh;
1746 __u8 neigh_flags = 0;
1747
1bfa26ff
ED
1748 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1749 if (neigh)
c757faa8 1750 neigh_flags = neigh->flags;
1bfa26ff 1751
c757faa8
WW
1752 if (!(neigh_flags & NTF_ROUTER)) {
1753 RT6_TRACE("purging route %p via non-router but gateway\n",
1754 rt);
1755 rt6_remove_exception(bucket, rt6_ex);
1756 return;
1757 }
1758 }
31afeb42 1759
c757faa8
WW
1760 gc_args->more++;
1761}
1762
8d1c802b 1763void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1764 struct fib6_gc_args *gc_args,
1765 unsigned long now)
1766{
1767 struct rt6_exception_bucket *bucket;
1768 struct rt6_exception *rt6_ex;
1769 struct hlist_node *tmp;
1770 int i;
1771
1772 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1773 return;
1774
1bfa26ff
ED
1775 rcu_read_lock_bh();
1776 spin_lock(&rt6_exception_lock);
c757faa8
WW
1777 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1778 lockdep_is_held(&rt6_exception_lock));
1779
1780 if (bucket) {
1781 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1782 hlist_for_each_entry_safe(rt6_ex, tmp,
1783 &bucket->chain, hlist) {
1784 rt6_age_examine_exception(bucket, rt6_ex,
1785 gc_args, now);
1786 }
1787 bucket++;
1788 }
1789 }
1bfa26ff
ED
1790 spin_unlock(&rt6_exception_lock);
1791 rcu_read_unlock_bh();
c757faa8
WW
1792}
1793
9ff74384 1794struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1795 int oif, struct flowi6 *fl6,
1796 const struct sk_buff *skb, int flags)
1da177e4 1797{
367efcb9 1798 struct fib6_node *fn, *saved_fn;
8d1c802b 1799 struct fib6_info *f6i;
23fb93a4 1800 struct rt6_info *rt;
c71099ac 1801 int strict = 0;
1da177e4 1802
77d16f45 1803 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1804 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1805 if (net->ipv6.devconf_all->forwarding == 0)
1806 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1807
66f5d6ce 1808 rcu_read_lock();
1da177e4 1809
4c9483b2 1810 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1811 saved_fn = fn;
1da177e4 1812
ca254490
DA
1813 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1814 oif = 0;
1815
a3c00e46 1816redo_rt6_select:
23fb93a4 1817 f6i = rt6_select(net, fn, oif, strict);
93c2fb25 1818 if (f6i->fib6_nsiblings)
23fb93a4
DA
1819 f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1820 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1821 fn = fib6_backtrack(fn, &fl6->saddr);
1822 if (fn)
1823 goto redo_rt6_select;
367efcb9
MKL
1824 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1825 /* also consider unreachable route */
1826 strict &= ~RT6_LOOKUP_F_REACHABLE;
1827 fn = saved_fn;
1828 goto redo_rt6_select;
367efcb9 1829 }
a3c00e46
MKL
1830 }
1831
23fb93a4 1832 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1833 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1834 rcu_read_unlock();
d3843fe5 1835 dst_hold(&rt->dst);
b65f164d 1836 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5 1837 return rt;
23fb93a4
DA
1838 }
1839
1840 /*Search through exception table */
1841 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1842 if (rt) {
d4ead6b3 1843 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1844 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1845
66f5d6ce 1846 rcu_read_unlock();
b65f164d 1847 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1848 return rt;
3da59bd9 1849 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1850 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1851 /* Create a RTF_CACHE clone which will not be
1852 * owned by the fib6 tree. It is for the special case where
1853 * the daddr in the skb during the neighbor look-up is different
1854 * from the fl6->daddr used to look-up route here.
1855 */
3da59bd9
MKL
1856 struct rt6_info *uncached_rt;
1857
23fb93a4 1858 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
4d85cd0c
DA
1859
1860 rcu_read_unlock();
c71099ac 1861
1cfb71ee
WW
1862 if (uncached_rt) {
1863 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1864 * No need for another dst_hold()
1865 */
8d0b94af 1866 rt6_uncached_list_add(uncached_rt);
81eb8447 1867 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1868 } else {
3da59bd9 1869 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1870 dst_hold(&uncached_rt->dst);
1871 }
b811580d 1872
b65f164d 1873 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1874 return uncached_rt;
3da59bd9 1875
d52d3997
MKL
1876 } else {
1877 /* Get a percpu copy */
1878
1879 struct rt6_info *pcpu_rt;
1880
951f788a 1881 local_bh_disable();
23fb93a4 1882 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1883
93531c67
DA
1884 if (!pcpu_rt)
1885 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1886
951f788a
ED
1887 local_bh_enable();
1888 rcu_read_unlock();
b65f164d 1889 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1890 return pcpu_rt;
1891 }
1da177e4 1892}
9ff74384 1893EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1894
b75cc8f9
DA
1895static struct rt6_info *ip6_pol_route_input(struct net *net,
1896 struct fib6_table *table,
1897 struct flowi6 *fl6,
1898 const struct sk_buff *skb,
1899 int flags)
4acad72d 1900{
b75cc8f9 1901 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1902}
1903
d409b847
MB
1904struct dst_entry *ip6_route_input_lookup(struct net *net,
1905 struct net_device *dev,
b75cc8f9
DA
1906 struct flowi6 *fl6,
1907 const struct sk_buff *skb,
1908 int flags)
72331bc0
SL
1909{
1910 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1911 flags |= RT6_LOOKUP_F_IFACE;
1912
b75cc8f9 1913 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1914}
d409b847 1915EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1916
23aebdac 1917static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1918 struct flow_keys *keys,
1919 struct flow_keys *flkeys)
23aebdac
JS
1920{
1921 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1922 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1923 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1924 const struct ipv6hdr *inner_iph;
1925 const struct icmp6hdr *icmph;
1926 struct ipv6hdr _inner_iph;
1927
1928 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1929 goto out;
1930
1931 icmph = icmp6_hdr(skb);
1932 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1933 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1934 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1935 icmph->icmp6_type != ICMPV6_PARAMPROB)
1936 goto out;
1937
1938 inner_iph = skb_header_pointer(skb,
1939 skb_transport_offset(skb) + sizeof(*icmph),
1940 sizeof(_inner_iph), &_inner_iph);
1941 if (!inner_iph)
1942 goto out;
1943
1944 key_iph = inner_iph;
5e5d6fed 1945 _flkeys = NULL;
23aebdac 1946out:
5e5d6fed
RP
1947 if (_flkeys) {
1948 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1949 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1950 keys->tags.flow_label = _flkeys->tags.flow_label;
1951 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1952 } else {
1953 keys->addrs.v6addrs.src = key_iph->saddr;
1954 keys->addrs.v6addrs.dst = key_iph->daddr;
1955 keys->tags.flow_label = ip6_flowinfo(key_iph);
1956 keys->basic.ip_proto = key_iph->nexthdr;
1957 }
23aebdac
JS
1958}
1959
1960/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1961u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1962 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1963{
1964 struct flow_keys hash_keys;
9a2a537a 1965 u32 mhash;
23aebdac 1966
bbfa047a 1967 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1968 case 0:
1969 memset(&hash_keys, 0, sizeof(hash_keys));
1970 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1971 if (skb) {
1972 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1973 } else {
1974 hash_keys.addrs.v6addrs.src = fl6->saddr;
1975 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1976 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1977 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1978 }
1979 break;
1980 case 1:
1981 if (skb) {
1982 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1983 struct flow_keys keys;
1984
1985 /* short-circuit if we already have L4 hash present */
1986 if (skb->l4_hash)
1987 return skb_get_hash_raw(skb) >> 1;
1988
1989 memset(&hash_keys, 0, sizeof(hash_keys));
1990
1991 if (!flkeys) {
1992 skb_flow_dissect_flow_keys(skb, &keys, flag);
1993 flkeys = &keys;
1994 }
1995 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1996 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1997 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1998 hash_keys.ports.src = flkeys->ports.src;
1999 hash_keys.ports.dst = flkeys->ports.dst;
2000 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2001 } else {
2002 memset(&hash_keys, 0, sizeof(hash_keys));
2003 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2004 hash_keys.addrs.v6addrs.src = fl6->saddr;
2005 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2006 hash_keys.ports.src = fl6->fl6_sport;
2007 hash_keys.ports.dst = fl6->fl6_dport;
2008 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2009 }
2010 break;
23aebdac 2011 }
9a2a537a 2012 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2013
9a2a537a 2014 return mhash >> 1;
23aebdac
JS
2015}
2016
c71099ac
TG
2017void ip6_route_input(struct sk_buff *skb)
2018{
b71d1d42 2019 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2020 struct net *net = dev_net(skb->dev);
adaa70bb 2021 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2022 struct ip_tunnel_info *tun_info;
4c9483b2 2023 struct flowi6 fl6 = {
e0d56fdd 2024 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2025 .daddr = iph->daddr,
2026 .saddr = iph->saddr,
6502ca52 2027 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2028 .flowi6_mark = skb->mark,
2029 .flowi6_proto = iph->nexthdr,
c71099ac 2030 };
5e5d6fed 2031 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2032
904af04d 2033 tun_info = skb_tunnel_info(skb);
46fa062a 2034 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2035 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2036
2037 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2038 flkeys = &_flkeys;
2039
23aebdac 2040 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2041 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2042 skb_dst_drop(skb);
b75cc8f9
DA
2043 skb_dst_set(skb,
2044 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2045}
2046
b75cc8f9
DA
2047static struct rt6_info *ip6_pol_route_output(struct net *net,
2048 struct fib6_table *table,
2049 struct flowi6 *fl6,
2050 const struct sk_buff *skb,
2051 int flags)
1da177e4 2052{
b75cc8f9 2053 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2054}
2055
6f21c96a
PA
2056struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2057 struct flowi6 *fl6, int flags)
c71099ac 2058{
d46a9d67 2059 bool any_src;
c71099ac 2060
4c1feac5
DA
2061 if (rt6_need_strict(&fl6->daddr)) {
2062 struct dst_entry *dst;
2063
2064 dst = l3mdev_link_scope_lookup(net, fl6);
2065 if (dst)
2066 return dst;
2067 }
ca254490 2068
1fb9489b 2069 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2070
d46a9d67 2071 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2072 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2073 (fl6->flowi6_oif && any_src))
77d16f45 2074 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2075
d46a9d67 2076 if (!any_src)
adaa70bb 2077 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2078 else if (sk)
2079 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2080
b75cc8f9 2081 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2082}
6f21c96a 2083EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2084
2774c131 2085struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2086{
5c1e6aa3 2087 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2088 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2089 struct dst_entry *new = NULL;
2090
1dbe3252 2091 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2092 DST_OBSOLETE_DEAD, 0);
14e50e57 2093 if (rt) {
0a1f5962 2094 rt6_info_init(rt);
81eb8447 2095 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2096
0a1f5962 2097 new = &rt->dst;
14e50e57 2098 new->__use = 1;
352e512c 2099 new->input = dst_discard;
ede2059d 2100 new->output = dst_discard_out;
14e50e57 2101
0a1f5962 2102 dst_copy_metrics(new, &ort->dst);
14e50e57 2103
1dbe3252 2104 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2105 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2106 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2107
2108 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2109#ifdef CONFIG_IPV6_SUBTREES
2110 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2111#endif
14e50e57
DM
2112 }
2113
69ead7af
DM
2114 dst_release(dst_orig);
2115 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2116}
14e50e57 2117
1da177e4
LT
2118/*
2119 * Destination cache support functions
2120 */
2121
8d1c802b 2122static bool fib6_check(struct fib6_info *f6i, u32 cookie)
93531c67
DA
2123{
2124 u32 rt_cookie = 0;
2125
a269f1a7 2126 if ((f6i && !fib6_get_cookie_safe(f6i, &rt_cookie)) ||
93531c67
DA
2127 rt_cookie != cookie)
2128 return false;
2129
2130 if (fib6_check_expired(f6i))
2131 return false;
2132
2133 return true;
2134}
2135
3da59bd9
MKL
2136static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2137{
36143645 2138 u32 rt_cookie = 0;
c5cff856 2139
a269f1a7 2140 if ((rt->from && !fib6_get_cookie_safe(rt->from, &rt_cookie)) ||
93531c67 2141 rt_cookie != cookie)
3da59bd9
MKL
2142 return NULL;
2143
2144 if (rt6_check_expired(rt))
2145 return NULL;
2146
2147 return &rt->dst;
2148}
2149
2150static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2151{
5973fb1e
MKL
2152 if (!__rt6_check_expired(rt) &&
2153 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
93531c67 2154 fib6_check(rt->from, cookie))
3da59bd9
MKL
2155 return &rt->dst;
2156 else
2157 return NULL;
2158}
2159
1da177e4
LT
2160static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2161{
a87b7dc9 2162 struct dst_entry *dst_ret;
1da177e4
LT
2163 struct rt6_info *rt;
2164
a87b7dc9
DA
2165 rt = container_of(dst, struct rt6_info, dst);
2166
2167 rcu_read_lock();
1da177e4 2168
6f3118b5
ND
2169 /* All IPV6 dsts are created with ->obsolete set to the value
2170 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2171 * into this function always.
2172 */
e3bc10bd 2173
02bcf4e0 2174 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2175 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
a87b7dc9 2176 dst_ret = rt6_dst_from_check(rt, cookie);
3da59bd9 2177 else
a87b7dc9
DA
2178 dst_ret = rt6_check(rt, cookie);
2179
2180 rcu_read_unlock();
2181
2182 return dst_ret;
1da177e4
LT
2183}
2184
2185static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2186{
2187 struct rt6_info *rt = (struct rt6_info *) dst;
2188
2189 if (rt) {
54c1a859
YH
2190 if (rt->rt6i_flags & RTF_CACHE) {
2191 if (rt6_check_expired(rt)) {
93531c67 2192 rt6_remove_exception_rt(rt);
54c1a859
YH
2193 dst = NULL;
2194 }
2195 } else {
1da177e4 2196 dst_release(dst);
54c1a859
YH
2197 dst = NULL;
2198 }
1da177e4 2199 }
54c1a859 2200 return dst;
1da177e4
LT
2201}
2202
2203static void ip6_link_failure(struct sk_buff *skb)
2204{
2205 struct rt6_info *rt;
2206
3ffe533c 2207 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2208
adf30907 2209 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2210 if (rt) {
1eb4f758 2211 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2212 if (dst_hold_safe(&rt->dst))
93531c67
DA
2213 rt6_remove_exception_rt(rt);
2214 } else if (rt->from) {
c5cff856
WW
2215 struct fib6_node *fn;
2216
2217 rcu_read_lock();
93c2fb25 2218 fn = rcu_dereference(rt->from->fib6_node);
c5cff856
WW
2219 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2220 fn->fn_sernum = -1;
2221 rcu_read_unlock();
1eb4f758 2222 }
1da177e4
LT
2223 }
2224}
2225
6a3e030f
DA
2226static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2227{
2228 if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from)
2229 rt0->dst.expires = rt0->from->expires;
2230
2231 dst_set_expires(&rt0->dst, timeout);
2232 rt0->rt6i_flags |= RTF_EXPIRES;
2233}
2234
45e4fd26
MKL
2235static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2236{
2237 struct net *net = dev_net(rt->dst.dev);
2238
d4ead6b3 2239 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2240 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2241 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2242}
2243
0d3f6d29
MKL
2244static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2245{
2246 return !(rt->rt6i_flags & RTF_CACHE) &&
77634cc6 2247 (rt->rt6i_flags & RTF_PCPU || rt->from);
0d3f6d29
MKL
2248}
2249
45e4fd26
MKL
2250static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2251 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2252{
0dec879f 2253 const struct in6_addr *daddr, *saddr;
67ba4152 2254 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2255
45e4fd26
MKL
2256 if (rt6->rt6i_flags & RTF_LOCAL)
2257 return;
81aded24 2258
19bda36c
XL
2259 if (dst_metric_locked(dst, RTAX_MTU))
2260 return;
2261
0dec879f
JA
2262 if (iph) {
2263 daddr = &iph->daddr;
2264 saddr = &iph->saddr;
2265 } else if (sk) {
2266 daddr = &sk->sk_v6_daddr;
2267 saddr = &inet6_sk(sk)->saddr;
2268 } else {
2269 daddr = NULL;
2270 saddr = NULL;
2271 }
2272 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2273 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2274 if (mtu >= dst_mtu(dst))
2275 return;
9d289715 2276
0d3f6d29 2277 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2278 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2279 /* update rt6_ex->stamp for cache */
2280 if (rt6->rt6i_flags & RTF_CACHE)
2281 rt6_update_exception_stamp_rt(rt6);
0dec879f 2282 } else if (daddr) {
45e4fd26
MKL
2283 struct rt6_info *nrt6;
2284
4d85cd0c 2285 rcu_read_lock();
d4ead6b3 2286 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
4d85cd0c 2287 rcu_read_unlock();
45e4fd26
MKL
2288 if (nrt6) {
2289 rt6_do_update_pmtu(nrt6, mtu);
d4ead6b3 2290 if (rt6_insert_exception(nrt6, rt6->from))
2b760fcf 2291 dst_release_immediate(&nrt6->dst);
45e4fd26 2292 }
1da177e4
LT
2293 }
2294}
2295
45e4fd26
MKL
2296static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2297 struct sk_buff *skb, u32 mtu)
2298{
2299 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2300}
2301
42ae66c8 2302void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2303 int oif, u32 mark, kuid_t uid)
81aded24
DM
2304{
2305 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2306 struct dst_entry *dst;
2307 struct flowi6 fl6;
2308
2309 memset(&fl6, 0, sizeof(fl6));
2310 fl6.flowi6_oif = oif;
1b3c61dc 2311 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2312 fl6.daddr = iph->daddr;
2313 fl6.saddr = iph->saddr;
6502ca52 2314 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2315 fl6.flowi6_uid = uid;
81aded24
DM
2316
2317 dst = ip6_route_output(net, NULL, &fl6);
2318 if (!dst->error)
45e4fd26 2319 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2320 dst_release(dst);
2321}
2322EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2323
2324void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2325{
33c162a9
MKL
2326 struct dst_entry *dst;
2327
81aded24 2328 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2329 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2330
2331 dst = __sk_dst_get(sk);
2332 if (!dst || !dst->obsolete ||
2333 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2334 return;
2335
2336 bh_lock_sock(sk);
2337 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2338 ip6_datagram_dst_update(sk, false);
2339 bh_unlock_sock(sk);
81aded24
DM
2340}
2341EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2342
7d6850f7
AK
2343void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2344 const struct flowi6 *fl6)
2345{
2346#ifdef CONFIG_IPV6_SUBTREES
2347 struct ipv6_pinfo *np = inet6_sk(sk);
2348#endif
2349
2350 ip6_dst_store(sk, dst,
2351 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2352 &sk->sk_v6_daddr : NULL,
2353#ifdef CONFIG_IPV6_SUBTREES
2354 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2355 &np->saddr :
2356#endif
2357 NULL);
2358}
2359
b55b76b2
DJ
2360/* Handle redirects */
2361struct ip6rd_flowi {
2362 struct flowi6 fl6;
2363 struct in6_addr gateway;
2364};
2365
2366static struct rt6_info *__ip6_route_redirect(struct net *net,
2367 struct fib6_table *table,
2368 struct flowi6 *fl6,
b75cc8f9 2369 const struct sk_buff *skb,
b55b76b2
DJ
2370 int flags)
2371{
2372 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2373 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2374 struct fib6_info *rt;
b55b76b2
DJ
2375 struct fib6_node *fn;
2376
2377 /* Get the "current" route for this destination and
67c408cf 2378 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2379 *
2380 * RFC 4861 specifies that redirects should only be
2381 * accepted if they come from the nexthop to the target.
2382 * Due to the way the routes are chosen, this notion
2383 * is a bit fuzzy and one might need to check all possible
2384 * routes.
2385 */
2386
66f5d6ce 2387 rcu_read_lock();
b55b76b2
DJ
2388 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2389restart:
66f5d6ce 2390 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2391 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2392 continue;
14895687 2393 if (fib6_check_expired(rt))
b55b76b2 2394 continue;
93c2fb25 2395 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2396 break;
93c2fb25 2397 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2398 continue;
5e670d84 2399 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2400 continue;
2b760fcf
WW
2401 /* rt_cache's gateway might be different from its 'parent'
2402 * in the case of an ip redirect.
2403 * So we keep searching in the exception table if the gateway
2404 * is different.
2405 */
5e670d84 2406 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2407 rt_cache = rt6_find_cached_rt(rt,
2408 &fl6->daddr,
2409 &fl6->saddr);
2410 if (rt_cache &&
2411 ipv6_addr_equal(&rdfl->gateway,
2412 &rt_cache->rt6i_gateway)) {
23fb93a4 2413 ret = rt_cache;
2b760fcf
WW
2414 break;
2415 }
b55b76b2 2416 continue;
2b760fcf 2417 }
b55b76b2
DJ
2418 break;
2419 }
2420
2421 if (!rt)
421842ed 2422 rt = net->ipv6.fib6_null_entry;
93c2fb25 2423 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2424 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2425 goto out;
2426 }
2427
421842ed 2428 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2429 fn = fib6_backtrack(fn, &fl6->saddr);
2430 if (fn)
2431 goto restart;
b55b76b2 2432 }
a3c00e46 2433
b0a1ba59 2434out:
23fb93a4
DA
2435 if (ret)
2436 dst_hold(&ret->dst);
2437 else
2438 ret = ip6_create_rt_rcu(rt);
b55b76b2 2439
66f5d6ce 2440 rcu_read_unlock();
b55b76b2 2441
23fb93a4
DA
2442 trace_fib6_table_lookup(net, ret, table, fl6);
2443 return ret;
b55b76b2
DJ
2444};
2445
2446static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2447 const struct flowi6 *fl6,
2448 const struct sk_buff *skb,
2449 const struct in6_addr *gateway)
b55b76b2
DJ
2450{
2451 int flags = RT6_LOOKUP_F_HAS_SADDR;
2452 struct ip6rd_flowi rdfl;
2453
2454 rdfl.fl6 = *fl6;
2455 rdfl.gateway = *gateway;
2456
b75cc8f9 2457 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2458 flags, __ip6_route_redirect);
2459}
2460
e2d118a1
LC
2461void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2462 kuid_t uid)
3a5ad2ee
DM
2463{
2464 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2465 struct dst_entry *dst;
2466 struct flowi6 fl6;
2467
2468 memset(&fl6, 0, sizeof(fl6));
e374c618 2469 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2470 fl6.flowi6_oif = oif;
2471 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2472 fl6.daddr = iph->daddr;
2473 fl6.saddr = iph->saddr;
6502ca52 2474 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2475 fl6.flowi6_uid = uid;
3a5ad2ee 2476
b75cc8f9 2477 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2478 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2479 dst_release(dst);
2480}
2481EXPORT_SYMBOL_GPL(ip6_redirect);
2482
c92a59ec
DJ
2483void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2484 u32 mark)
2485{
2486 const struct ipv6hdr *iph = ipv6_hdr(skb);
2487 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2488 struct dst_entry *dst;
2489 struct flowi6 fl6;
2490
2491 memset(&fl6, 0, sizeof(fl6));
e374c618 2492 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2493 fl6.flowi6_oif = oif;
2494 fl6.flowi6_mark = mark;
c92a59ec
DJ
2495 fl6.daddr = msg->dest;
2496 fl6.saddr = iph->daddr;
e2d118a1 2497 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2498
b75cc8f9 2499 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2500 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2501 dst_release(dst);
2502}
2503
3a5ad2ee
DM
2504void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2505{
e2d118a1
LC
2506 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2507 sk->sk_uid);
3a5ad2ee
DM
2508}
2509EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2510
0dbaee3b 2511static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2512{
0dbaee3b
DM
2513 struct net_device *dev = dst->dev;
2514 unsigned int mtu = dst_mtu(dst);
2515 struct net *net = dev_net(dev);
2516
1da177e4
LT
2517 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2518
5578689a
DL
2519 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2520 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2521
2522 /*
1ab1457c
YH
2523 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2524 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2525 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2526 * rely only on pmtu discovery"
2527 */
2528 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2529 mtu = IPV6_MAXPLEN;
2530 return mtu;
2531}
2532
ebb762f2 2533static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2534{
d33e4553 2535 struct inet6_dev *idev;
d4ead6b3 2536 unsigned int mtu;
4b32b5ad
MKL
2537
2538 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2539 if (mtu)
30f78d8e 2540 goto out;
618f9bc7
SK
2541
2542 mtu = IPV6_MIN_MTU;
d33e4553
DM
2543
2544 rcu_read_lock();
2545 idev = __in6_dev_get(dst->dev);
2546 if (idev)
2547 mtu = idev->cnf.mtu6;
2548 rcu_read_unlock();
2549
30f78d8e 2550out:
14972cbd
RP
2551 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2552
2553 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2554}
2555
3b00944c 2556struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2557 struct flowi6 *fl6)
1da177e4 2558{
87a11578 2559 struct dst_entry *dst;
1da177e4
LT
2560 struct rt6_info *rt;
2561 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2562 struct net *net = dev_net(dev);
1da177e4 2563
38308473 2564 if (unlikely(!idev))
122bdf67 2565 return ERR_PTR(-ENODEV);
1da177e4 2566
ad706862 2567 rt = ip6_dst_alloc(net, dev, 0);
38308473 2568 if (unlikely(!rt)) {
1da177e4 2569 in6_dev_put(idev);
87a11578 2570 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2571 goto out;
2572 }
2573
8e2ec639 2574 rt->dst.flags |= DST_HOST;
588753f1 2575 rt->dst.input = ip6_input;
8e2ec639 2576 rt->dst.output = ip6_output;
550bab42 2577 rt->rt6i_gateway = fl6->daddr;
87a11578 2578 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2579 rt->rt6i_dst.plen = 128;
2580 rt->rt6i_idev = idev;
14edd87d 2581 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2582
4c981e28 2583 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2584 * do proper release of the net_device
2585 */
2586 rt6_uncached_list_add(rt);
81eb8447 2587 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2588
87a11578
DM
2589 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2590
1da177e4 2591out:
87a11578 2592 return dst;
1da177e4
LT
2593}
2594
569d3645 2595static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2596{
86393e52 2597 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2598 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2599 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2600 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2601 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2602 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2603 int entries;
7019b78e 2604
fc66f95c 2605 entries = dst_entries_get_fast(ops);
49a18d86 2606 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2607 entries <= rt_max_size)
1da177e4
LT
2608 goto out;
2609
6891a346 2610 net->ipv6.ip6_rt_gc_expire++;
14956643 2611 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2612 entries = dst_entries_get_slow(ops);
2613 if (entries < ops->gc_thresh)
7019b78e 2614 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2615out:
7019b78e 2616 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2617 return entries > rt_max_size;
1da177e4
LT
2618}
2619
8d1c802b 2620static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2621 struct fib6_config *cfg)
e715b6d3 2622{
263243d6 2623 struct dst_metrics *p;
e715b6d3 2624
263243d6
ED
2625 if (!cfg->fc_mx)
2626 return 0;
ea697639 2627
263243d6
ED
2628 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2629 if (unlikely(!p))
2630 return -ENOMEM;
e715b6d3 2631
263243d6
ED
2632 refcount_set(&p->refcnt, 1);
2633 rt->fib6_metrics = p;
e715b6d3 2634
263243d6 2635 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2636}
1da177e4 2637
8c14586f
DA
2638static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2639 struct fib6_config *cfg,
f4797b33
DA
2640 const struct in6_addr *gw_addr,
2641 u32 tbid, int flags)
8c14586f
DA
2642{
2643 struct flowi6 fl6 = {
2644 .flowi6_oif = cfg->fc_ifindex,
2645 .daddr = *gw_addr,
2646 .saddr = cfg->fc_prefsrc,
2647 };
2648 struct fib6_table *table;
2649 struct rt6_info *rt;
8c14586f 2650
f4797b33 2651 table = fib6_get_table(net, tbid);
8c14586f
DA
2652 if (!table)
2653 return NULL;
2654
2655 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2656 flags |= RT6_LOOKUP_F_HAS_SADDR;
2657
f4797b33 2658 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2659 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2660
2661 /* if table lookup failed, fall back to full lookup */
2662 if (rt == net->ipv6.ip6_null_entry) {
2663 ip6_rt_put(rt);
2664 rt = NULL;
2665 }
2666
2667 return rt;
2668}
2669
fc1e64e1
DA
2670static int ip6_route_check_nh_onlink(struct net *net,
2671 struct fib6_config *cfg,
9fbb704c 2672 const struct net_device *dev,
fc1e64e1
DA
2673 struct netlink_ext_ack *extack)
2674{
44750f84 2675 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2676 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2677 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2678 struct rt6_info *grt;
2679 int err;
2680
2681 err = 0;
2682 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2683 if (grt) {
58e354c0
DA
2684 if (!grt->dst.error &&
2685 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2686 NL_SET_ERR_MSG(extack,
2687 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2688 err = -EINVAL;
2689 }
2690
2691 ip6_rt_put(grt);
2692 }
2693
2694 return err;
2695}
2696
1edce99f
DA
2697static int ip6_route_check_nh(struct net *net,
2698 struct fib6_config *cfg,
2699 struct net_device **_dev,
2700 struct inet6_dev **idev)
2701{
2702 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2703 struct net_device *dev = _dev ? *_dev : NULL;
2704 struct rt6_info *grt = NULL;
2705 int err = -EHOSTUNREACH;
2706
2707 if (cfg->fc_table) {
f4797b33
DA
2708 int flags = RT6_LOOKUP_F_IFACE;
2709
2710 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2711 cfg->fc_table, flags);
1edce99f
DA
2712 if (grt) {
2713 if (grt->rt6i_flags & RTF_GATEWAY ||
2714 (dev && dev != grt->dst.dev)) {
2715 ip6_rt_put(grt);
2716 grt = NULL;
2717 }
2718 }
2719 }
2720
2721 if (!grt)
b75cc8f9 2722 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2723
2724 if (!grt)
2725 goto out;
2726
2727 if (dev) {
2728 if (dev != grt->dst.dev) {
2729 ip6_rt_put(grt);
2730 goto out;
2731 }
2732 } else {
2733 *_dev = dev = grt->dst.dev;
2734 *idev = grt->rt6i_idev;
2735 dev_hold(dev);
2736 in6_dev_hold(grt->rt6i_idev);
2737 }
2738
2739 if (!(grt->rt6i_flags & RTF_GATEWAY))
2740 err = 0;
2741
2742 ip6_rt_put(grt);
2743
2744out:
2745 return err;
2746}
2747
9fbb704c
DA
2748static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2749 struct net_device **_dev, struct inet6_dev **idev,
2750 struct netlink_ext_ack *extack)
2751{
2752 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2753 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2754 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2755 const struct net_device *dev = *_dev;
232378e8 2756 bool need_addr_check = !dev;
9fbb704c
DA
2757 int err = -EINVAL;
2758
2759 /* if gw_addr is local we will fail to detect this in case
2760 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2761 * will return already-added prefix route via interface that
2762 * prefix route was assigned to, which might be non-loopback.
2763 */
232378e8
DA
2764 if (dev &&
2765 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2766 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2767 goto out;
2768 }
2769
2770 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2771 /* IPv6 strictly inhibits using not link-local
2772 * addresses as nexthop address.
2773 * Otherwise, router will not able to send redirects.
2774 * It is very good, but in some (rare!) circumstances
2775 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2776 * some exceptions. --ANK
2777 * We allow IPv4-mapped nexthops to support RFC4798-type
2778 * addressing
2779 */
2780 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2781 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2782 goto out;
2783 }
2784
2785 if (cfg->fc_flags & RTNH_F_ONLINK)
2786 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2787 else
2788 err = ip6_route_check_nh(net, cfg, _dev, idev);
2789
2790 if (err)
2791 goto out;
2792 }
2793
2794 /* reload in case device was changed */
2795 dev = *_dev;
2796
2797 err = -EINVAL;
2798 if (!dev) {
2799 NL_SET_ERR_MSG(extack, "Egress device not specified");
2800 goto out;
2801 } else if (dev->flags & IFF_LOOPBACK) {
2802 NL_SET_ERR_MSG(extack,
2803 "Egress device can not be loopback device for this route");
2804 goto out;
2805 }
232378e8
DA
2806
2807 /* if we did not check gw_addr above, do so now that the
2808 * egress device has been resolved.
2809 */
2810 if (need_addr_check &&
2811 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2812 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2813 goto out;
2814 }
2815
9fbb704c
DA
2816 err = 0;
2817out:
2818 return err;
2819}
2820
8d1c802b 2821static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2822 gfp_t gfp_flags,
333c4301 2823 struct netlink_ext_ack *extack)
1da177e4 2824{
5578689a 2825 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2826 struct fib6_info *rt = NULL;
1da177e4
LT
2827 struct net_device *dev = NULL;
2828 struct inet6_dev *idev = NULL;
c71099ac 2829 struct fib6_table *table;
1da177e4 2830 int addr_type;
8c5b83f0 2831 int err = -EINVAL;
1da177e4 2832
557c44be 2833 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2834 if (cfg->fc_flags & RTF_PCPU) {
2835 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2836 goto out;
d5d531cb 2837 }
557c44be 2838
2ea2352e
WW
2839 /* RTF_CACHE is an internal flag; can not be set by userspace */
2840 if (cfg->fc_flags & RTF_CACHE) {
2841 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2842 goto out;
2843 }
2844
e8478e80
DA
2845 if (cfg->fc_type > RTN_MAX) {
2846 NL_SET_ERR_MSG(extack, "Invalid route type");
2847 goto out;
2848 }
2849
d5d531cb
DA
2850 if (cfg->fc_dst_len > 128) {
2851 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2852 goto out;
2853 }
2854 if (cfg->fc_src_len > 128) {
2855 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2856 goto out;
d5d531cb 2857 }
1da177e4 2858#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2859 if (cfg->fc_src_len) {
2860 NL_SET_ERR_MSG(extack,
2861 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2862 goto out;
d5d531cb 2863 }
1da177e4 2864#endif
86872cb5 2865 if (cfg->fc_ifindex) {
1da177e4 2866 err = -ENODEV;
5578689a 2867 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2868 if (!dev)
2869 goto out;
2870 idev = in6_dev_get(dev);
2871 if (!idev)
2872 goto out;
2873 }
2874
86872cb5
TG
2875 if (cfg->fc_metric == 0)
2876 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2877
fc1e64e1
DA
2878 if (cfg->fc_flags & RTNH_F_ONLINK) {
2879 if (!dev) {
2880 NL_SET_ERR_MSG(extack,
2881 "Nexthop device required for onlink");
2882 err = -ENODEV;
2883 goto out;
2884 }
2885
2886 if (!(dev->flags & IFF_UP)) {
2887 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2888 err = -ENETDOWN;
2889 goto out;
2890 }
2891 }
2892
d71314b4 2893 err = -ENOBUFS;
38308473
DM
2894 if (cfg->fc_nlinfo.nlh &&
2895 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2896 table = fib6_get_table(net, cfg->fc_table);
38308473 2897 if (!table) {
f3213831 2898 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2899 table = fib6_new_table(net, cfg->fc_table);
2900 }
2901 } else {
2902 table = fib6_new_table(net, cfg->fc_table);
2903 }
38308473
DM
2904
2905 if (!table)
c71099ac 2906 goto out;
c71099ac 2907
93531c67
DA
2908 err = -ENOMEM;
2909 rt = fib6_info_alloc(gfp_flags);
2910 if (!rt)
1da177e4 2911 goto out;
93531c67
DA
2912
2913 if (cfg->fc_flags & RTF_ADDRCONF)
2914 rt->dst_nocount = true;
1da177e4 2915
d4ead6b3
DA
2916 err = ip6_convert_metrics(net, rt, cfg);
2917 if (err < 0)
2918 goto out;
2919
1716a961 2920 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2921 fib6_set_expires(rt, jiffies +
1716a961
G
2922 clock_t_to_jiffies(cfg->fc_expires));
2923 else
14895687 2924 fib6_clean_expires(rt);
1da177e4 2925
86872cb5
TG
2926 if (cfg->fc_protocol == RTPROT_UNSPEC)
2927 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 2928 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
2929
2930 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2931
19e42e45
RP
2932 if (cfg->fc_encap) {
2933 struct lwtunnel_state *lwtstate;
2934
30357d7d 2935 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2936 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2937 &lwtstate, extack);
19e42e45
RP
2938 if (err)
2939 goto out;
5e670d84 2940 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2941 }
2942
93c2fb25
DA
2943 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2944 rt->fib6_dst.plen = cfg->fc_dst_len;
2945 if (rt->fib6_dst.plen == 128)
3b6761d1 2946 rt->dst_host = true;
e5fd387a 2947
1da177e4 2948#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
2949 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
2950 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
2951#endif
2952
93c2fb25 2953 rt->fib6_metric = cfg->fc_metric;
5e670d84 2954 rt->fib6_nh.nh_weight = 1;
1da177e4 2955
e8478e80
DA
2956 rt->fib6_type = cfg->fc_type;
2957
1da177e4
LT
2958 /* We cannot add true routes via loopback here,
2959 they would result in kernel looping; promote them to reject routes
2960 */
86872cb5 2961 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2962 (dev && (dev->flags & IFF_LOOPBACK) &&
2963 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2964 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2965 /* hold loopback dev/idev if we haven't done so. */
5578689a 2966 if (dev != net->loopback_dev) {
1da177e4
LT
2967 if (dev) {
2968 dev_put(dev);
2969 in6_dev_put(idev);
2970 }
5578689a 2971 dev = net->loopback_dev;
1da177e4
LT
2972 dev_hold(dev);
2973 idev = in6_dev_get(dev);
2974 if (!idev) {
2975 err = -ENODEV;
2976 goto out;
2977 }
2978 }
93c2fb25 2979 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
2980 goto install_route;
2981 }
2982
86872cb5 2983 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
2984 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2985 if (err)
48ed7b26 2986 goto out;
1da177e4 2987
93531c67 2988 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
2989 }
2990
2991 err = -ENODEV;
38308473 2992 if (!dev)
1da177e4
LT
2993 goto out;
2994
428604fb
LB
2995 if (idev->cnf.disable_ipv6) {
2996 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2997 err = -EACCES;
2998 goto out;
2999 }
3000
955ec4cb
DA
3001 if (!(dev->flags & IFF_UP)) {
3002 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3003 err = -ENETDOWN;
3004 goto out;
3005 }
3006
c3968a85
DW
3007 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3008 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3009 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3010 err = -EINVAL;
3011 goto out;
3012 }
93c2fb25
DA
3013 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3014 rt->fib6_prefsrc.plen = 128;
c3968a85 3015 } else
93c2fb25 3016 rt->fib6_prefsrc.plen = 0;
c3968a85 3017
93c2fb25 3018 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3019
3020install_route:
93c2fb25 3021 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3022 !netif_carrier_ok(dev))
5e670d84
DA
3023 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3024 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3025 rt->fib6_nh.nh_dev = dev;
93c2fb25 3026 rt->fib6_table = table;
63152fc0 3027
c346dca1 3028 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3029
dcd1f572
DA
3030 if (idev)
3031 in6_dev_put(idev);
3032
8c5b83f0 3033 return rt;
6b9ea5a6
RP
3034out:
3035 if (dev)
3036 dev_put(dev);
3037 if (idev)
3038 in6_dev_put(idev);
6b9ea5a6 3039
93531c67 3040 fib6_info_release(rt);
8c5b83f0 3041 return ERR_PTR(err);
6b9ea5a6
RP
3042}
3043
acb54e3c
DA
3044int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3045 struct netlink_ext_ack *extack)
6b9ea5a6 3046{
8d1c802b 3047 struct fib6_info *rt;
6b9ea5a6
RP
3048 int err;
3049
acb54e3c 3050 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3051 if (IS_ERR(rt))
3052 return PTR_ERR(rt);
6b9ea5a6 3053
d4ead6b3 3054 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3055 fib6_info_release(rt);
6b9ea5a6 3056
1da177e4
LT
3057 return err;
3058}
3059
8d1c802b 3060static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3061{
afb1d4b5 3062 struct net *net = info->nl_net;
c71099ac 3063 struct fib6_table *table;
afb1d4b5 3064 int err;
1da177e4 3065
421842ed 3066 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3067 err = -ENOENT;
3068 goto out;
3069 }
6c813a72 3070
93c2fb25 3071 table = rt->fib6_table;
66f5d6ce 3072 spin_lock_bh(&table->tb6_lock);
86872cb5 3073 err = fib6_del(rt, info);
66f5d6ce 3074 spin_unlock_bh(&table->tb6_lock);
1da177e4 3075
6825a26c 3076out:
93531c67 3077 fib6_info_release(rt);
1da177e4
LT
3078 return err;
3079}
3080
8d1c802b 3081int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3082{
afb1d4b5
DA
3083 struct nl_info info = { .nl_net = net };
3084
528c4ceb 3085 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3086}
3087
8d1c802b 3088static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3089{
3090 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3091 struct net *net = info->nl_net;
16a16cd3 3092 struct sk_buff *skb = NULL;
0ae81335 3093 struct fib6_table *table;
e3330039 3094 int err = -ENOENT;
0ae81335 3095
421842ed 3096 if (rt == net->ipv6.fib6_null_entry)
e3330039 3097 goto out_put;
93c2fb25 3098 table = rt->fib6_table;
66f5d6ce 3099 spin_lock_bh(&table->tb6_lock);
0ae81335 3100
93c2fb25 3101 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3102 struct fib6_info *sibling, *next_sibling;
0ae81335 3103
16a16cd3
DA
3104 /* prefer to send a single notification with all hops */
3105 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3106 if (skb) {
3107 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3108
d4ead6b3 3109 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3110 NULL, NULL, 0, RTM_DELROUTE,
3111 info->portid, seq, 0) < 0) {
3112 kfree_skb(skb);
3113 skb = NULL;
3114 } else
3115 info->skip_notify = 1;
3116 }
3117
0ae81335 3118 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3119 &rt->fib6_siblings,
3120 fib6_siblings) {
0ae81335
DA
3121 err = fib6_del(sibling, info);
3122 if (err)
e3330039 3123 goto out_unlock;
0ae81335
DA
3124 }
3125 }
3126
3127 err = fib6_del(rt, info);
e3330039 3128out_unlock:
66f5d6ce 3129 spin_unlock_bh(&table->tb6_lock);
e3330039 3130out_put:
93531c67 3131 fib6_info_release(rt);
16a16cd3
DA
3132
3133 if (skb) {
e3330039 3134 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3135 info->nlh, gfp_any());
3136 }
0ae81335
DA
3137 return err;
3138}
3139
23fb93a4
DA
3140static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3141{
3142 int rc = -ESRCH;
3143
3144 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3145 goto out;
3146
3147 if (cfg->fc_flags & RTF_GATEWAY &&
3148 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3149 goto out;
3150 if (dst_hold_safe(&rt->dst))
3151 rc = rt6_remove_exception_rt(rt);
3152out:
3153 return rc;
3154}
3155
333c4301
DA
3156static int ip6_route_del(struct fib6_config *cfg,
3157 struct netlink_ext_ack *extack)
1da177e4 3158{
8d1c802b 3159 struct rt6_info *rt_cache;
c71099ac 3160 struct fib6_table *table;
8d1c802b 3161 struct fib6_info *rt;
1da177e4 3162 struct fib6_node *fn;
1da177e4
LT
3163 int err = -ESRCH;
3164
5578689a 3165 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3166 if (!table) {
3167 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3168 return err;
d5d531cb 3169 }
c71099ac 3170
66f5d6ce 3171 rcu_read_lock();
1da177e4 3172
c71099ac 3173 fn = fib6_locate(&table->tb6_root,
86872cb5 3174 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3175 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3176 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3177
1da177e4 3178 if (fn) {
66f5d6ce 3179 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3180 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3181 int rc;
3182
2b760fcf
WW
3183 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3184 &cfg->fc_src);
23fb93a4
DA
3185 if (rt_cache) {
3186 rc = ip6_del_cached_rt(rt_cache, cfg);
3187 if (rc != -ESRCH)
3188 return rc;
3189 }
3190 continue;
2b760fcf 3191 }
86872cb5 3192 if (cfg->fc_ifindex &&
5e670d84
DA
3193 (!rt->fib6_nh.nh_dev ||
3194 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3195 continue;
86872cb5 3196 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3197 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3198 continue;
93c2fb25 3199 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3200 continue;
93c2fb25 3201 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3202 continue;
93531c67 3203 fib6_info_hold(rt);
66f5d6ce 3204 rcu_read_unlock();
1da177e4 3205
0ae81335
DA
3206 /* if gateway was specified only delete the one hop */
3207 if (cfg->fc_flags & RTF_GATEWAY)
3208 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3209
3210 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3211 }
3212 }
66f5d6ce 3213 rcu_read_unlock();
1da177e4
LT
3214
3215 return err;
3216}
3217
6700c270 3218static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3219{
a6279458 3220 struct netevent_redirect netevent;
e8599ff4 3221 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3222 struct ndisc_options ndopts;
3223 struct inet6_dev *in6_dev;
3224 struct neighbour *neigh;
71bcdba0 3225 struct rd_msg *msg;
6e157b6a
DM
3226 int optlen, on_link;
3227 u8 *lladdr;
e8599ff4 3228
29a3cad5 3229 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3230 optlen -= sizeof(*msg);
e8599ff4
DM
3231
3232 if (optlen < 0) {
6e157b6a 3233 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3234 return;
3235 }
3236
71bcdba0 3237 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3238
71bcdba0 3239 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3240 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3241 return;
3242 }
3243
6e157b6a 3244 on_link = 0;
71bcdba0 3245 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3246 on_link = 1;
71bcdba0 3247 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3248 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3249 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3250 return;
3251 }
3252
3253 in6_dev = __in6_dev_get(skb->dev);
3254 if (!in6_dev)
3255 return;
3256 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3257 return;
3258
3259 /* RFC2461 8.1:
3260 * The IP source address of the Redirect MUST be the same as the current
3261 * first-hop router for the specified ICMP Destination Address.
3262 */
3263
f997c55c 3264 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3265 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3266 return;
3267 }
6e157b6a
DM
3268
3269 lladdr = NULL;
e8599ff4
DM
3270 if (ndopts.nd_opts_tgt_lladdr) {
3271 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3272 skb->dev);
3273 if (!lladdr) {
3274 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3275 return;
3276 }
3277 }
3278
6e157b6a 3279 rt = (struct rt6_info *) dst;
ec13ad1d 3280 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3281 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3282 return;
6e157b6a 3283 }
e8599ff4 3284
6e157b6a
DM
3285 /* Redirect received -> path was valid.
3286 * Look, redirects are sent only in response to data packets,
3287 * so that this nexthop apparently is reachable. --ANK
3288 */
0dec879f 3289 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3290
71bcdba0 3291 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3292 if (!neigh)
3293 return;
a6279458 3294
1da177e4
LT
3295 /*
3296 * We have finally decided to accept it.
3297 */
3298
f997c55c 3299 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3300 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3301 NEIGH_UPDATE_F_OVERRIDE|
3302 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3303 NEIGH_UPDATE_F_ISROUTER)),
3304 NDISC_REDIRECT, &ndopts);
1da177e4 3305
4d85cd0c 3306 rcu_read_lock();
23fb93a4 3307 nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
4d85cd0c 3308 rcu_read_unlock();
38308473 3309 if (!nrt)
1da177e4
LT
3310 goto out;
3311
3312 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3313 if (on_link)
3314 nrt->rt6i_flags &= ~RTF_GATEWAY;
3315
4e3fd7a0 3316 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3317
2b760fcf
WW
3318 /* No need to remove rt from the exception table if rt is
3319 * a cached route because rt6_insert_exception() will
3320 * takes care of it
3321 */
d4ead6b3 3322 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3323 dst_release_immediate(&nrt->dst);
3324 goto out;
3325 }
1da177e4 3326
d8d1f30b
CG
3327 netevent.old = &rt->dst;
3328 netevent.new = &nrt->dst;
71bcdba0 3329 netevent.daddr = &msg->dest;
60592833 3330 netevent.neigh = neigh;
8d71740c
TT
3331 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3332
1da177e4 3333out:
e8599ff4 3334 neigh_release(neigh);
6e157b6a
DM
3335}
3336
70ceb4f5 3337#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3338static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3339 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3340 const struct in6_addr *gwaddr,
3341 struct net_device *dev)
70ceb4f5 3342{
830218c1
DA
3343 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3344 int ifindex = dev->ifindex;
70ceb4f5 3345 struct fib6_node *fn;
8d1c802b 3346 struct fib6_info *rt = NULL;
c71099ac
TG
3347 struct fib6_table *table;
3348
830218c1 3349 table = fib6_get_table(net, tb_id);
38308473 3350 if (!table)
c71099ac 3351 return NULL;
70ceb4f5 3352
66f5d6ce 3353 rcu_read_lock();
38fbeeee 3354 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3355 if (!fn)
3356 goto out;
3357
66f5d6ce 3358 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3359 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3360 continue;
93c2fb25 3361 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3362 continue;
5e670d84 3363 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3364 continue;
8d1c802b 3365 fib6_info_hold(rt);
70ceb4f5
YH
3366 break;
3367 }
3368out:
66f5d6ce 3369 rcu_read_unlock();
70ceb4f5
YH
3370 return rt;
3371}
3372
8d1c802b 3373static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3374 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3375 const struct in6_addr *gwaddr,
3376 struct net_device *dev,
95c96174 3377 unsigned int pref)
70ceb4f5 3378{
86872cb5 3379 struct fib6_config cfg = {
238fc7ea 3380 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3381 .fc_ifindex = dev->ifindex,
86872cb5
TG
3382 .fc_dst_len = prefixlen,
3383 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3384 RTF_UP | RTF_PREF(pref),
b91d5329 3385 .fc_protocol = RTPROT_RA,
e8478e80 3386 .fc_type = RTN_UNICAST,
15e47304 3387 .fc_nlinfo.portid = 0,
efa2cea0
DL
3388 .fc_nlinfo.nlh = NULL,
3389 .fc_nlinfo.nl_net = net,
86872cb5
TG
3390 };
3391
830218c1 3392 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3393 cfg.fc_dst = *prefix;
3394 cfg.fc_gateway = *gwaddr;
70ceb4f5 3395
e317da96
YH
3396 /* We should treat it as a default route if prefix length is 0. */
3397 if (!prefixlen)
86872cb5 3398 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3399
acb54e3c 3400 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3401
830218c1 3402 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3403}
3404#endif
3405
8d1c802b 3406struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3407 const struct in6_addr *addr,
3408 struct net_device *dev)
1ab1457c 3409{
830218c1 3410 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3411 struct fib6_info *rt;
c71099ac 3412 struct fib6_table *table;
1da177e4 3413
afb1d4b5 3414 table = fib6_get_table(net, tb_id);
38308473 3415 if (!table)
c71099ac 3416 return NULL;
1da177e4 3417
66f5d6ce
WW
3418 rcu_read_lock();
3419 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3420 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3421 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3422 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3423 break;
3424 }
3425 if (rt)
8d1c802b 3426 fib6_info_hold(rt);
66f5d6ce 3427 rcu_read_unlock();
1da177e4
LT
3428 return rt;
3429}
3430
8d1c802b 3431struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3432 const struct in6_addr *gwaddr,
ebacaaa0
YH
3433 struct net_device *dev,
3434 unsigned int pref)
1da177e4 3435{
86872cb5 3436 struct fib6_config cfg = {
ca254490 3437 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3438 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3439 .fc_ifindex = dev->ifindex,
3440 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3441 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3442 .fc_protocol = RTPROT_RA,
e8478e80 3443 .fc_type = RTN_UNICAST,
15e47304 3444 .fc_nlinfo.portid = 0,
5578689a 3445 .fc_nlinfo.nlh = NULL,
afb1d4b5 3446 .fc_nlinfo.nl_net = net,
86872cb5 3447 };
1da177e4 3448
4e3fd7a0 3449 cfg.fc_gateway = *gwaddr;
1da177e4 3450
acb54e3c 3451 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3452 struct fib6_table *table;
3453
3454 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3455 if (table)
3456 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3457 }
1da177e4 3458
afb1d4b5 3459 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3460}
3461
afb1d4b5
DA
3462static void __rt6_purge_dflt_routers(struct net *net,
3463 struct fib6_table *table)
1da177e4 3464{
8d1c802b 3465 struct fib6_info *rt;
1da177e4
LT
3466
3467restart:
66f5d6ce
WW
3468 rcu_read_lock();
3469 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3470 struct net_device *dev = fib6_info_nh_dev(rt);
3471 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3472
93c2fb25 3473 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
dcd1f572 3474 (!idev || idev->cnf.accept_ra != 2)) {
93531c67
DA
3475 fib6_info_hold(rt);
3476 rcu_read_unlock();
3477 ip6_del_rt(net, rt);
1da177e4
LT
3478 goto restart;
3479 }
3480 }
66f5d6ce 3481 rcu_read_unlock();
830218c1
DA
3482
3483 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3484}
3485
3486void rt6_purge_dflt_routers(struct net *net)
3487{
3488 struct fib6_table *table;
3489 struct hlist_head *head;
3490 unsigned int h;
3491
3492 rcu_read_lock();
3493
3494 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3495 head = &net->ipv6.fib_table_hash[h];
3496 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3497 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3498 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3499 }
3500 }
3501
3502 rcu_read_unlock();
1da177e4
LT
3503}
3504
5578689a
DL
3505static void rtmsg_to_fib6_config(struct net *net,
3506 struct in6_rtmsg *rtmsg,
86872cb5
TG
3507 struct fib6_config *cfg)
3508{
3509 memset(cfg, 0, sizeof(*cfg));
3510
ca254490
DA
3511 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3512 : RT6_TABLE_MAIN;
86872cb5
TG
3513 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3514 cfg->fc_metric = rtmsg->rtmsg_metric;
3515 cfg->fc_expires = rtmsg->rtmsg_info;
3516 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3517 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3518 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3519 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3520
5578689a 3521 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3522
4e3fd7a0
AD
3523 cfg->fc_dst = rtmsg->rtmsg_dst;
3524 cfg->fc_src = rtmsg->rtmsg_src;
3525 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3526}
3527
5578689a 3528int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3529{
86872cb5 3530 struct fib6_config cfg;
1da177e4
LT
3531 struct in6_rtmsg rtmsg;
3532 int err;
3533
67ba4152 3534 switch (cmd) {
1da177e4
LT
3535 case SIOCADDRT: /* Add a route */
3536 case SIOCDELRT: /* Delete a route */
af31f412 3537 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3538 return -EPERM;
3539 err = copy_from_user(&rtmsg, arg,
3540 sizeof(struct in6_rtmsg));
3541 if (err)
3542 return -EFAULT;
86872cb5 3543
5578689a 3544 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3545
1da177e4
LT
3546 rtnl_lock();
3547 switch (cmd) {
3548 case SIOCADDRT:
acb54e3c 3549 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3550 break;
3551 case SIOCDELRT:
333c4301 3552 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3553 break;
3554 default:
3555 err = -EINVAL;
3556 }
3557 rtnl_unlock();
3558
3559 return err;
3ff50b79 3560 }
1da177e4
LT
3561
3562 return -EINVAL;
3563}
3564
3565/*
3566 * Drop the packet on the floor
3567 */
3568
d5fdd6ba 3569static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3570{
612f09e8 3571 int type;
adf30907 3572 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3573 switch (ipstats_mib_noroutes) {
3574 case IPSTATS_MIB_INNOROUTES:
0660e03f 3575 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3576 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3577 IP6_INC_STATS(dev_net(dst->dev),
3578 __in6_dev_get_safely(skb->dev),
3bd653c8 3579 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3580 break;
3581 }
3582 /* FALLTHROUGH */
3583 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3584 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3585 ipstats_mib_noroutes);
612f09e8
YH
3586 break;
3587 }
3ffe533c 3588 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3589 kfree_skb(skb);
3590 return 0;
3591}
3592
9ce8ade0
TG
3593static int ip6_pkt_discard(struct sk_buff *skb)
3594{
612f09e8 3595 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3596}
3597
ede2059d 3598static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3599{
adf30907 3600 skb->dev = skb_dst(skb)->dev;
612f09e8 3601 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3602}
3603
9ce8ade0
TG
3604static int ip6_pkt_prohibit(struct sk_buff *skb)
3605{
612f09e8 3606 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3607}
3608
ede2059d 3609static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3610{
adf30907 3611 skb->dev = skb_dst(skb)->dev;
612f09e8 3612 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3613}
3614
1da177e4
LT
3615/*
3616 * Allocate a dst for local (unicast / anycast) address.
3617 */
3618
360a9887
DA
3619struct fib6_info *addrconf_f6i_alloc(struct net *net,
3620 struct inet6_dev *idev,
3621 const struct in6_addr *addr,
3622 bool anycast, gfp_t gfp_flags)
1da177e4 3623{
ca254490 3624 u32 tb_id;
4832c30d 3625 struct net_device *dev = idev->dev;
360a9887 3626 struct fib6_info *f6i;
5f02ce24 3627
360a9887
DA
3628 f6i = fib6_info_alloc(gfp_flags);
3629 if (!f6i)
1da177e4
LT
3630 return ERR_PTR(-ENOMEM);
3631
360a9887 3632 f6i->dst_nocount = true;
360a9887
DA
3633 f6i->dst_host = true;
3634 f6i->fib6_protocol = RTPROT_KERNEL;
3635 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3636 if (anycast) {
360a9887
DA
3637 f6i->fib6_type = RTN_ANYCAST;
3638 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3639 } else {
360a9887
DA
3640 f6i->fib6_type = RTN_LOCAL;
3641 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3642 }
1da177e4 3643
360a9887 3644 f6i->fib6_nh.nh_gw = *addr;
93531c67 3645 dev_hold(dev);
360a9887
DA
3646 f6i->fib6_nh.nh_dev = dev;
3647 f6i->fib6_dst.addr = *addr;
3648 f6i->fib6_dst.plen = 128;
ca254490 3649 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3650 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3651
360a9887 3652 return f6i;
1da177e4
LT
3653}
3654
c3968a85
DW
/* Argument bundle for fib6_remove_prefsrc(), used to clear a deleted
 * address from the preferred-source field of routes.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
3661
8d1c802b 3662static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3663{
3664 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3665 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3666 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3667
5e670d84 3668 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3669 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3670 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3671 spin_lock_bh(&rt6_exception_lock);
c3968a85 3672 /* remove prefsrc entry */
93c2fb25 3673 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3674 /* need to update cache as well */
3675 rt6_exceptions_remove_prefsrc(rt);
3676 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3677 }
3678 return 0;
3679}
3680
3681void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3682{
3683 struct net *net = dev_net(ifp->idev->dev);
3684 struct arg_dev_net_ip adni = {
3685 .dev = ifp->idev->dev,
3686 .net = net,
3687 .addr = &ifp->addr,
3688 };
0c3584d5 3689 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3690}
3691
be7a010d 3692#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3693
3694/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3695static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3696{
3697 struct in6_addr *gateway = (struct in6_addr *)arg;
3698
93c2fb25 3699 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3700 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3701 return -1;
3702 }
b16cb459
WW
3703
3704 /* Further clean up cached routes in exception table.
3705 * This is needed because cached route may have a different
3706 * gateway than its 'parent' in the case of an ip redirect.
3707 */
3708 rt6_exceptions_clean_tohost(rt, gateway);
3709
be7a010d
DJ
3710 return 0;
3711}
3712
3713void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3714{
3715 fib6_clean_all(net, fib6_clean_tohost, gateway);
3716}
3717
2127d95a
IS
/* Argument bundle for the fib6_ifup() / fib6_ifdown() walkers. The two
 * union members share storage: fib6_ifup() reads nh_flags, fib6_ifdown()
 * reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear (ifup) */
		unsigned long event;	/* NETDEV_* event (ifdown) */
	};
};
3725
8d1c802b 3726static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3727{
8d1c802b 3728 struct fib6_info *iter;
d7dedee1
IS
3729 struct fib6_node *fn;
3730
93c2fb25
DA
3731 fn = rcu_dereference_protected(rt->fib6_node,
3732 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3733 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3734 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3735 while (iter) {
93c2fb25 3736 if (iter->fib6_metric == rt->fib6_metric &&
d7dedee1
IS
3737 rt6_qualify_for_ecmp(iter))
3738 return iter;
3739 iter = rcu_dereference_protected(iter->rt6_next,
93c2fb25 3740 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3741 }
3742
3743 return NULL;
3744}
3745
8d1c802b 3746static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3747{
5e670d84
DA
3748 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3749 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3750 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3751 return true;
3752
3753 return false;
3754}
3755
8d1c802b 3756static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3757{
8d1c802b 3758 struct fib6_info *iter;
d7dedee1
IS
3759 int total = 0;
3760
3761 if (!rt6_is_dead(rt))
5e670d84 3762 total += rt->fib6_nh.nh_weight;
d7dedee1 3763
93c2fb25 3764 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3765 if (!rt6_is_dead(iter))
5e670d84 3766 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3767 }
3768
3769 return total;
3770}
3771
8d1c802b 3772static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3773{
3774 int upper_bound = -1;
3775
3776 if (!rt6_is_dead(rt)) {
5e670d84 3777 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3778 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3779 total) - 1;
3780 }
5e670d84 3781 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3782}
3783
8d1c802b 3784static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3785{
8d1c802b 3786 struct fib6_info *iter;
d7dedee1
IS
3787 int weight = 0;
3788
3789 rt6_upper_bound_set(rt, &weight, total);
3790
93c2fb25 3791 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3792 rt6_upper_bound_set(iter, &weight, total);
3793}
3794
8d1c802b 3795void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3796{
8d1c802b 3797 struct fib6_info *first;
d7dedee1
IS
3798 int total;
3799
3800 /* In case the entire multipath route was marked for flushing,
3801 * then there is no need to rebalance upon the removal of every
3802 * sibling route.
3803 */
93c2fb25 3804 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3805 return;
3806
3807 /* During lookup routes are evaluated in order, so we need to
3808 * make sure upper bounds are assigned from the first sibling
3809 * onwards.
3810 */
3811 first = rt6_multipath_first_sibling(rt);
3812 if (WARN_ON_ONCE(!first))
3813 return;
3814
3815 total = rt6_multipath_total_weight(first);
3816 rt6_multipath_upper_bound_set(first, total);
3817}
3818
8d1c802b 3819static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3820{
3821 const struct arg_netdev_event *arg = p_arg;
7aef6859 3822 struct net *net = dev_net(arg->dev);
2127d95a 3823
421842ed 3824 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3825 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3826 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3827 rt6_multipath_rebalance(rt);
1de178ed 3828 }
2127d95a
IS
3829
3830 return 0;
3831}
3832
3833void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3834{
3835 struct arg_netdev_event arg = {
3836 .dev = dev,
6802f3ad
IS
3837 {
3838 .nh_flags = nh_flags,
3839 },
2127d95a
IS
3840 };
3841
3842 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3843 arg.nh_flags |= RTNH_F_LINKDOWN;
3844
3845 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3846}
3847
8d1c802b 3848static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3849 const struct net_device *dev)
3850{
8d1c802b 3851 struct fib6_info *iter;
1de178ed 3852
5e670d84 3853 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3854 return true;
93c2fb25 3855 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3856 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3857 return true;
3858
3859 return false;
3860}
3861
8d1c802b 3862static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3863{
8d1c802b 3864 struct fib6_info *iter;
1de178ed
IS
3865
3866 rt->should_flush = 1;
93c2fb25 3867 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3868 iter->should_flush = 1;
3869}
3870
8d1c802b 3871static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3872 const struct net_device *down_dev)
3873{
8d1c802b 3874 struct fib6_info *iter;
1de178ed
IS
3875 unsigned int dead = 0;
3876
5e670d84
DA
3877 if (rt->fib6_nh.nh_dev == down_dev ||
3878 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3879 dead++;
93c2fb25 3880 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3881 if (iter->fib6_nh.nh_dev == down_dev ||
3882 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3883 dead++;
3884
3885 return dead;
3886}
3887
8d1c802b 3888static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3889 const struct net_device *dev,
3890 unsigned int nh_flags)
3891{
8d1c802b 3892 struct fib6_info *iter;
1de178ed 3893
5e670d84
DA
3894 if (rt->fib6_nh.nh_dev == dev)
3895 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3896 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3897 if (iter->fib6_nh.nh_dev == dev)
3898 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3899}
3900
a1a22c12 3901/* called with write lock held for table with rt */
8d1c802b 3902static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 3903{
4c981e28
IS
3904 const struct arg_netdev_event *arg = p_arg;
3905 const struct net_device *dev = arg->dev;
7aef6859 3906 struct net *net = dev_net(dev);
8ed67789 3907
421842ed 3908 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3909 return 0;
3910
3911 switch (arg->event) {
3912 case NETDEV_UNREGISTER:
5e670d84 3913 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3914 case NETDEV_DOWN:
1de178ed 3915 if (rt->should_flush)
27c6fa73 3916 return -1;
93c2fb25 3917 if (!rt->fib6_nsiblings)
5e670d84 3918 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3919 if (rt6_multipath_uses_dev(rt, dev)) {
3920 unsigned int count;
3921
3922 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 3923 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
3924 rt6_multipath_flush(rt);
3925 return -1;
3926 }
3927 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3928 RTNH_F_LINKDOWN);
7aef6859 3929 fib6_update_sernum(net, rt);
d7dedee1 3930 rt6_multipath_rebalance(rt);
1de178ed
IS
3931 }
3932 return -2;
27c6fa73 3933 case NETDEV_CHANGE:
5e670d84 3934 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 3935 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3936 break;
5e670d84 3937 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3938 rt6_multipath_rebalance(rt);
27c6fa73 3939 break;
2b241361 3940 }
c159d30c 3941
1da177e4
LT
3942 return 0;
3943}
3944
27c6fa73 3945void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3946{
4c981e28 3947 struct arg_netdev_event arg = {
8ed67789 3948 .dev = dev,
6802f3ad
IS
3949 {
3950 .event = event,
3951 },
8ed67789
DL
3952 };
3953
4c981e28
IS
3954 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3955}
3956
3957void rt6_disable_ip(struct net_device *dev, unsigned long event)
3958{
3959 rt6_sync_down_dev(dev, event);
3960 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3961 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3962}
3963
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
3968
8d1c802b 3969static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
3970{
3971 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3972 struct inet6_dev *idev;
3973
3974 /* In IPv6 pmtu discovery is not optional,
3975 so that RTAX_MTU lock cannot disable it.
3976 We still use this lock to block changes
3977 caused by addrconf/ndisc.
3978 */
3979
3980 idev = __in6_dev_get(arg->dev);
38308473 3981 if (!idev)
1da177e4
LT
3982 return 0;
3983
3984 /* For administrative MTU increase, there is no way to discover
3985 IPv6 PMTU increase, so PMTU increase should be updated here.
3986 Since RFC 1981 doesn't include administrative MTU increase
3987 update PMTU increase is a MUST. (i.e. jumbo frame)
3988 */
5e670d84 3989 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
3990 !fib6_metric_locked(rt, RTAX_MTU)) {
3991 u32 mtu = rt->fib6_pmtu;
3992
3993 if (mtu >= arg->mtu ||
3994 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
3995 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
3996
f5bbe7ee 3997 spin_lock_bh(&rt6_exception_lock);
e9fa1495 3998 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 3999 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4000 }
1da177e4
LT
4001 return 0;
4002}
4003
95c96174 4004void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4005{
c71099ac
TG
4006 struct rt6_mtu_change_arg arg = {
4007 .dev = dev,
4008 .mtu = mtu,
4009 };
1da177e4 4010
0c3584d5 4011 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4012}
4013
ef7c79ed 4014static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4015 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4016 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4017 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4018 [RTA_PRIORITY] = { .type = NLA_U32 },
4019 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4020 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4021 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4022 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4023 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4024 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4025 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4026 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4027};
4028
4029static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4030 struct fib6_config *cfg,
4031 struct netlink_ext_ack *extack)
1da177e4 4032{
86872cb5
TG
4033 struct rtmsg *rtm;
4034 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4035 unsigned int pref;
86872cb5 4036 int err;
1da177e4 4037
fceb6435
JB
4038 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4039 NULL);
86872cb5
TG
4040 if (err < 0)
4041 goto errout;
1da177e4 4042
86872cb5
TG
4043 err = -EINVAL;
4044 rtm = nlmsg_data(nlh);
4045 memset(cfg, 0, sizeof(*cfg));
4046
4047 cfg->fc_table = rtm->rtm_table;
4048 cfg->fc_dst_len = rtm->rtm_dst_len;
4049 cfg->fc_src_len = rtm->rtm_src_len;
4050 cfg->fc_flags = RTF_UP;
4051 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4052 cfg->fc_type = rtm->rtm_type;
86872cb5 4053
ef2c7d7b
ND
4054 if (rtm->rtm_type == RTN_UNREACHABLE ||
4055 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4056 rtm->rtm_type == RTN_PROHIBIT ||
4057 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4058 cfg->fc_flags |= RTF_REJECT;
4059
ab79ad14
4060 if (rtm->rtm_type == RTN_LOCAL)
4061 cfg->fc_flags |= RTF_LOCAL;
4062
1f56a01f
MKL
4063 if (rtm->rtm_flags & RTM_F_CLONED)
4064 cfg->fc_flags |= RTF_CACHE;
4065
fc1e64e1
DA
4066 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4067
15e47304 4068 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4069 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4070 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4071
4072 if (tb[RTA_GATEWAY]) {
67b61f6c 4073 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4074 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4075 }
86872cb5
TG
4076
4077 if (tb[RTA_DST]) {
4078 int plen = (rtm->rtm_dst_len + 7) >> 3;
4079
4080 if (nla_len(tb[RTA_DST]) < plen)
4081 goto errout;
4082
4083 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4084 }
86872cb5
TG
4085
4086 if (tb[RTA_SRC]) {
4087 int plen = (rtm->rtm_src_len + 7) >> 3;
4088
4089 if (nla_len(tb[RTA_SRC]) < plen)
4090 goto errout;
4091
4092 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4093 }
86872cb5 4094
c3968a85 4095 if (tb[RTA_PREFSRC])
67b61f6c 4096 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4097
86872cb5
TG
4098 if (tb[RTA_OIF])
4099 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4100
4101 if (tb[RTA_PRIORITY])
4102 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4103
4104 if (tb[RTA_METRICS]) {
4105 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4106 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4107 }
86872cb5
TG
4108
4109 if (tb[RTA_TABLE])
4110 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4111
51ebd318
ND
4112 if (tb[RTA_MULTIPATH]) {
4113 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4114 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4115
4116 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4117 cfg->fc_mp_len, extack);
9ed59592
DA
4118 if (err < 0)
4119 goto errout;
51ebd318
ND
4120 }
4121
c78ba6d6
LR
4122 if (tb[RTA_PREF]) {
4123 pref = nla_get_u8(tb[RTA_PREF]);
4124 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4125 pref != ICMPV6_ROUTER_PREF_HIGH)
4126 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4127 cfg->fc_flags |= RTF_PREF(pref);
4128 }
4129
19e42e45
RP
4130 if (tb[RTA_ENCAP])
4131 cfg->fc_encap = tb[RTA_ENCAP];
4132
9ed59592 4133 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4134 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4135
c255bd68 4136 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4137 if (err < 0)
4138 goto errout;
4139 }
4140
32bc201e
XL
4141 if (tb[RTA_EXPIRES]) {
4142 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4143
4144 if (addrconf_finite_timeout(timeout)) {
4145 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4146 cfg->fc_flags |= RTF_EXPIRES;
4147 }
4148 }
4149
86872cb5
TG
4150 err = 0;
4151errout:
4152 return err;
1da177e4
LT
4153}
4154
6b9ea5a6 4155struct rt6_nh {
8d1c802b 4156 struct fib6_info *fib6_info;
6b9ea5a6 4157 struct fib6_config r_cfg;
6b9ea5a6
RP
4158 struct list_head next;
4159};
4160
4161static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4162{
4163 struct rt6_nh *nh;
4164
4165 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4166 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4167 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4168 nh->r_cfg.fc_ifindex);
4169 }
4170}
4171
d4ead6b3
DA
4172static int ip6_route_info_append(struct net *net,
4173 struct list_head *rt6_nh_list,
8d1c802b
DA
4174 struct fib6_info *rt,
4175 struct fib6_config *r_cfg)
6b9ea5a6
RP
4176{
4177 struct rt6_nh *nh;
6b9ea5a6
RP
4178 int err = -EEXIST;
4179
4180 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4181 /* check if fib6_info already exists */
4182 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4183 return err;
4184 }
4185
4186 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4187 if (!nh)
4188 return -ENOMEM;
8d1c802b 4189 nh->fib6_info = rt;
d4ead6b3 4190 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4191 if (err) {
4192 kfree(nh);
4193 return err;
4194 }
4195 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4196 list_add_tail(&nh->next, rt6_nh_list);
4197
4198 return 0;
4199}
4200
8d1c802b
DA
4201static void ip6_route_mpath_notify(struct fib6_info *rt,
4202 struct fib6_info *rt_last,
3b1137fe
DA
4203 struct nl_info *info,
4204 __u16 nlflags)
4205{
4206 /* if this is an APPEND route, then rt points to the first route
4207 * inserted and rt_last points to last route inserted. Userspace
4208 * wants a consistent dump of the route which starts at the first
4209 * nexthop. Since sibling routes are always added at the end of
4210 * the list, find the first sibling of the last route appended
4211 */
93c2fb25
DA
4212 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4213 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4214 struct fib6_info,
93c2fb25 4215 fib6_siblings);
3b1137fe
DA
4216 }
4217
4218 if (rt)
4219 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4220}
4221
333c4301
DA
4222static int ip6_route_multipath_add(struct fib6_config *cfg,
4223 struct netlink_ext_ack *extack)
51ebd318 4224{
8d1c802b 4225 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4226 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4227 struct fib6_config r_cfg;
4228 struct rtnexthop *rtnh;
8d1c802b 4229 struct fib6_info *rt;
6b9ea5a6
RP
4230 struct rt6_nh *err_nh;
4231 struct rt6_nh *nh, *nh_safe;
3b1137fe 4232 __u16 nlflags;
51ebd318
ND
4233 int remaining;
4234 int attrlen;
6b9ea5a6
RP
4235 int err = 1;
4236 int nhn = 0;
4237 int replace = (cfg->fc_nlinfo.nlh &&
4238 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4239 LIST_HEAD(rt6_nh_list);
51ebd318 4240
3b1137fe
DA
4241 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4242 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4243 nlflags |= NLM_F_APPEND;
4244
35f1b4e9 4245 remaining = cfg->fc_mp_len;
51ebd318 4246 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4247
6b9ea5a6 4248 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4249 * fib6_info structs per nexthop
6b9ea5a6 4250 */
51ebd318
ND
4251 while (rtnh_ok(rtnh, remaining)) {
4252 memcpy(&r_cfg, cfg, sizeof(*cfg));
4253 if (rtnh->rtnh_ifindex)
4254 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4255
4256 attrlen = rtnh_attrlen(rtnh);
4257 if (attrlen > 0) {
4258 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4259
4260 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4261 if (nla) {
67b61f6c 4262 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4263 r_cfg.fc_flags |= RTF_GATEWAY;
4264 }
19e42e45
RP
4265 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4266 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4267 if (nla)
4268 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4269 }
6b9ea5a6 4270
68e2ffde 4271 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4272 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4273 if (IS_ERR(rt)) {
4274 err = PTR_ERR(rt);
4275 rt = NULL;
6b9ea5a6 4276 goto cleanup;
8c5b83f0 4277 }
6b9ea5a6 4278
5e670d84 4279 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4280
d4ead6b3
DA
4281 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4282 rt, &r_cfg);
51ebd318 4283 if (err) {
93531c67 4284 fib6_info_release(rt);
6b9ea5a6
RP
4285 goto cleanup;
4286 }
4287
4288 rtnh = rtnh_next(rtnh, &remaining);
4289 }
4290
3b1137fe
DA
4291 /* for add and replace send one notification with all nexthops.
4292 * Skip the notification in fib6_add_rt2node and send one with
4293 * the full route when done
4294 */
4295 info->skip_notify = 1;
4296
6b9ea5a6
RP
4297 err_nh = NULL;
4298 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4299 rt_last = nh->fib6_info;
4300 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4301 fib6_info_release(nh->fib6_info);
93531c67 4302
3b1137fe
DA
4303 /* save reference to first route for notification */
4304 if (!rt_notif && !err)
8d1c802b 4305 rt_notif = nh->fib6_info;
3b1137fe 4306
8d1c802b
DA
4307 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4308 nh->fib6_info = NULL;
6b9ea5a6
RP
4309 if (err) {
4310 if (replace && nhn)
4311 ip6_print_replace_route_err(&rt6_nh_list);
4312 err_nh = nh;
4313 goto add_errout;
51ebd318 4314 }
6b9ea5a6 4315
1a72418b 4316 /* Because each route is added like a single route we remove
27596472
MK
4317 * these flags after the first nexthop: if there is a collision,
4318 * we have already failed to add the first nexthop:
4319 * fib6_add_rt2node() has rejected it; when replacing, old
4320 * nexthops have been replaced by first new, the rest should
4321 * be added to it.
1a72418b 4322 */
27596472
MK
4323 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4324 NLM_F_REPLACE);
6b9ea5a6
RP
4325 nhn++;
4326 }
4327
3b1137fe
DA
4328 /* success ... tell user about new route */
4329 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4330 goto cleanup;
4331
4332add_errout:
3b1137fe
DA
4333 /* send notification for routes that were added so that
4334 * the delete notifications sent by ip6_route_del are
4335 * coherent
4336 */
4337 if (rt_notif)
4338 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4339
6b9ea5a6
RP
4340 /* Delete routes that were already added */
4341 list_for_each_entry(nh, &rt6_nh_list, next) {
4342 if (err_nh == nh)
4343 break;
333c4301 4344 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4345 }
4346
4347cleanup:
4348 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4349 if (nh->fib6_info)
4350 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4351 list_del(&nh->next);
4352 kfree(nh);
4353 }
4354
4355 return err;
4356}
4357
333c4301
DA
4358static int ip6_route_multipath_del(struct fib6_config *cfg,
4359 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4360{
4361 struct fib6_config r_cfg;
4362 struct rtnexthop *rtnh;
4363 int remaining;
4364 int attrlen;
4365 int err = 1, last_err = 0;
4366
4367 remaining = cfg->fc_mp_len;
4368 rtnh = (struct rtnexthop *)cfg->fc_mp;
4369
4370 /* Parse a Multipath Entry */
4371 while (rtnh_ok(rtnh, remaining)) {
4372 memcpy(&r_cfg, cfg, sizeof(*cfg));
4373 if (rtnh->rtnh_ifindex)
4374 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4375
4376 attrlen = rtnh_attrlen(rtnh);
4377 if (attrlen > 0) {
4378 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4379
4380 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4381 if (nla) {
4382 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4383 r_cfg.fc_flags |= RTF_GATEWAY;
4384 }
4385 }
333c4301 4386 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4387 if (err)
4388 last_err = err;
4389
51ebd318
ND
4390 rtnh = rtnh_next(rtnh, &remaining);
4391 }
4392
4393 return last_err;
4394}
4395
c21ef3e3
DA
4396static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4397 struct netlink_ext_ack *extack)
1da177e4 4398{
86872cb5
TG
4399 struct fib6_config cfg;
4400 int err;
1da177e4 4401
333c4301 4402 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4403 if (err < 0)
4404 return err;
4405
51ebd318 4406 if (cfg.fc_mp)
333c4301 4407 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4408 else {
4409 cfg.fc_delete_all_nh = 1;
333c4301 4410 return ip6_route_del(&cfg, extack);
0ae81335 4411 }
1da177e4
LT
4412}
4413
c21ef3e3
DA
4414static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4415 struct netlink_ext_ack *extack)
1da177e4 4416{
86872cb5
TG
4417 struct fib6_config cfg;
4418 int err;
1da177e4 4419
333c4301 4420 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4421 if (err < 0)
4422 return err;
4423
51ebd318 4424 if (cfg.fc_mp)
333c4301 4425 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4426 else
acb54e3c 4427 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4428}
4429
8d1c802b 4430static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4431{
beb1afac
DA
4432 int nexthop_len = 0;
4433
93c2fb25 4434 if (rt->fib6_nsiblings) {
beb1afac
DA
4435 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4436 + NLA_ALIGN(sizeof(struct rtnexthop))
4437 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4438 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4439
93c2fb25 4440 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4441 }
4442
339bf98f
TG
4443 return NLMSG_ALIGN(sizeof(struct rtmsg))
4444 + nla_total_size(16) /* RTA_SRC */
4445 + nla_total_size(16) /* RTA_DST */
4446 + nla_total_size(16) /* RTA_GATEWAY */
4447 + nla_total_size(16) /* RTA_PREFSRC */
4448 + nla_total_size(4) /* RTA_TABLE */
4449 + nla_total_size(4) /* RTA_IIF */
4450 + nla_total_size(4) /* RTA_OIF */
4451 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4452 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4453 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4454 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4455 + nla_total_size(1) /* RTA_PREF */
5e670d84 4456 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4457 + nexthop_len;
4458}
4459
8d1c802b 4460static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4461 unsigned int *flags, bool skip_oif)
beb1afac 4462{
5e670d84 4463 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4464 *flags |= RTNH_F_DEAD;
4465
5e670d84 4466 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4467 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4468
4469 rcu_read_lock();
4470 if (fib6_ignore_linkdown(rt))
beb1afac 4471 *flags |= RTNH_F_DEAD;
dcd1f572 4472 rcu_read_unlock();
beb1afac
DA
4473 }
4474
93c2fb25 4475 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4476 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4477 goto nla_put_failure;
4478 }
4479
5e670d84
DA
4480 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4481 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4482 *flags |= RTNH_F_OFFLOAD;
4483
5be083ce 4484 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4485 if (!skip_oif && rt->fib6_nh.nh_dev &&
4486 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4487 goto nla_put_failure;
4488
5e670d84
DA
4489 if (rt->fib6_nh.nh_lwtstate &&
4490 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4491 goto nla_put_failure;
4492
4493 return 0;
4494
4495nla_put_failure:
4496 return -EMSGSIZE;
4497}
4498
5be083ce 4499/* add multipath next hop */
8d1c802b 4500static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4501{
5e670d84 4502 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4503 struct rtnexthop *rtnh;
4504 unsigned int flags = 0;
4505
4506 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4507 if (!rtnh)
4508 goto nla_put_failure;
4509
5e670d84
DA
4510 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4511 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4512
5be083ce 4513 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4514 goto nla_put_failure;
4515
4516 rtnh->rtnh_flags = flags;
4517
4518 /* length of rtnetlink header + attributes */
4519 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4520
4521 return 0;
4522
4523nla_put_failure:
4524 return -EMSGSIZE;
339bf98f
TG
4525}
4526
d4ead6b3 4527static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4528 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4529 struct in6_addr *dest, struct in6_addr *src,
15e47304 4530 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4531 unsigned int flags)
1da177e4
LT
4532{
4533 struct rtmsg *rtm;
2d7202bf 4534 struct nlmsghdr *nlh;
d4ead6b3
DA
4535 long expires = 0;
4536 u32 *pmetrics;
9e762a4a 4537 u32 table;
1da177e4 4538
15e47304 4539 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4540 if (!nlh)
26932566 4541 return -EMSGSIZE;
2d7202bf
TG
4542
4543 rtm = nlmsg_data(nlh);
1da177e4 4544 rtm->rtm_family = AF_INET6;
93c2fb25
DA
4545 rtm->rtm_dst_len = rt->fib6_dst.plen;
4546 rtm->rtm_src_len = rt->fib6_src.plen;
1da177e4 4547 rtm->rtm_tos = 0;
93c2fb25
DA
4548 if (rt->fib6_table)
4549 table = rt->fib6_table->tb6_id;
c71099ac 4550 else
9e762a4a
PM
4551 table = RT6_TABLE_UNSPEC;
4552 rtm->rtm_table = table;
c78679e8
DM
4553 if (nla_put_u32(skb, RTA_TABLE, table))
4554 goto nla_put_failure;
e8478e80
DA
4555
4556 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4557 rtm->rtm_flags = 0;
4558 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4559 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4560
93c2fb25 4561 if (rt->fib6_flags & RTF_CACHE)
1da177e4
LT
4562 rtm->rtm_flags |= RTM_F_CLONED;
4563
d4ead6b3
DA
4564 if (dest) {
4565 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4566 goto nla_put_failure;
1ab1457c 4567 rtm->rtm_dst_len = 128;
1da177e4 4568 } else if (rtm->rtm_dst_len)
93c2fb25 4569 if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
c78679e8 4570 goto nla_put_failure;
1da177e4
LT
4571#ifdef CONFIG_IPV6_SUBTREES
4572 if (src) {
930345ea 4573 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4574 goto nla_put_failure;
1ab1457c 4575 rtm->rtm_src_len = 128;
c78679e8 4576 } else if (rtm->rtm_src_len &&
93c2fb25 4577 nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
c78679e8 4578 goto nla_put_failure;
1da177e4 4579#endif
7bc570c8
YH
4580 if (iif) {
4581#ifdef CONFIG_IPV6_MROUTE
93c2fb25 4582 if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
fd61c6ba
DA
4583 int err = ip6mr_get_route(net, skb, rtm, portid);
4584
4585 if (err == 0)
4586 return 0;
4587 if (err < 0)
4588 goto nla_put_failure;
7bc570c8
YH
4589 } else
4590#endif
c78679e8
DM
4591 if (nla_put_u32(skb, RTA_IIF, iif))
4592 goto nla_put_failure;
d4ead6b3 4593 } else if (dest) {
1da177e4 4594 struct in6_addr saddr_buf;
d4ead6b3 4595 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4596 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4597 goto nla_put_failure;
1da177e4 4598 }
2d7202bf 4599
93c2fb25 4600 if (rt->fib6_prefsrc.plen) {
c3968a85 4601 struct in6_addr saddr_buf;
93c2fb25 4602 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4603 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4604 goto nla_put_failure;
c3968a85
DW
4605 }
4606
d4ead6b3
DA
4607 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4608 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4609 goto nla_put_failure;
4610
93c2fb25 4611 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4612 goto nla_put_failure;
8253947e 4613
beb1afac
DA
4614 /* For multipath routes, walk the siblings list and add
4615 * each as a nexthop within RTA_MULTIPATH.
4616 */
93c2fb25 4617 if (rt->fib6_nsiblings) {
8d1c802b 4618 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4619 struct nlattr *mp;
4620
4621 mp = nla_nest_start(skb, RTA_MULTIPATH);
4622 if (!mp)
4623 goto nla_put_failure;
4624
4625 if (rt6_add_nexthop(skb, rt) < 0)
4626 goto nla_put_failure;
4627
4628 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4629 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4630 if (rt6_add_nexthop(skb, sibling) < 0)
4631 goto nla_put_failure;
4632 }
4633
4634 nla_nest_end(skb, mp);
4635 } else {
5be083ce 4636 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4637 goto nla_put_failure;
4638 }
4639
93c2fb25 4640 if (rt->fib6_flags & RTF_EXPIRES) {
14895687
DA
4641 expires = dst ? dst->expires : rt->expires;
4642 expires -= jiffies;
4643 }
69cdf8f9 4644
d4ead6b3 4645 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4646 goto nla_put_failure;
2d7202bf 4647
93c2fb25 4648 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
c78ba6d6
LR
4649 goto nla_put_failure;
4650
19e42e45 4651
053c095a
JB
4652 nlmsg_end(skb, nlh);
4653 return 0;
2d7202bf
TG
4654
4655nla_put_failure:
26932566
PM
4656 nlmsg_cancel(skb, nlh);
4657 return -EMSGSIZE;
1da177e4
LT
4658}
4659
8d1c802b 4660int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4661{
4662 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4663 struct net *net = arg->net;
4664
421842ed 4665 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4666 return 0;
1da177e4 4667
2d7202bf
TG
4668 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4669 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4670
4671 /* user wants prefix routes only */
4672 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4673 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4674 /* success since this is not a prefix route */
4675 return 1;
4676 }
4677 }
1da177e4 4678
d4ead6b3
DA
4679 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4680 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4681 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4682}
4683
c21ef3e3
DA
4684static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4685 struct netlink_ext_ack *extack)
1da177e4 4686{
3b1e0a65 4687 struct net *net = sock_net(in_skb->sk);
ab364a6f 4688 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4689 int err, iif = 0, oif = 0;
4690 struct dst_entry *dst;
ab364a6f 4691 struct rt6_info *rt;
1da177e4 4692 struct sk_buff *skb;
ab364a6f 4693 struct rtmsg *rtm;
4c9483b2 4694 struct flowi6 fl6;
18c3a61c 4695 bool fibmatch;
1da177e4 4696
fceb6435 4697 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4698 extack);
ab364a6f
TG
4699 if (err < 0)
4700 goto errout;
1da177e4 4701
ab364a6f 4702 err = -EINVAL;
4c9483b2 4703 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4704 rtm = nlmsg_data(nlh);
4705 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4706 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4707
ab364a6f
TG
4708 if (tb[RTA_SRC]) {
4709 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4710 goto errout;
4711
4e3fd7a0 4712 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4713 }
4714
4715 if (tb[RTA_DST]) {
4716 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4717 goto errout;
4718
4e3fd7a0 4719 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4720 }
4721
4722 if (tb[RTA_IIF])
4723 iif = nla_get_u32(tb[RTA_IIF]);
4724
4725 if (tb[RTA_OIF])
72331bc0 4726 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4727
2e47b291
LC
4728 if (tb[RTA_MARK])
4729 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4730
622ec2c9
LC
4731 if (tb[RTA_UID])
4732 fl6.flowi6_uid = make_kuid(current_user_ns(),
4733 nla_get_u32(tb[RTA_UID]));
4734 else
4735 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4736
1da177e4
LT
4737 if (iif) {
4738 struct net_device *dev;
72331bc0
SL
4739 int flags = 0;
4740
121622db
FW
4741 rcu_read_lock();
4742
4743 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4744 if (!dev) {
121622db 4745 rcu_read_unlock();
1da177e4 4746 err = -ENODEV;
ab364a6f 4747 goto errout;
1da177e4 4748 }
72331bc0
SL
4749
4750 fl6.flowi6_iif = iif;
4751
4752 if (!ipv6_addr_any(&fl6.saddr))
4753 flags |= RT6_LOOKUP_F_HAS_SADDR;
4754
b75cc8f9 4755 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4756
4757 rcu_read_unlock();
72331bc0
SL
4758 } else {
4759 fl6.flowi6_oif = oif;
4760
58acfd71 4761 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4762 }
4763
18c3a61c
RP
4764
4765 rt = container_of(dst, struct rt6_info, dst);
4766 if (rt->dst.error) {
4767 err = rt->dst.error;
4768 ip6_rt_put(rt);
4769 goto errout;
1da177e4
LT
4770 }
4771
9d6acb3b
WC
4772 if (rt == net->ipv6.ip6_null_entry) {
4773 err = rt->dst.error;
4774 ip6_rt_put(rt);
4775 goto errout;
4776 }
4777
ab364a6f 4778 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4779 if (!skb) {
94e187c0 4780 ip6_rt_put(rt);
ab364a6f
TG
4781 err = -ENOBUFS;
4782 goto errout;
4783 }
1da177e4 4784
d8d1f30b 4785 skb_dst_set(skb, &rt->dst);
18c3a61c 4786 if (fibmatch)
93531c67 4787 err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
18c3a61c
RP
4788 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4789 nlh->nlmsg_seq, 0);
4790 else
93531c67
DA
4791 err = rt6_fill_node(net, skb, rt->from, dst,
4792 &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4793 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4794 0);
1da177e4 4795 if (err < 0) {
ab364a6f
TG
4796 kfree_skb(skb);
4797 goto errout;
1da177e4
LT
4798 }
4799
15e47304 4800 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4801errout:
1da177e4 4802 return err;
1da177e4
LT
4803}
4804
8d1c802b 4805void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4806 unsigned int nlm_flags)
1da177e4
LT
4807{
4808 struct sk_buff *skb;
5578689a 4809 struct net *net = info->nl_net;
528c4ceb
DL
4810 u32 seq;
4811 int err;
4812
4813 err = -ENOBUFS;
38308473 4814 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4815
19e42e45 4816 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4817 if (!skb)
21713ebc
TG
4818 goto errout;
4819
d4ead6b3
DA
4820 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4821 event, info->portid, seq, nlm_flags);
26932566
PM
4822 if (err < 0) {
4823 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4824 WARN_ON(err == -EMSGSIZE);
4825 kfree_skb(skb);
4826 goto errout;
4827 }
15e47304 4828 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4829 info->nlh, gfp_any());
4830 return;
21713ebc
TG
4831errout:
4832 if (err < 0)
5578689a 4833 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4834}
4835
8ed67789 4836static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4837 unsigned long event, void *ptr)
8ed67789 4838{
351638e7 4839 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4840 struct net *net = dev_net(dev);
8ed67789 4841
242d3a49
WC
4842 if (!(dev->flags & IFF_LOOPBACK))
4843 return NOTIFY_OK;
4844
4845 if (event == NETDEV_REGISTER) {
421842ed 4846 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 4847 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4848 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4849#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4850 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4851 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4852 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4853 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4854#endif
76da0704
WC
4855 } else if (event == NETDEV_UNREGISTER &&
4856 dev->reg_state != NETREG_UNREGISTERED) {
4857 /* NETDEV_UNREGISTER could be fired for multiple times by
4858 * netdev_wait_allrefs(). Make sure we only call this once.
4859 */
12d94a80 4860 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4861#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4862 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4863 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4864#endif
4865 }
4866
4867 return NOTIFY_OK;
4868}
4869
1da177e4
LT
4870/*
4871 * /proc
4872 */
4873
4874#ifdef CONFIG_PROC_FS
4875
33120b30 4876static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4877 .open = ipv6_route_open,
4878 .read = seq_read,
4879 .llseek = seq_lseek,
8d2ca1d7 4880 .release = seq_release_net,
33120b30
AD
4881};
4882
1da177e4
LT
4883static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4884{
69ddb805 4885 struct net *net = (struct net *)seq->private;
1da177e4 4886 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4887 net->ipv6.rt6_stats->fib_nodes,
4888 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4889 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4890 net->ipv6.rt6_stats->fib_rt_entries,
4891 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4892 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4893 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4894
4895 return 0;
4896}
4897
4898static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4899{
de05c557 4900 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4901}
4902
9a32144e 4903static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4904 .open = rt6_stats_seq_open,
4905 .read = seq_read,
4906 .llseek = seq_lseek,
b6fcbdb4 4907 .release = single_release_net,
1da177e4
LT
4908};
4909#endif /* CONFIG_PROC_FS */
4910
4911#ifdef CONFIG_SYSCTL
4912
1da177e4 4913static
fe2c6338 4914int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4915 void __user *buffer, size_t *lenp, loff_t *ppos)
4916{
c486da34
LAG
4917 struct net *net;
4918 int delay;
4919 if (!write)
1da177e4 4920 return -EINVAL;
c486da34
LAG
4921
4922 net = (struct net *)ctl->extra1;
4923 delay = net->ipv6.sysctl.flush_delay;
4924 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4925 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4926 return 0;
1da177e4
LT
4927}
4928
fe2c6338 4929struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4930 {
1da177e4 4931 .procname = "flush",
4990509f 4932 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4933 .maxlen = sizeof(int),
89c8b3a1 4934 .mode = 0200,
6d9f239a 4935 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4936 },
4937 {
1da177e4 4938 .procname = "gc_thresh",
9a7ec3a9 4939 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4940 .maxlen = sizeof(int),
4941 .mode = 0644,
6d9f239a 4942 .proc_handler = proc_dointvec,
1da177e4
LT
4943 },
4944 {
1da177e4 4945 .procname = "max_size",
4990509f 4946 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4947 .maxlen = sizeof(int),
4948 .mode = 0644,
6d9f239a 4949 .proc_handler = proc_dointvec,
1da177e4
LT
4950 },
4951 {
1da177e4 4952 .procname = "gc_min_interval",
4990509f 4953 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4954 .maxlen = sizeof(int),
4955 .mode = 0644,
6d9f239a 4956 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4957 },
4958 {
1da177e4 4959 .procname = "gc_timeout",
4990509f 4960 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4961 .maxlen = sizeof(int),
4962 .mode = 0644,
6d9f239a 4963 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4964 },
4965 {
1da177e4 4966 .procname = "gc_interval",
4990509f 4967 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4968 .maxlen = sizeof(int),
4969 .mode = 0644,
6d9f239a 4970 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4971 },
4972 {
1da177e4 4973 .procname = "gc_elasticity",
4990509f 4974 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4975 .maxlen = sizeof(int),
4976 .mode = 0644,
f3d3f616 4977 .proc_handler = proc_dointvec,
1da177e4
LT
4978 },
4979 {
1da177e4 4980 .procname = "mtu_expires",
4990509f 4981 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4982 .maxlen = sizeof(int),
4983 .mode = 0644,
6d9f239a 4984 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4985 },
4986 {
1da177e4 4987 .procname = "min_adv_mss",
4990509f 4988 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4989 .maxlen = sizeof(int),
4990 .mode = 0644,
f3d3f616 4991 .proc_handler = proc_dointvec,
1da177e4
LT
4992 },
4993 {
1da177e4 4994 .procname = "gc_min_interval_ms",
4990509f 4995 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4996 .maxlen = sizeof(int),
4997 .mode = 0644,
6d9f239a 4998 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4999 },
f8572d8f 5000 { }
1da177e4
LT
5001};
5002
2c8c1e72 5003struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5004{
5005 struct ctl_table *table;
5006
5007 table = kmemdup(ipv6_route_table_template,
5008 sizeof(ipv6_route_table_template),
5009 GFP_KERNEL);
5ee09105
YH
5010
5011 if (table) {
5012 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5013 table[0].extra1 = net;
86393e52 5014 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5015 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5016 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5017 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5018 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5019 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5020 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5021 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5022 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5023
5024 /* Don't export sysctls to unprivileged users */
5025 if (net->user_ns != &init_user_ns)
5026 table[0].procname = NULL;
5ee09105
YH
5027 }
5028
760f2d01
DL
5029 return table;
5030}
1da177e4
LT
5031#endif
5032
2c8c1e72 5033static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5034{
633d424b 5035 int ret = -ENOMEM;
8ed67789 5036
86393e52
AD
5037 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5038 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5039
fc66f95c
ED
5040 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5041 goto out_ip6_dst_ops;
5042
421842ed
DA
5043 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5044 sizeof(*net->ipv6.fib6_null_entry),
5045 GFP_KERNEL);
5046 if (!net->ipv6.fib6_null_entry)
5047 goto out_ip6_dst_entries;
5048
8ed67789
DL
5049 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5050 sizeof(*net->ipv6.ip6_null_entry),
5051 GFP_KERNEL);
5052 if (!net->ipv6.ip6_null_entry)
421842ed 5053 goto out_fib6_null_entry;
d8d1f30b 5054 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5055 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5056 ip6_template_metrics, true);
8ed67789
DL
5057
5058#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5059 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5060 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5061 sizeof(*net->ipv6.ip6_prohibit_entry),
5062 GFP_KERNEL);
68fffc67
PZ
5063 if (!net->ipv6.ip6_prohibit_entry)
5064 goto out_ip6_null_entry;
d8d1f30b 5065 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5066 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5067 ip6_template_metrics, true);
8ed67789
DL
5068
5069 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5070 sizeof(*net->ipv6.ip6_blk_hole_entry),
5071 GFP_KERNEL);
68fffc67
PZ
5072 if (!net->ipv6.ip6_blk_hole_entry)
5073 goto out_ip6_prohibit_entry;
d8d1f30b 5074 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5075 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5076 ip6_template_metrics, true);
8ed67789
DL
5077#endif
5078
b339a47c
PZ
5079 net->ipv6.sysctl.flush_delay = 0;
5080 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5081 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5082 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5083 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5084 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5085 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5086 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5087
6891a346
BT
5088 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5089
8ed67789
DL
5090 ret = 0;
5091out:
5092 return ret;
f2fc6a54 5093
68fffc67
PZ
5094#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5095out_ip6_prohibit_entry:
5096 kfree(net->ipv6.ip6_prohibit_entry);
5097out_ip6_null_entry:
5098 kfree(net->ipv6.ip6_null_entry);
5099#endif
421842ed
DA
5100out_fib6_null_entry:
5101 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5102out_ip6_dst_entries:
5103 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5104out_ip6_dst_ops:
f2fc6a54 5105 goto out;
cdb18761
DL
5106}
5107
2c8c1e72 5108static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5109{
421842ed 5110 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5111 kfree(net->ipv6.ip6_null_entry);
5112#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5113 kfree(net->ipv6.ip6_prohibit_entry);
5114 kfree(net->ipv6.ip6_blk_hole_entry);
5115#endif
41bb78b4 5116 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5117}
5118
d189634e
TG
5119static int __net_init ip6_route_net_init_late(struct net *net)
5120{
5121#ifdef CONFIG_PROC_FS
d4beaa66 5122 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5123 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5124#endif
5125 return 0;
5126}
5127
5128static void __net_exit ip6_route_net_exit_late(struct net *net)
5129{
5130#ifdef CONFIG_PROC_FS
ece31ffd
G
5131 remove_proc_entry("ipv6_route", net->proc_net);
5132 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5133#endif
5134}
5135
cdb18761
DL
5136static struct pernet_operations ip6_route_net_ops = {
5137 .init = ip6_route_net_init,
5138 .exit = ip6_route_net_exit,
5139};
5140
c3426b47
DM
5141static int __net_init ipv6_inetpeer_init(struct net *net)
5142{
5143 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5144
5145 if (!bp)
5146 return -ENOMEM;
5147 inet_peer_base_init(bp);
5148 net->ipv6.peers = bp;
5149 return 0;
5150}
5151
5152static void __net_exit ipv6_inetpeer_exit(struct net *net)
5153{
5154 struct inet_peer_base *bp = net->ipv6.peers;
5155
5156 net->ipv6.peers = NULL;
56a6b248 5157 inetpeer_invalidate_tree(bp);
c3426b47
DM
5158 kfree(bp);
5159}
5160
2b823f72 5161static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5162 .init = ipv6_inetpeer_init,
5163 .exit = ipv6_inetpeer_exit,
5164};
5165
d189634e
TG
5166static struct pernet_operations ip6_route_net_late_ops = {
5167 .init = ip6_route_net_init_late,
5168 .exit = ip6_route_net_exit_late,
5169};
5170
8ed67789
DL
5171static struct notifier_block ip6_route_dev_notifier = {
5172 .notifier_call = ip6_route_dev_notify,
242d3a49 5173 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5174};
5175
2f460933
WC
5176void __init ip6_route_init_special_entries(void)
5177{
5178 /* Registering of the loopback is done before this portion of code,
5179 * the loopback reference in rt6_info will not be taken, do it
5180 * manually for init_net */
421842ed 5181 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5182 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5183 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5184 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5185 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5186 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5187 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5188 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5189 #endif
5190}
5191
433d49c3 5192int __init ip6_route_init(void)
1da177e4 5193{
433d49c3 5194 int ret;
8d0b94af 5195 int cpu;
433d49c3 5196
9a7ec3a9
DL
5197 ret = -ENOMEM;
5198 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5199 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5200 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5201 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5202 goto out;
14e50e57 5203
fc66f95c 5204 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5205 if (ret)
bdb3289f 5206 goto out_kmem_cache;
bdb3289f 5207
c3426b47
DM
5208 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5209 if (ret)
e8803b6c 5210 goto out_dst_entries;
2a0c451a 5211
7e52b33b
DM
5212 ret = register_pernet_subsys(&ip6_route_net_ops);
5213 if (ret)
5214 goto out_register_inetpeer;
c3426b47 5215
5dc121e9
AE
5216 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5217
e8803b6c 5218 ret = fib6_init();
433d49c3 5219 if (ret)
8ed67789 5220 goto out_register_subsys;
433d49c3 5221
433d49c3
DL
5222 ret = xfrm6_init();
5223 if (ret)
e8803b6c 5224 goto out_fib6_init;
c35b7e72 5225
433d49c3
DL
5226 ret = fib6_rules_init();
5227 if (ret)
5228 goto xfrm6_init;
7e5449c2 5229
d189634e
TG
5230 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5231 if (ret)
5232 goto fib6_rules_init;
5233
16feebcf
FW
5234 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5235 inet6_rtm_newroute, NULL, 0);
5236 if (ret < 0)
5237 goto out_register_late_subsys;
5238
5239 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5240 inet6_rtm_delroute, NULL, 0);
5241 if (ret < 0)
5242 goto out_register_late_subsys;
5243
5244 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5245 inet6_rtm_getroute, NULL,
5246 RTNL_FLAG_DOIT_UNLOCKED);
5247 if (ret < 0)
d189634e 5248 goto out_register_late_subsys;
c127ea2c 5249
8ed67789 5250 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5251 if (ret)
d189634e 5252 goto out_register_late_subsys;
8ed67789 5253
8d0b94af
MKL
5254 for_each_possible_cpu(cpu) {
5255 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5256
5257 INIT_LIST_HEAD(&ul->head);
5258 spin_lock_init(&ul->lock);
5259 }
5260
433d49c3
DL
5261out:
5262 return ret;
5263
d189634e 5264out_register_late_subsys:
16feebcf 5265 rtnl_unregister_all(PF_INET6);
d189634e 5266 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5267fib6_rules_init:
433d49c3
DL
5268 fib6_rules_cleanup();
5269xfrm6_init:
433d49c3 5270 xfrm6_fini();
2a0c451a
TG
5271out_fib6_init:
5272 fib6_gc_cleanup();
8ed67789
DL
5273out_register_subsys:
5274 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5275out_register_inetpeer:
5276 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5277out_dst_entries:
5278 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5279out_kmem_cache:
f2fc6a54 5280 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5281 goto out;
1da177e4
LT
5282}
5283
5284void ip6_route_cleanup(void)
5285{
8ed67789 5286 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5287 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5288 fib6_rules_cleanup();
1da177e4 5289 xfrm6_fini();
1da177e4 5290 fib6_gc_cleanup();
c3426b47 5291 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5292 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5293 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5294 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5295}