net/ipv6: Make from in rt6_info rcu protected
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Outcome of the neighbour check performed while scoring a route.
 * Every failure code is negative so callers can test "< 0"; success is
 * the only positive value.
 */
enum rt6_nud_state {
	/* route must not be used (find_match() discards it) */
	RT6_NUD_FAIL_HARD	= -3,
	/* next hop is in NUD_FAILED state (CONFIG_IPV6_ROUTER_PREF only) */
	RT6_NUD_FAIL_PROBE	= -2,
	/* no neighbour entry; find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
8d1c802b
DA
99static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 102 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
8d1c802b 106static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
70ceb4f5 110#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 111static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 112 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
113 const struct in6_addr *gwaddr,
114 struct net_device *dev,
95c96174 115 unsigned int pref);
8d1c802b 116static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 117 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
118 const struct in6_addr *gwaddr,
119 struct net_device *dev);
70ceb4f5
YH
120#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* dst_ops->redirect handler for blackhole dsts: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
8d1c802b 286static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
287 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .fib6_protocol = RTPROT_KERNEL,
289 .fib6_metric = ~(u32)0,
290 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
305};
306
101367c2
TG
307#ifdef CONFIG_IPV6_MULTIPLE_TABLES
308
fb0af4c7 309static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
310 .dst = {
311 .__refcnt = ATOMIC_INIT(1),
312 .__use = 1,
2c20cbd7 313 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 314 .error = -EACCES,
d8d1f30b
CG
315 .input = ip6_pkt_prohibit,
316 .output = ip6_pkt_prohibit_out,
101367c2
TG
317 },
318 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
319};
320
fb0af4c7 321static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
322 .dst = {
323 .__refcnt = ATOMIC_INIT(1),
324 .__use = 1,
2c20cbd7 325 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 326 .error = -EINVAL,
d8d1f30b 327 .input = dst_discard,
ede2059d 328 .output = dst_discard_out,
101367c2
TG
329 },
330 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
331};
332
333#endif
334
ebfa45f0
MKL
335static void rt6_info_init(struct rt6_info *rt)
336{
337 struct dst_entry *dst = &rt->dst;
338
339 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
340 INIT_LIST_HEAD(&rt->rt6i_uncached);
341}
342
1da177e4 343/* allocate dst with ip6_dst_ops */
93531c67
DA
344struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 int flags)
1da177e4 346{
97bab73f 347 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 348 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 349
81eb8447 350 if (rt) {
ebfa45f0 351 rt6_info_init(rt);
81eb8447
WW
352 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 }
8104891b 354
cf911662 355 return rt;
1da177e4 356}
9ab179d8 357EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 358
1da177e4
LT
359static void ip6_dst_destroy(struct dst_entry *dst)
360{
361 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 362 struct fib6_info *from;
8d0b94af 363 struct inet6_dev *idev;
1da177e4 364
4b32b5ad 365 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
366 rt6_uncached_list_del(rt);
367
368 idev = rt->rt6i_idev;
38308473 369 if (idev) {
1da177e4
LT
370 rt->rt6i_idev = NULL;
371 in6_dev_put(idev);
1ab1457c 372 }
d4ead6b3 373
a68886a6
DA
374 rcu_read_lock();
375 from = rcu_dereference(rt->from);
376 rcu_assign_pointer(rt->from, NULL);
93531c67 377 fib6_info_release(from);
a68886a6 378 rcu_read_unlock();
b3419363
DM
379}
380
1da177e4
LT
381static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
382 int how)
383{
384 struct rt6_info *rt = (struct rt6_info *)dst;
385 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 386 struct net_device *loopback_dev =
c346dca1 387 dev_net(dev)->loopback_dev;
1da177e4 388
e5645f51
WW
389 if (idev && idev->dev != loopback_dev) {
390 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
391 if (loopback_idev) {
392 rt->rt6i_idev = loopback_idev;
393 in6_dev_put(idev);
97cac082 394 }
1da177e4
LT
395 }
396}
397
5973fb1e
MKL
398static bool __rt6_check_expired(const struct rt6_info *rt)
399{
400 if (rt->rt6i_flags & RTF_EXPIRES)
401 return time_after(jiffies, rt->dst.expires);
402 else
403 return false;
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
a68886a6
DA
408 struct fib6_info *from;
409
410 from = rcu_dereference(rt->from);
411
1716a961
G
412 if (rt->rt6i_flags & RTF_EXPIRES) {
413 if (time_after(jiffies, rt->dst.expires))
a50feda5 414 return true;
a68886a6 415 } else if (from) {
1e2ea8ad 416 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 417 fib6_check_expired(from);
1716a961 418 }
a50feda5 419 return false;
1da177e4
LT
420}
421
8d1c802b
DA
422static struct fib6_info *rt6_multipath_select(const struct net *net,
423 struct fib6_info *match,
52bd4c0c 424 struct flowi6 *fl6, int oif,
b75cc8f9 425 const struct sk_buff *skb,
52bd4c0c 426 int strict)
51ebd318 427{
8d1c802b 428 struct fib6_info *sibling, *next_sibling;
51ebd318 429
b673d6cc
JS
430 /* We might have already computed the hash for ICMPv6 errors. In such
431 * case it will always be non-zero. Otherwise now is the time to do it.
432 */
433 if (!fl6->mp_hash)
b4bac172 434 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 435
5e670d84 436 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
437 return match;
438
93c2fb25
DA
439 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
440 fib6_siblings) {
5e670d84
DA
441 int nh_upper_bound;
442
443 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
444 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
445 continue;
446 if (rt6_score_route(sibling, oif, strict) < 0)
447 break;
448 match = sibling;
449 break;
450 }
451
51ebd318
ND
452 return match;
453}
454
1da177e4 455/*
66f5d6ce 456 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
457 */
458
8d1c802b
DA
459static inline struct fib6_info *rt6_device_match(struct net *net,
460 struct fib6_info *rt,
b71d1d42 461 const struct in6_addr *saddr,
1da177e4 462 int oif,
d420895e 463 int flags)
1da177e4 464{
8d1c802b 465 struct fib6_info *sprt;
1da177e4 466
5e670d84
DA
467 if (!oif && ipv6_addr_any(saddr) &&
468 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 469 return rt;
dd3abc4e 470
071fb37e 471 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 472 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 473
5e670d84 474 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
475 continue;
476
dd3abc4e 477 if (oif) {
1da177e4
LT
478 if (dev->ifindex == oif)
479 return sprt;
dd3abc4e
YH
480 } else {
481 if (ipv6_chk_addr(net, saddr, dev,
482 flags & RT6_LOOKUP_F_IFACE))
483 return sprt;
1da177e4 484 }
dd3abc4e 485 }
1da177e4 486
eea68cd3
DA
487 if (oif && flags & RT6_LOOKUP_F_IFACE)
488 return net->ipv6.fib6_null_entry;
8067bb8c 489
421842ed 490 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
491}
492
27097255 493#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
494struct __rt6_probe_work {
495 struct work_struct work;
496 struct in6_addr target;
497 struct net_device *dev;
498};
499
500static void rt6_probe_deferred(struct work_struct *w)
501{
502 struct in6_addr mcaddr;
503 struct __rt6_probe_work *work =
504 container_of(w, struct __rt6_probe_work, work);
505
506 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 507 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 508 dev_put(work->dev);
662f5533 509 kfree(work);
c2f17e82
HFS
510}
511
8d1c802b 512static void rt6_probe(struct fib6_info *rt)
27097255 513{
990edb42 514 struct __rt6_probe_work *work;
5e670d84 515 const struct in6_addr *nh_gw;
f2c31e32 516 struct neighbour *neigh;
5e670d84
DA
517 struct net_device *dev;
518
27097255
YH
519 /*
520 * Okay, this does not seem to be appropriate
521 * for now, however, we need to check if it
522 * is really so; aka Router Reachability Probing.
523 *
524 * Router Reachability Probe MUST be rate-limited
525 * to no more than one per minute.
526 */
93c2fb25 527 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 528 return;
5e670d84
DA
529
530 nh_gw = &rt->fib6_nh.nh_gw;
531 dev = rt->fib6_nh.nh_dev;
2152caea 532 rcu_read_lock_bh();
5e670d84 533 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 534 if (neigh) {
dcd1f572
DA
535 struct inet6_dev *idev;
536
8d6c31bf
MKL
537 if (neigh->nud_state & NUD_VALID)
538 goto out;
539
dcd1f572 540 idev = __in6_dev_get(dev);
990edb42 541 work = NULL;
2152caea 542 write_lock(&neigh->lock);
990edb42
MKL
543 if (!(neigh->nud_state & NUD_VALID) &&
544 time_after(jiffies,
dcd1f572 545 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
546 work = kmalloc(sizeof(*work), GFP_ATOMIC);
547 if (work)
548 __neigh_set_probe_once(neigh);
c2f17e82 549 }
2152caea 550 write_unlock(&neigh->lock);
990edb42
MKL
551 } else {
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 553 }
990edb42
MKL
554
555 if (work) {
556 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
557 work->target = *nh_gw;
558 dev_hold(dev);
559 work->dev = dev;
990edb42
MKL
560 schedule_work(&work->work);
561 }
562
8d6c31bf 563out:
2152caea 564 rcu_read_unlock_bh();
27097255
YH
565}
566#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
570#endif
571
1da177e4 572/*
554cfb7e 573 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 574 */
8d1c802b 575static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 576{
5e670d84
DA
577 const struct net_device *dev = rt->fib6_nh.nh_dev;
578
161980f4 579 if (!oif || dev->ifindex == oif)
554cfb7e 580 return 2;
161980f4 581 return 0;
554cfb7e 582}
1da177e4 583
8d1c802b 584static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 585{
afc154e9 586 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 587 struct neighbour *neigh;
f2c31e32 588
93c2fb25
DA
589 if (rt->fib6_flags & RTF_NONEXTHOP ||
590 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 591 return RT6_NUD_SUCCEED;
145a3621
YH
592
593 rcu_read_lock_bh();
5e670d84
DA
594 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
595 &rt->fib6_nh.nh_gw);
145a3621
YH
596 if (neigh) {
597 read_lock(&neigh->lock);
554cfb7e 598 if (neigh->nud_state & NUD_VALID)
afc154e9 599 ret = RT6_NUD_SUCCEED;
398bcbeb 600#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 601 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 602 ret = RT6_NUD_SUCCEED;
7e980569
JB
603 else
604 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 605#endif
145a3621 606 read_unlock(&neigh->lock);
afc154e9
HFS
607 } else {
608 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 609 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 610 }
145a3621
YH
611 rcu_read_unlock_bh();
612
a5a81f0b 613 return ret;
1da177e4
LT
614}
615
8d1c802b 616static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 617{
a5a81f0b 618 int m;
1ab1457c 619
4d0c5911 620 m = rt6_check_dev(rt, oif);
77d16f45 621 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 622 return RT6_NUD_FAIL_HARD;
ebacaaa0 623#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 624 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 625#endif
afc154e9
HFS
626 if (strict & RT6_LOOKUP_F_REACHABLE) {
627 int n = rt6_check_neigh(rt);
628 if (n < 0)
629 return n;
630 }
554cfb7e
YH
631 return m;
632}
633
dcd1f572
DA
634/* called with rc_read_lock held */
635static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
636{
637 const struct net_device *dev = fib6_info_nh_dev(f6i);
638 bool rc = false;
639
640 if (dev) {
641 const struct inet6_dev *idev = __in6_dev_get(dev);
642
643 rc = !!idev->cnf.ignore_routes_with_linkdown;
644 }
645
646 return rc;
647}
648
8d1c802b
DA
649static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
650 int *mpri, struct fib6_info *match,
afc154e9 651 bool *do_rr)
554cfb7e 652{
f11e6659 653 int m;
afc154e9 654 bool match_do_rr = false;
35103d11 655
5e670d84 656 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
657 goto out;
658
dcd1f572 659 if (fib6_ignore_linkdown(rt) &&
5e670d84 660 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 661 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 662 goto out;
f11e6659 663
14895687 664 if (fib6_check_expired(rt))
f11e6659
DM
665 goto out;
666
667 m = rt6_score_route(rt, oif, strict);
7e980569 668 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
669 match_do_rr = true;
670 m = 0; /* lowest valid score */
7e980569 671 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 672 goto out;
afc154e9
HFS
673 }
674
675 if (strict & RT6_LOOKUP_F_REACHABLE)
676 rt6_probe(rt);
f11e6659 677
7e980569 678 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 679 if (m > *mpri) {
afc154e9 680 *do_rr = match_do_rr;
f11e6659
DM
681 *mpri = m;
682 match = rt;
f11e6659 683 }
f11e6659
DM
684out:
685 return match;
686}
687
8d1c802b
DA
688static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
689 struct fib6_info *leaf,
690 struct fib6_info *rr_head,
afc154e9
HFS
691 u32 metric, int oif, int strict,
692 bool *do_rr)
f11e6659 693{
8d1c802b 694 struct fib6_info *rt, *match, *cont;
554cfb7e 695 int mpri = -1;
1da177e4 696
f11e6659 697 match = NULL;
9fbdcfaf 698 cont = NULL;
071fb37e 699 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
93c2fb25 700 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
701 cont = rt;
702 break;
703 }
704
705 match = find_match(rt, oif, strict, &mpri, match, do_rr);
706 }
707
66f5d6ce 708 for (rt = leaf; rt && rt != rr_head;
071fb37e 709 rt = rcu_dereference(rt->rt6_next)) {
93c2fb25 710 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
711 cont = rt;
712 break;
713 }
714
afc154e9 715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
716 }
717
718 if (match || !cont)
719 return match;
720
071fb37e 721 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 722 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 723
f11e6659
DM
724 return match;
725}
1da177e4 726
8d1c802b 727static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 728 int oif, int strict)
f11e6659 729{
8d1c802b
DA
730 struct fib6_info *leaf = rcu_dereference(fn->leaf);
731 struct fib6_info *match, *rt0;
afc154e9 732 bool do_rr = false;
17ecf590 733 int key_plen;
1da177e4 734
421842ed
DA
735 if (!leaf || leaf == net->ipv6.fib6_null_entry)
736 return net->ipv6.fib6_null_entry;
8d1040e8 737
66f5d6ce 738 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 739 if (!rt0)
66f5d6ce 740 rt0 = leaf;
1da177e4 741
17ecf590
WW
742 /* Double check to make sure fn is not an intermediate node
743 * and fn->leaf does not points to its child's leaf
744 * (This might happen if all routes under fn are deleted from
745 * the tree and fib6_repair_tree() is called on the node.)
746 */
93c2fb25 747 key_plen = rt0->fib6_dst.plen;
17ecf590 748#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
749 if (rt0->fib6_src.plen)
750 key_plen = rt0->fib6_src.plen;
17ecf590
WW
751#endif
752 if (fn->fn_bit != key_plen)
421842ed 753 return net->ipv6.fib6_null_entry;
17ecf590 754
93c2fb25 755 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 756 &do_rr);
1da177e4 757
afc154e9 758 if (do_rr) {
8d1c802b 759 struct fib6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 760
554cfb7e 761 /* no entries matched; do round-robin */
93c2fb25 762 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 763 next = leaf;
f11e6659 764
66f5d6ce 765 if (next != rt0) {
93c2fb25 766 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 767 /* make sure next is not being deleted from the tree */
93c2fb25 768 if (next->fib6_node)
66f5d6ce 769 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 770 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 771 }
1da177e4 772 }
1da177e4 773
421842ed 774 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
775}
776
8d1c802b 777static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 778{
93c2fb25 779 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
780}
781
70ceb4f5
YH
782#ifdef CONFIG_IPV6_ROUTE_INFO
783int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 784 const struct in6_addr *gwaddr)
70ceb4f5 785{
c346dca1 786 struct net *net = dev_net(dev);
70ceb4f5
YH
787 struct route_info *rinfo = (struct route_info *) opt;
788 struct in6_addr prefix_buf, *prefix;
789 unsigned int pref;
4bed72e4 790 unsigned long lifetime;
8d1c802b 791 struct fib6_info *rt;
70ceb4f5
YH
792
793 if (len < sizeof(struct route_info)) {
794 return -EINVAL;
795 }
796
797 /* Sanity check for prefix_len and length */
798 if (rinfo->length > 3) {
799 return -EINVAL;
800 } else if (rinfo->prefix_len > 128) {
801 return -EINVAL;
802 } else if (rinfo->prefix_len > 64) {
803 if (rinfo->length < 2) {
804 return -EINVAL;
805 }
806 } else if (rinfo->prefix_len > 0) {
807 if (rinfo->length < 1) {
808 return -EINVAL;
809 }
810 }
811
812 pref = rinfo->route_pref;
813 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 814 return -EINVAL;
70ceb4f5 815
4bed72e4 816 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
817
818 if (rinfo->length == 3)
819 prefix = (struct in6_addr *)rinfo->prefix;
820 else {
821 /* this function is safe */
822 ipv6_addr_prefix(&prefix_buf,
823 (struct in6_addr *)rinfo->prefix,
824 rinfo->prefix_len);
825 prefix = &prefix_buf;
826 }
827
f104a567 828 if (rinfo->prefix_len == 0)
afb1d4b5 829 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
830 else
831 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 832 gwaddr, dev);
70ceb4f5
YH
833
834 if (rt && !lifetime) {
afb1d4b5 835 ip6_del_rt(net, rt);
70ceb4f5
YH
836 rt = NULL;
837 }
838
839 if (!rt && lifetime)
830218c1
DA
840 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
841 dev, pref);
70ceb4f5 842 else if (rt)
93c2fb25
DA
843 rt->fib6_flags = RTF_ROUTEINFO |
844 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
845
846 if (rt) {
1716a961 847 if (!addrconf_finite_timeout(lifetime))
14895687 848 fib6_clean_expires(rt);
1716a961 849 else
14895687 850 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 851
93531c67 852 fib6_info_release(rt);
70ceb4f5
YH
853 }
854 return 0;
855}
856#endif
857
ae90d867
DA
858/*
859 * Misc support functions
860 */
861
862/* called with rcu_lock held */
8d1c802b 863static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 864{
5e670d84 865 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 866
93c2fb25 867 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
868 /* for copies of local routes, dst->dev needs to be the
869 * device if it is a master device, the master device if
870 * device is enslaved, and the loopback as the default
871 */
872 if (netif_is_l3_slave(dev) &&
93c2fb25 873 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
874 dev = l3mdev_master_dev_rcu(dev);
875 else if (!netif_is_l3_master(dev))
876 dev = dev_net(dev)->loopback_dev;
877 /* last case is netif_is_l3_master(dev) is true in which
878 * case we want dev returned to be dev
879 */
880 }
881
882 return dev;
883}
884
6edb3c96
DA
885static const int fib6_prop[RTN_MAX + 1] = {
886 [RTN_UNSPEC] = 0,
887 [RTN_UNICAST] = 0,
888 [RTN_LOCAL] = 0,
889 [RTN_BROADCAST] = 0,
890 [RTN_ANYCAST] = 0,
891 [RTN_MULTICAST] = 0,
892 [RTN_BLACKHOLE] = -EINVAL,
893 [RTN_UNREACHABLE] = -EHOSTUNREACH,
894 [RTN_PROHIBIT] = -EACCES,
895 [RTN_THROW] = -EAGAIN,
896 [RTN_NAT] = -EINVAL,
897 [RTN_XRESOLVE] = -EINVAL,
898};
899
900static int ip6_rt_type_to_error(u8 fib6_type)
901{
902 return fib6_prop[fib6_type];
903}
904
8d1c802b 905static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
906{
907 unsigned short flags = 0;
908
909 if (rt->dst_nocount)
910 flags |= DST_NOCOUNT;
911 if (rt->dst_nopolicy)
912 flags |= DST_NOPOLICY;
913 if (rt->dst_host)
914 flags |= DST_HOST;
915
916 return flags;
917}
918
8d1c802b 919static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
920{
921 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
922
923 switch (ort->fib6_type) {
924 case RTN_BLACKHOLE:
925 rt->dst.output = dst_discard_out;
926 rt->dst.input = dst_discard;
927 break;
928 case RTN_PROHIBIT:
929 rt->dst.output = ip6_pkt_prohibit_out;
930 rt->dst.input = ip6_pkt_prohibit;
931 break;
932 case RTN_THROW:
933 case RTN_UNREACHABLE:
934 default:
935 rt->dst.output = ip6_pkt_discard_out;
936 rt->dst.input = ip6_pkt_discard;
937 break;
938 }
939}
940
8d1c802b 941static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 942{
3b6761d1
DA
943 rt->dst.flags |= fib6_info_dst_flags(ort);
944
93c2fb25 945 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
946 ip6_rt_init_dst_reject(rt, ort);
947 return;
948 }
949
950 rt->dst.error = 0;
951 rt->dst.output = ip6_output;
952
953 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96 954 rt->dst.input = ip6_input;
93c2fb25 955 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
956 rt->dst.input = ip6_mc_input;
957 } else {
958 rt->dst.input = ip6_forward;
959 }
960
961 if (ort->fib6_nh.nh_lwtstate) {
962 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
963 lwtunnel_set_redirect(&rt->dst);
964 }
965
966 rt->dst.lastuse = jiffies;
967}
968
8d1c802b 969static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 970{
ae90d867 971 rt->rt6i_flags &= ~RTF_EXPIRES;
93531c67 972 fib6_info_hold(from);
a68886a6 973 rcu_assign_pointer(rt->from, from);
d4ead6b3
DA
974 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
975 if (from->fib6_metrics != &dst_default_metrics) {
976 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
977 refcount_inc(&from->fib6_metrics->refcnt);
978 }
ae90d867
DA
979}
980
8d1c802b 981static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 982{
dcd1f572
DA
983 struct net_device *dev = fib6_info_nh_dev(ort);
984
6edb3c96
DA
985 ip6_rt_init_dst(rt, ort);
986
93c2fb25 987 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 988 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 989 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 990 rt->rt6i_flags = ort->fib6_flags;
ae90d867 991 rt6_set_from(rt, ort);
ae90d867 992#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 993 rt->rt6i_src = ort->fib6_src;
ae90d867 994#endif
93c2fb25 995 rt->rt6i_prefsrc = ort->fib6_prefsrc;
5e670d84 996 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
997}
998
a3c00e46
MKL
999static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1000 struct in6_addr *saddr)
1001{
66f5d6ce 1002 struct fib6_node *pn, *sn;
a3c00e46
MKL
1003 while (1) {
1004 if (fn->fn_flags & RTN_TL_ROOT)
1005 return NULL;
66f5d6ce
WW
1006 pn = rcu_dereference(fn->parent);
1007 sn = FIB6_SUBTREE(pn);
1008 if (sn && sn != fn)
1009 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1010 else
1011 fn = pn;
1012 if (fn->fn_flags & RTN_RTINFO)
1013 return fn;
1014 }
1015}
c71099ac 1016
d3843fe5
WW
1017static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1018 bool null_fallback)
1019{
1020 struct rt6_info *rt = *prt;
1021
1022 if (dst_hold_safe(&rt->dst))
1023 return true;
1024 if (null_fallback) {
1025 rt = net->ipv6.ip6_null_entry;
1026 dst_hold(&rt->dst);
1027 } else {
1028 rt = NULL;
1029 }
1030 *prt = rt;
1031 return false;
1032}
1033
dec9b0e2 1034/* called with rcu_lock held */
8d1c802b 1035static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1036{
3b6761d1 1037 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1038 struct net_device *dev = rt->fib6_nh.nh_dev;
1039 struct rt6_info *nrt;
1040
93531c67 1041 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1042 if (nrt)
1043 ip6_rt_copy_init(nrt, rt);
1044
1045 return nrt;
1046}
1047
8ed67789
DL
1048static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1049 struct fib6_table *table,
b75cc8f9
DA
1050 struct flowi6 *fl6,
1051 const struct sk_buff *skb,
1052 int flags)
1da177e4 1053{
8d1c802b 1054 struct fib6_info *f6i;
1da177e4 1055 struct fib6_node *fn;
23fb93a4 1056 struct rt6_info *rt;
1da177e4 1057
b6cdbc85
DA
1058 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059 flags &= ~RT6_LOOKUP_F_IFACE;
1060
66f5d6ce 1061 rcu_read_lock();
4c9483b2 1062 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1063restart:
23fb93a4
DA
1064 f6i = rcu_dereference(fn->leaf);
1065 if (!f6i) {
1066 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1067 } else {
23fb93a4 1068 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1069 fl6->flowi6_oif, flags);
93c2fb25 1070 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
23fb93a4
DA
1071 f6i = rt6_multipath_select(net, f6i, fl6,
1072 fl6->flowi6_oif, skb, flags);
66f5d6ce 1073 }
23fb93a4 1074 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1075 fn = fib6_backtrack(fn, &fl6->saddr);
1076 if (fn)
1077 goto restart;
1078 }
23fb93a4 1079
2b760fcf 1080 /* Search through exception table */
23fb93a4
DA
1081 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1082 if (rt) {
dec9b0e2
DA
1083 if (ip6_hold_safe(net, &rt, true))
1084 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1085 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1086 rt = net->ipv6.ip6_null_entry;
1087 dst_hold(&rt->dst);
23fb93a4
DA
1088 } else {
1089 rt = ip6_create_rt_rcu(f6i);
1090 if (!rt) {
1091 rt = net->ipv6.ip6_null_entry;
1092 dst_hold(&rt->dst);
1093 }
dec9b0e2 1094 }
d3843fe5 1095
66f5d6ce 1096 rcu_read_unlock();
b811580d 1097
b65f164d 1098 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1099
c71099ac 1100 return rt;
c71099ac
TG
1101}
1102
67ba4152 1103struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1104 const struct sk_buff *skb, int flags)
ea6e574e 1105{
b75cc8f9 1106 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1107}
1108EXPORT_SYMBOL_GPL(ip6_route_lookup);
1109
9acd9f3a 1110struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1111 const struct in6_addr *saddr, int oif,
1112 const struct sk_buff *skb, int strict)
c71099ac 1113{
4c9483b2
DM
1114 struct flowi6 fl6 = {
1115 .flowi6_oif = oif,
1116 .daddr = *daddr,
c71099ac
TG
1117 };
1118 struct dst_entry *dst;
77d16f45 1119 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1120
adaa70bb 1121 if (saddr) {
4c9483b2 1122 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1123 flags |= RT6_LOOKUP_F_HAS_SADDR;
1124 }
1125
b75cc8f9 1126 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1127 if (dst->error == 0)
1128 return (struct rt6_info *) dst;
1129
1130 dst_release(dst);
1131
1da177e4
LT
1132 return NULL;
1133}
7159039a
YH
1134EXPORT_SYMBOL(rt6_lookup);
1135
/* ip6_ins_rt is called with table->tb6_lock NOT held; the lock is taken
 * internally.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * Caller must hold dst before calling it.
 */
1141
8d1c802b 1142static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1143 struct netlink_ext_ack *extack)
1da177e4
LT
1144{
1145 int err;
c71099ac 1146 struct fib6_table *table;
1da177e4 1147
93c2fb25 1148 table = rt->fib6_table;
66f5d6ce 1149 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1150 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1151 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1152
1153 return err;
1154}
1155
8d1c802b 1156int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1157{
afb1d4b5 1158 struct nl_info info = { .nl_net = net, };
e715b6d3 1159
d4ead6b3 1160 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1161}
1162
8d1c802b 1163static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1164 const struct in6_addr *daddr,
1165 const struct in6_addr *saddr)
1da177e4 1166{
4832c30d 1167 struct net_device *dev;
1da177e4
LT
1168 struct rt6_info *rt;
1169
1170 /*
1171 * Clone the route.
1172 */
1173
4832c30d 1174 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1175 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
83a09abd
MKL
1176 if (!rt)
1177 return NULL;
1178
1179 ip6_rt_copy_init(rt, ort);
1180 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1181 rt->dst.flags |= DST_HOST;
1182 rt->rt6i_dst.addr = *daddr;
1183 rt->rt6i_dst.plen = 128;
1da177e4 1184
83a09abd 1185 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1186 if (ort->fib6_dst.plen != 128 &&
1187 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1188 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1189#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1190 if (rt->rt6i_src.plen && saddr) {
1191 rt->rt6i_src.addr = *saddr;
1192 rt->rt6i_src.plen = 128;
8b9df265 1193 }
83a09abd 1194#endif
95a9a5ba 1195 }
1da177e4 1196
95a9a5ba
YH
1197 return rt;
1198}
1da177e4 1199
8d1c802b 1200static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1201{
3b6761d1 1202 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1203 struct net_device *dev;
d52d3997
MKL
1204 struct rt6_info *pcpu_rt;
1205
4832c30d
DA
1206 rcu_read_lock();
1207 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1208 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1209 rcu_read_unlock();
d52d3997
MKL
1210 if (!pcpu_rt)
1211 return NULL;
1212 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1213 pcpu_rt->rt6i_flags |= RTF_PCPU;
1214 return pcpu_rt;
1215}
1216
66f5d6ce 1217/* It should be called with rcu_read_lock() acquired */
8d1c802b 1218static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1219{
a73e4195 1220 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1221
1222 p = this_cpu_ptr(rt->rt6i_pcpu);
1223 pcpu_rt = *p;
1224
d4ead6b3
DA
1225 if (pcpu_rt)
1226 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1227
a73e4195
MKL
1228 return pcpu_rt;
1229}
1230
afb1d4b5 1231static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1232 struct fib6_info *rt)
a73e4195
MKL
1233{
1234 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1235
1236 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1237 if (!pcpu_rt) {
9c7370a1
MKL
1238 dst_hold(&net->ipv6.ip6_null_entry->dst);
1239 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1240 }
1241
a94b9367
WW
1242 dst_hold(&pcpu_rt->dst);
1243 p = this_cpu_ptr(rt->rt6i_pcpu);
1244 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1245 BUG_ON(prev);
a94b9367 1246
d52d3997
MKL
1247 return pcpu_rt;
1248}
1249
/*
 * Exception hash table implementation.
 *
 * rt6_exception_lock is the spinlock that the helpers below require
 * their callers to hold while modifying exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1253
1254/* Remove rt6_ex from hash table and free the memory
1255 * Caller must hold rt6_exception_lock
1256 */
1257static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1258 struct rt6_exception *rt6_ex)
1259{
b2427e67 1260 struct net *net;
81eb8447 1261
35732d01
WW
1262 if (!bucket || !rt6_ex)
1263 return;
b2427e67
CIK
1264
1265 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1266 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1267 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1268 kfree_rcu(rt6_ex, rcu);
1269 WARN_ON_ONCE(!bucket->depth);
1270 bucket->depth--;
81eb8447 1271 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1272}
1273
1274/* Remove oldest rt6_ex in bucket and free the memory
1275 * Caller must hold rt6_exception_lock
1276 */
1277static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1278{
1279 struct rt6_exception *rt6_ex, *oldest = NULL;
1280
1281 if (!bucket)
1282 return;
1283
1284 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1285 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1286 oldest = rt6_ex;
1287 }
1288 rt6_remove_exception(bucket, oldest);
1289}
1290
1291static u32 rt6_exception_hash(const struct in6_addr *dst,
1292 const struct in6_addr *src)
1293{
1294 static u32 seed __read_mostly;
1295 u32 val;
1296
1297 net_get_random_once(&seed, sizeof(seed));
1298 val = jhash(dst, sizeof(*dst), seed);
1299
1300#ifdef CONFIG_IPV6_SUBTREES
1301 if (src)
1302 val = jhash(src, sizeof(*src), val);
1303#endif
1304 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1305}
1306
1307/* Helper function to find the cached rt in the hash table
1308 * and update bucket pointer to point to the bucket for this
1309 * (daddr, saddr) pair
1310 * Caller must hold rt6_exception_lock
1311 */
1312static struct rt6_exception *
1313__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1314 const struct in6_addr *daddr,
1315 const struct in6_addr *saddr)
1316{
1317 struct rt6_exception *rt6_ex;
1318 u32 hval;
1319
1320 if (!(*bucket) || !daddr)
1321 return NULL;
1322
1323 hval = rt6_exception_hash(daddr, saddr);
1324 *bucket += hval;
1325
1326 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1327 struct rt6_info *rt6 = rt6_ex->rt6i;
1328 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1329
1330#ifdef CONFIG_IPV6_SUBTREES
1331 if (matched && saddr)
1332 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1333#endif
1334 if (matched)
1335 return rt6_ex;
1336 }
1337 return NULL;
1338}
1339
1340/* Helper function to find the cached rt in the hash table
1341 * and update bucket pointer to point to the bucket for this
1342 * (daddr, saddr) pair
1343 * Caller must hold rcu_read_lock()
1344 */
1345static struct rt6_exception *
1346__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1347 const struct in6_addr *daddr,
1348 const struct in6_addr *saddr)
1349{
1350 struct rt6_exception *rt6_ex;
1351 u32 hval;
1352
1353 WARN_ON_ONCE(!rcu_read_lock_held());
1354
1355 if (!(*bucket) || !daddr)
1356 return NULL;
1357
1358 hval = rt6_exception_hash(daddr, saddr);
1359 *bucket += hval;
1360
1361 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1362 struct rt6_info *rt6 = rt6_ex->rt6i;
1363 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1364
1365#ifdef CONFIG_IPV6_SUBTREES
1366 if (matched && saddr)
1367 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1368#endif
1369 if (matched)
1370 return rt6_ex;
1371 }
1372 return NULL;
1373}
1374
8d1c802b 1375static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1376{
1377 unsigned int mtu;
1378
dcd1f572
DA
1379 if (rt->fib6_pmtu) {
1380 mtu = rt->fib6_pmtu;
1381 } else {
1382 struct net_device *dev = fib6_info_nh_dev(rt);
1383 struct inet6_dev *idev;
1384
1385 rcu_read_lock();
1386 idev = __in6_dev_get(dev);
1387 mtu = idev->cnf.mtu6;
1388 rcu_read_unlock();
1389 }
1390
d4ead6b3
DA
1391 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1392
1393 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1394}
1395
35732d01 1396static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1397 struct fib6_info *ort)
35732d01 1398{
5e670d84 1399 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1400 struct rt6_exception_bucket *bucket;
1401 struct in6_addr *src_key = NULL;
1402 struct rt6_exception *rt6_ex;
1403 int err = 0;
1404
35732d01
WW
1405 spin_lock_bh(&rt6_exception_lock);
1406
1407 if (ort->exception_bucket_flushed) {
1408 err = -EINVAL;
1409 goto out;
1410 }
1411
1412 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1413 lockdep_is_held(&rt6_exception_lock));
1414 if (!bucket) {
1415 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1416 GFP_ATOMIC);
1417 if (!bucket) {
1418 err = -ENOMEM;
1419 goto out;
1420 }
1421 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1422 }
1423
1424#ifdef CONFIG_IPV6_SUBTREES
1425 /* rt6i_src.plen != 0 indicates ort is in subtree
1426 * and exception table is indexed by a hash of
1427 * both rt6i_dst and rt6i_src.
1428 * Otherwise, the exception table is indexed by
1429 * a hash of only rt6i_dst.
1430 */
93c2fb25 1431 if (ort->fib6_src.plen)
35732d01
WW
1432 src_key = &nrt->rt6i_src.addr;
1433#endif
60006a48
WW
1434
1435 /* Update rt6i_prefsrc as it could be changed
1436 * in rt6_remove_prefsrc()
1437 */
93c2fb25 1438 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1439 /* rt6_mtu_change() might lower mtu on ort.
1440 * Only insert this exception route if its mtu
1441 * is less than ort's mtu value.
1442 */
d4ead6b3 1443 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1444 err = -EINVAL;
1445 goto out;
1446 }
60006a48 1447
35732d01
WW
1448 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1449 src_key);
1450 if (rt6_ex)
1451 rt6_remove_exception(bucket, rt6_ex);
1452
1453 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1454 if (!rt6_ex) {
1455 err = -ENOMEM;
1456 goto out;
1457 }
1458 rt6_ex->rt6i = nrt;
1459 rt6_ex->stamp = jiffies;
35732d01
WW
1460 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1461 bucket->depth++;
81eb8447 1462 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1463
1464 if (bucket->depth > FIB6_MAX_DEPTH)
1465 rt6_exception_remove_oldest(bucket);
1466
1467out:
1468 spin_unlock_bh(&rt6_exception_lock);
1469
1470 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1471 if (!err) {
93c2fb25 1472 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1473 fib6_update_sernum(net, ort);
93c2fb25 1474 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1475 fib6_force_start_gc(net);
1476 }
35732d01
WW
1477
1478 return err;
1479}
1480
8d1c802b 1481void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1482{
1483 struct rt6_exception_bucket *bucket;
1484 struct rt6_exception *rt6_ex;
1485 struct hlist_node *tmp;
1486 int i;
1487
1488 spin_lock_bh(&rt6_exception_lock);
1489 /* Prevent rt6_insert_exception() to recreate the bucket list */
1490 rt->exception_bucket_flushed = 1;
1491
1492 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 lockdep_is_held(&rt6_exception_lock));
1494 if (!bucket)
1495 goto out;
1496
1497 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1498 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1499 rt6_remove_exception(bucket, rt6_ex);
1500 WARN_ON_ONCE(bucket->depth);
1501 bucket++;
1502 }
1503
1504out:
1505 spin_unlock_bh(&rt6_exception_lock);
1506}
1507
1508/* Find cached rt in the hash table inside passed in rt
1509 * Caller has to hold rcu_read_lock()
1510 */
8d1c802b 1511static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1512 struct in6_addr *daddr,
1513 struct in6_addr *saddr)
1514{
1515 struct rt6_exception_bucket *bucket;
1516 struct in6_addr *src_key = NULL;
1517 struct rt6_exception *rt6_ex;
1518 struct rt6_info *res = NULL;
1519
1520 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1521
1522#ifdef CONFIG_IPV6_SUBTREES
1523 /* rt6i_src.plen != 0 indicates rt is in subtree
1524 * and exception table is indexed by a hash of
1525 * both rt6i_dst and rt6i_src.
1526 * Otherwise, the exception table is indexed by
1527 * a hash of only rt6i_dst.
1528 */
93c2fb25 1529 if (rt->fib6_src.plen)
35732d01
WW
1530 src_key = saddr;
1531#endif
1532 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1533
1534 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1535 res = rt6_ex->rt6i;
1536
1537 return res;
1538}
1539
1540/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1541static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1542{
35732d01 1543 struct rt6_exception_bucket *bucket;
8d1c802b 1544 struct fib6_info *from = rt->from;
35732d01
WW
1545 struct in6_addr *src_key = NULL;
1546 struct rt6_exception *rt6_ex;
1547 int err;
1548
1549 if (!from ||
442d713b 1550 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1551 return -EINVAL;
1552
1553 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1554 return -ENOENT;
1555
1556 spin_lock_bh(&rt6_exception_lock);
1557 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1558 lockdep_is_held(&rt6_exception_lock));
1559#ifdef CONFIG_IPV6_SUBTREES
1560 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1561 * and exception table is indexed by a hash of
1562 * both rt6i_dst and rt6i_src.
1563 * Otherwise, the exception table is indexed by
1564 * a hash of only rt6i_dst.
1565 */
93c2fb25 1566 if (from->fib6_src.plen)
35732d01
WW
1567 src_key = &rt->rt6i_src.addr;
1568#endif
1569 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1570 &rt->rt6i_dst.addr,
1571 src_key);
1572 if (rt6_ex) {
1573 rt6_remove_exception(bucket, rt6_ex);
1574 err = 0;
1575 } else {
1576 err = -ENOENT;
1577 }
1578
1579 spin_unlock_bh(&rt6_exception_lock);
1580 return err;
1581}
1582
1583/* Find rt6_ex which contains the passed in rt cache and
1584 * refresh its stamp
1585 */
1586static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1587{
35732d01 1588 struct rt6_exception_bucket *bucket;
8d1c802b 1589 struct fib6_info *from = rt->from;
35732d01
WW
1590 struct in6_addr *src_key = NULL;
1591 struct rt6_exception *rt6_ex;
1592
1593 if (!from ||
442d713b 1594 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1595 return;
1596
1597 rcu_read_lock();
1598 bucket = rcu_dereference(from->rt6i_exception_bucket);
1599
1600#ifdef CONFIG_IPV6_SUBTREES
1601 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1602 * and exception table is indexed by a hash of
1603 * both rt6i_dst and rt6i_src.
1604 * Otherwise, the exception table is indexed by
1605 * a hash of only rt6i_dst.
1606 */
93c2fb25 1607 if (from->fib6_src.plen)
35732d01
WW
1608 src_key = &rt->rt6i_src.addr;
1609#endif
1610 rt6_ex = __rt6_find_exception_rcu(&bucket,
1611 &rt->rt6i_dst.addr,
1612 src_key);
1613 if (rt6_ex)
1614 rt6_ex->stamp = jiffies;
1615
1616 rcu_read_unlock();
1617}
1618
8d1c802b 1619static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1620{
1621 struct rt6_exception_bucket *bucket;
1622 struct rt6_exception *rt6_ex;
1623 int i;
1624
1625 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1626 lockdep_is_held(&rt6_exception_lock));
1627
1628 if (bucket) {
1629 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1630 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1631 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1632 }
1633 bucket++;
1634 }
1635 }
1636}
1637
e9fa1495
SB
1638static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1639 struct rt6_info *rt, int mtu)
1640{
1641 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1642 * lowest MTU in the path: always allow updating the route PMTU to
1643 * reflect PMTU decreases.
1644 *
1645 * If the new MTU is higher, and the route PMTU is equal to the local
1646 * MTU, this means the old MTU is the lowest in the path, so allow
1647 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1648 * handle this.
1649 */
1650
1651 if (dst_mtu(&rt->dst) >= mtu)
1652 return true;
1653
1654 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1655 return true;
1656
1657 return false;
1658}
1659
1660static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1661 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1662{
1663 struct rt6_exception_bucket *bucket;
1664 struct rt6_exception *rt6_ex;
1665 int i;
1666
1667 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1668 lockdep_is_held(&rt6_exception_lock));
1669
e9fa1495
SB
1670 if (!bucket)
1671 return;
1672
1673 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1674 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1675 struct rt6_info *entry = rt6_ex->rt6i;
1676
1677 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1678 * route), the metrics of its rt->from have already
e9fa1495
SB
1679 * been updated.
1680 */
d4ead6b3 1681 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1682 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1683 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1684 }
e9fa1495 1685 bucket++;
f5bbe7ee
WW
1686 }
1687}
1688
b16cb459
WW
1689#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1690
8d1c802b 1691static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1692 struct in6_addr *gateway)
1693{
1694 struct rt6_exception_bucket *bucket;
1695 struct rt6_exception *rt6_ex;
1696 struct hlist_node *tmp;
1697 int i;
1698
1699 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1700 return;
1701
1702 spin_lock_bh(&rt6_exception_lock);
1703 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1704 lockdep_is_held(&rt6_exception_lock));
1705
1706 if (bucket) {
1707 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1708 hlist_for_each_entry_safe(rt6_ex, tmp,
1709 &bucket->chain, hlist) {
1710 struct rt6_info *entry = rt6_ex->rt6i;
1711
1712 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1713 RTF_CACHE_GATEWAY &&
1714 ipv6_addr_equal(gateway,
1715 &entry->rt6i_gateway)) {
1716 rt6_remove_exception(bucket, rt6_ex);
1717 }
1718 }
1719 bucket++;
1720 }
1721 }
1722
1723 spin_unlock_bh(&rt6_exception_lock);
1724}
1725
c757faa8
WW
1726static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1727 struct rt6_exception *rt6_ex,
1728 struct fib6_gc_args *gc_args,
1729 unsigned long now)
1730{
1731 struct rt6_info *rt = rt6_ex->rt6i;
1732
1859bac0
PA
1733 /* we are pruning and obsoleting aged-out and non gateway exceptions
1734 * even if others have still references to them, so that on next
1735 * dst_check() such references can be dropped.
1736 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1737 * expired, independently from their aging, as per RFC 8201 section 4
1738 */
31afeb42
WW
1739 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1740 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1741 RT6_TRACE("aging clone %p\n", rt);
1742 rt6_remove_exception(bucket, rt6_ex);
1743 return;
1744 }
1745 } else if (time_after(jiffies, rt->dst.expires)) {
1746 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1747 rt6_remove_exception(bucket, rt6_ex);
1748 return;
31afeb42
WW
1749 }
1750
1751 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1752 struct neighbour *neigh;
1753 __u8 neigh_flags = 0;
1754
1bfa26ff
ED
1755 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1756 if (neigh)
c757faa8 1757 neigh_flags = neigh->flags;
1bfa26ff 1758
c757faa8
WW
1759 if (!(neigh_flags & NTF_ROUTER)) {
1760 RT6_TRACE("purging route %p via non-router but gateway\n",
1761 rt);
1762 rt6_remove_exception(bucket, rt6_ex);
1763 return;
1764 }
1765 }
31afeb42 1766
c757faa8
WW
1767 gc_args->more++;
1768}
1769
8d1c802b 1770void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1771 struct fib6_gc_args *gc_args,
1772 unsigned long now)
1773{
1774 struct rt6_exception_bucket *bucket;
1775 struct rt6_exception *rt6_ex;
1776 struct hlist_node *tmp;
1777 int i;
1778
1779 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1780 return;
1781
1bfa26ff
ED
1782 rcu_read_lock_bh();
1783 spin_lock(&rt6_exception_lock);
c757faa8
WW
1784 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1785 lockdep_is_held(&rt6_exception_lock));
1786
1787 if (bucket) {
1788 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1789 hlist_for_each_entry_safe(rt6_ex, tmp,
1790 &bucket->chain, hlist) {
1791 rt6_age_examine_exception(bucket, rt6_ex,
1792 gc_args, now);
1793 }
1794 bucket++;
1795 }
1796 }
1bfa26ff
ED
1797 spin_unlock(&rt6_exception_lock);
1798 rcu_read_unlock_bh();
c757faa8
WW
1799}
1800
9ff74384 1801struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1802 int oif, struct flowi6 *fl6,
1803 const struct sk_buff *skb, int flags)
1da177e4 1804{
367efcb9 1805 struct fib6_node *fn, *saved_fn;
8d1c802b 1806 struct fib6_info *f6i;
23fb93a4 1807 struct rt6_info *rt;
c71099ac 1808 int strict = 0;
1da177e4 1809
77d16f45 1810 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1811 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1812 if (net->ipv6.devconf_all->forwarding == 0)
1813 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1814
66f5d6ce 1815 rcu_read_lock();
1da177e4 1816
4c9483b2 1817 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1818 saved_fn = fn;
1da177e4 1819
ca254490
DA
1820 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1821 oif = 0;
1822
a3c00e46 1823redo_rt6_select:
23fb93a4 1824 f6i = rt6_select(net, fn, oif, strict);
93c2fb25 1825 if (f6i->fib6_nsiblings)
23fb93a4
DA
1826 f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1827 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1828 fn = fib6_backtrack(fn, &fl6->saddr);
1829 if (fn)
1830 goto redo_rt6_select;
367efcb9
MKL
1831 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1832 /* also consider unreachable route */
1833 strict &= ~RT6_LOOKUP_F_REACHABLE;
1834 fn = saved_fn;
1835 goto redo_rt6_select;
367efcb9 1836 }
a3c00e46
MKL
1837 }
1838
23fb93a4 1839 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1840 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1841 rcu_read_unlock();
d3843fe5 1842 dst_hold(&rt->dst);
b65f164d 1843 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5 1844 return rt;
23fb93a4
DA
1845 }
1846
1847 /*Search through exception table */
1848 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1849 if (rt) {
d4ead6b3 1850 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1851 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1852
66f5d6ce 1853 rcu_read_unlock();
b65f164d 1854 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1855 return rt;
3da59bd9 1856 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1857 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1858 /* Create a RTF_CACHE clone which will not be
1859 * owned by the fib6 tree. It is for the special case where
1860 * the daddr in the skb during the neighbor look-up is different
1861 * from the fl6->daddr used to look-up route here.
1862 */
3da59bd9
MKL
1863 struct rt6_info *uncached_rt;
1864
23fb93a4 1865 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
4d85cd0c
DA
1866
1867 rcu_read_unlock();
c71099ac 1868
1cfb71ee
WW
1869 if (uncached_rt) {
1870 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1871 * No need for another dst_hold()
1872 */
8d0b94af 1873 rt6_uncached_list_add(uncached_rt);
81eb8447 1874 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1875 } else {
3da59bd9 1876 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1877 dst_hold(&uncached_rt->dst);
1878 }
b811580d 1879
b65f164d 1880 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1881 return uncached_rt;
3da59bd9 1882
d52d3997
MKL
1883 } else {
1884 /* Get a percpu copy */
1885
1886 struct rt6_info *pcpu_rt;
1887
951f788a 1888 local_bh_disable();
23fb93a4 1889 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1890
93531c67
DA
1891 if (!pcpu_rt)
1892 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1893
951f788a
ED
1894 local_bh_enable();
1895 rcu_read_unlock();
b65f164d 1896 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1897 return pcpu_rt;
1898 }
1da177e4 1899}
9ff74384 1900EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1901
b75cc8f9
DA
1902static struct rt6_info *ip6_pol_route_input(struct net *net,
1903 struct fib6_table *table,
1904 struct flowi6 *fl6,
1905 const struct sk_buff *skb,
1906 int flags)
4acad72d 1907{
b75cc8f9 1908 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1909}
1910
d409b847
MB
1911struct dst_entry *ip6_route_input_lookup(struct net *net,
1912 struct net_device *dev,
b75cc8f9
DA
1913 struct flowi6 *fl6,
1914 const struct sk_buff *skb,
1915 int flags)
72331bc0
SL
1916{
1917 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1918 flags |= RT6_LOOKUP_F_IFACE;
1919
b75cc8f9 1920 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1921}
d409b847 1922EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1923
23aebdac 1924static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1925 struct flow_keys *keys,
1926 struct flow_keys *flkeys)
23aebdac
JS
1927{
1928 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1929 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1930 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1931 const struct ipv6hdr *inner_iph;
1932 const struct icmp6hdr *icmph;
1933 struct ipv6hdr _inner_iph;
1934
1935 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1936 goto out;
1937
1938 icmph = icmp6_hdr(skb);
1939 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1940 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1941 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1942 icmph->icmp6_type != ICMPV6_PARAMPROB)
1943 goto out;
1944
1945 inner_iph = skb_header_pointer(skb,
1946 skb_transport_offset(skb) + sizeof(*icmph),
1947 sizeof(_inner_iph), &_inner_iph);
1948 if (!inner_iph)
1949 goto out;
1950
1951 key_iph = inner_iph;
5e5d6fed 1952 _flkeys = NULL;
23aebdac 1953out:
5e5d6fed
RP
1954 if (_flkeys) {
1955 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1956 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1957 keys->tags.flow_label = _flkeys->tags.flow_label;
1958 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1959 } else {
1960 keys->addrs.v6addrs.src = key_iph->saddr;
1961 keys->addrs.v6addrs.dst = key_iph->daddr;
1962 keys->tags.flow_label = ip6_flowinfo(key_iph);
1963 keys->basic.ip_proto = key_iph->nexthdr;
1964 }
23aebdac
JS
1965}
1966
1967/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1968u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1969 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1970{
1971 struct flow_keys hash_keys;
9a2a537a 1972 u32 mhash;
23aebdac 1973
bbfa047a 1974 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1975 case 0:
1976 memset(&hash_keys, 0, sizeof(hash_keys));
1977 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1978 if (skb) {
1979 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1980 } else {
1981 hash_keys.addrs.v6addrs.src = fl6->saddr;
1982 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1983 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1984 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1985 }
1986 break;
1987 case 1:
1988 if (skb) {
1989 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1990 struct flow_keys keys;
1991
1992 /* short-circuit if we already have L4 hash present */
1993 if (skb->l4_hash)
1994 return skb_get_hash_raw(skb) >> 1;
1995
1996 memset(&hash_keys, 0, sizeof(hash_keys));
1997
1998 if (!flkeys) {
1999 skb_flow_dissect_flow_keys(skb, &keys, flag);
2000 flkeys = &keys;
2001 }
2002 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2003 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2004 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2005 hash_keys.ports.src = flkeys->ports.src;
2006 hash_keys.ports.dst = flkeys->ports.dst;
2007 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2008 } else {
2009 memset(&hash_keys, 0, sizeof(hash_keys));
2010 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2011 hash_keys.addrs.v6addrs.src = fl6->saddr;
2012 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2013 hash_keys.ports.src = fl6->fl6_sport;
2014 hash_keys.ports.dst = fl6->fl6_dport;
2015 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2016 }
2017 break;
23aebdac 2018 }
9a2a537a 2019 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2020
9a2a537a 2021 return mhash >> 1;
23aebdac
JS
2022}
2023
c71099ac
TG
2024void ip6_route_input(struct sk_buff *skb)
2025{
b71d1d42 2026 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2027 struct net *net = dev_net(skb->dev);
adaa70bb 2028 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2029 struct ip_tunnel_info *tun_info;
4c9483b2 2030 struct flowi6 fl6 = {
e0d56fdd 2031 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2032 .daddr = iph->daddr,
2033 .saddr = iph->saddr,
6502ca52 2034 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2035 .flowi6_mark = skb->mark,
2036 .flowi6_proto = iph->nexthdr,
c71099ac 2037 };
5e5d6fed 2038 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2039
904af04d 2040 tun_info = skb_tunnel_info(skb);
46fa062a 2041 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2042 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2043
2044 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2045 flkeys = &_flkeys;
2046
23aebdac 2047 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2048 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2049 skb_dst_drop(skb);
b75cc8f9
DA
2050 skb_dst_set(skb,
2051 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2052}
2053
b75cc8f9
DA
2054static struct rt6_info *ip6_pol_route_output(struct net *net,
2055 struct fib6_table *table,
2056 struct flowi6 *fl6,
2057 const struct sk_buff *skb,
2058 int flags)
1da177e4 2059{
b75cc8f9 2060 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2061}
2062
6f21c96a
PA
2063struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2064 struct flowi6 *fl6, int flags)
c71099ac 2065{
d46a9d67 2066 bool any_src;
c71099ac 2067
4c1feac5
DA
2068 if (rt6_need_strict(&fl6->daddr)) {
2069 struct dst_entry *dst;
2070
2071 dst = l3mdev_link_scope_lookup(net, fl6);
2072 if (dst)
2073 return dst;
2074 }
ca254490 2075
1fb9489b 2076 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2077
d46a9d67 2078 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2079 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2080 (fl6->flowi6_oif && any_src))
77d16f45 2081 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2082
d46a9d67 2083 if (!any_src)
adaa70bb 2084 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2085 else if (sk)
2086 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2087
b75cc8f9 2088 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2089}
6f21c96a 2090EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2091
2774c131 2092struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2093{
5c1e6aa3 2094 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2095 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2096 struct dst_entry *new = NULL;
2097
1dbe3252 2098 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2099 DST_OBSOLETE_DEAD, 0);
14e50e57 2100 if (rt) {
0a1f5962 2101 rt6_info_init(rt);
81eb8447 2102 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2103
0a1f5962 2104 new = &rt->dst;
14e50e57 2105 new->__use = 1;
352e512c 2106 new->input = dst_discard;
ede2059d 2107 new->output = dst_discard_out;
14e50e57 2108
0a1f5962 2109 dst_copy_metrics(new, &ort->dst);
14e50e57 2110
1dbe3252 2111 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2112 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2113 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2114
2115 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2116#ifdef CONFIG_IPV6_SUBTREES
2117 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2118#endif
14e50e57
DM
2119 }
2120
69ead7af
DM
2121 dst_release(dst_orig);
2122 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2123}
14e50e57 2124
1da177e4
LT
2125/*
2126 * Destination cache support functions
2127 */
2128
8d1c802b 2129static bool fib6_check(struct fib6_info *f6i, u32 cookie)
93531c67
DA
2130{
2131 u32 rt_cookie = 0;
2132
a269f1a7 2133 if ((f6i && !fib6_get_cookie_safe(f6i, &rt_cookie)) ||
93531c67
DA
2134 rt_cookie != cookie)
2135 return false;
2136
2137 if (fib6_check_expired(f6i))
2138 return false;
2139
2140 return true;
2141}
2142
a68886a6
DA
2143static struct dst_entry *rt6_check(struct rt6_info *rt,
2144 struct fib6_info *from,
2145 u32 cookie)
3da59bd9 2146{
36143645 2147 u32 rt_cookie = 0;
c5cff856 2148
a68886a6 2149 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2150 rt_cookie != cookie)
3da59bd9
MKL
2151 return NULL;
2152
2153 if (rt6_check_expired(rt))
2154 return NULL;
2155
2156 return &rt->dst;
2157}
2158
a68886a6
DA
2159static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2160 struct fib6_info *from,
2161 u32 cookie)
3da59bd9 2162{
5973fb1e
MKL
2163 if (!__rt6_check_expired(rt) &&
2164 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2165 fib6_check(from, cookie))
3da59bd9
MKL
2166 return &rt->dst;
2167 else
2168 return NULL;
2169}
2170
1da177e4
LT
2171static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2172{
a87b7dc9 2173 struct dst_entry *dst_ret;
a68886a6 2174 struct fib6_info *from;
1da177e4
LT
2175 struct rt6_info *rt;
2176
a87b7dc9
DA
2177 rt = container_of(dst, struct rt6_info, dst);
2178
2179 rcu_read_lock();
1da177e4 2180
6f3118b5
ND
2181 /* All IPV6 dsts are created with ->obsolete set to the value
2182 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2183 * into this function always.
2184 */
e3bc10bd 2185
a68886a6
DA
2186 from = rcu_dereference(rt->from);
2187
2188 if (from && (rt->rt6i_flags & RTF_PCPU ||
2189 unlikely(!list_empty(&rt->rt6i_uncached))))
2190 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2191 else
a68886a6 2192 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2193
2194 rcu_read_unlock();
2195
2196 return dst_ret;
1da177e4
LT
2197}
2198
2199static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2200{
2201 struct rt6_info *rt = (struct rt6_info *) dst;
2202
2203 if (rt) {
54c1a859
YH
2204 if (rt->rt6i_flags & RTF_CACHE) {
2205 if (rt6_check_expired(rt)) {
93531c67 2206 rt6_remove_exception_rt(rt);
54c1a859
YH
2207 dst = NULL;
2208 }
2209 } else {
1da177e4 2210 dst_release(dst);
54c1a859
YH
2211 dst = NULL;
2212 }
1da177e4 2213 }
54c1a859 2214 return dst;
1da177e4
LT
2215}
2216
2217static void ip6_link_failure(struct sk_buff *skb)
2218{
2219 struct rt6_info *rt;
2220
3ffe533c 2221 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2222
adf30907 2223 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2224 if (rt) {
1eb4f758 2225 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2226 if (dst_hold_safe(&rt->dst))
93531c67 2227 rt6_remove_exception_rt(rt);
a68886a6
DA
2228 } else {
2229 struct fib6_info *from;
c5cff856
WW
2230 struct fib6_node *fn;
2231
2232 rcu_read_lock();
a68886a6
DA
2233 from = rcu_dereference(rt->from);
2234 if (from) {
2235 fn = rcu_dereference(from->fib6_node);
2236 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2237 fn->fn_sernum = -1;
2238 }
c5cff856 2239 rcu_read_unlock();
1eb4f758 2240 }
1da177e4
LT
2241 }
2242}
2243
6a3e030f
DA
2244static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2245{
a68886a6
DA
2246 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2247 struct fib6_info *from;
2248
2249 rcu_read_lock();
2250 from = rcu_dereference(rt0->from);
2251 if (from)
2252 rt0->dst.expires = from->expires;
2253 rcu_read_unlock();
2254 }
6a3e030f
DA
2255
2256 dst_set_expires(&rt0->dst, timeout);
2257 rt0->rt6i_flags |= RTF_EXPIRES;
2258}
2259
45e4fd26
MKL
2260static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2261{
2262 struct net *net = dev_net(rt->dst.dev);
2263
d4ead6b3 2264 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2265 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2266 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2267}
2268
0d3f6d29
MKL
2269static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2270{
a68886a6
DA
2271 bool from_set;
2272
2273 rcu_read_lock();
2274 from_set = !!rcu_dereference(rt->from);
2275 rcu_read_unlock();
2276
0d3f6d29 2277 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2278 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2279}
2280
45e4fd26
MKL
2281static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2282 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2283{
0dec879f 2284 const struct in6_addr *daddr, *saddr;
67ba4152 2285 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2286
45e4fd26
MKL
2287 if (rt6->rt6i_flags & RTF_LOCAL)
2288 return;
81aded24 2289
19bda36c
XL
2290 if (dst_metric_locked(dst, RTAX_MTU))
2291 return;
2292
0dec879f
JA
2293 if (iph) {
2294 daddr = &iph->daddr;
2295 saddr = &iph->saddr;
2296 } else if (sk) {
2297 daddr = &sk->sk_v6_daddr;
2298 saddr = &inet6_sk(sk)->saddr;
2299 } else {
2300 daddr = NULL;
2301 saddr = NULL;
2302 }
2303 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2304 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2305 if (mtu >= dst_mtu(dst))
2306 return;
9d289715 2307
0d3f6d29 2308 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2309 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2310 /* update rt6_ex->stamp for cache */
2311 if (rt6->rt6i_flags & RTF_CACHE)
2312 rt6_update_exception_stamp_rt(rt6);
0dec879f 2313 } else if (daddr) {
a68886a6 2314 struct fib6_info *from;
45e4fd26
MKL
2315 struct rt6_info *nrt6;
2316
4d85cd0c 2317 rcu_read_lock();
a68886a6
DA
2318 from = rcu_dereference(rt6->from);
2319 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2320 if (nrt6) {
2321 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2322 if (rt6_insert_exception(nrt6, from))
2b760fcf 2323 dst_release_immediate(&nrt6->dst);
45e4fd26 2324 }
a68886a6 2325 rcu_read_unlock();
1da177e4
LT
2326 }
2327}
2328
45e4fd26
MKL
2329static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2330 struct sk_buff *skb, u32 mtu)
2331{
2332 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2333}
2334
42ae66c8 2335void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2336 int oif, u32 mark, kuid_t uid)
81aded24
DM
2337{
2338 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2339 struct dst_entry *dst;
2340 struct flowi6 fl6;
2341
2342 memset(&fl6, 0, sizeof(fl6));
2343 fl6.flowi6_oif = oif;
1b3c61dc 2344 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2345 fl6.daddr = iph->daddr;
2346 fl6.saddr = iph->saddr;
6502ca52 2347 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2348 fl6.flowi6_uid = uid;
81aded24
DM
2349
2350 dst = ip6_route_output(net, NULL, &fl6);
2351 if (!dst->error)
45e4fd26 2352 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2353 dst_release(dst);
2354}
2355EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2356
2357void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2358{
33c162a9
MKL
2359 struct dst_entry *dst;
2360
81aded24 2361 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2362 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2363
2364 dst = __sk_dst_get(sk);
2365 if (!dst || !dst->obsolete ||
2366 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2367 return;
2368
2369 bh_lock_sock(sk);
2370 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2371 ip6_datagram_dst_update(sk, false);
2372 bh_unlock_sock(sk);
81aded24
DM
2373}
2374EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2375
7d6850f7
AK
2376void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2377 const struct flowi6 *fl6)
2378{
2379#ifdef CONFIG_IPV6_SUBTREES
2380 struct ipv6_pinfo *np = inet6_sk(sk);
2381#endif
2382
2383 ip6_dst_store(sk, dst,
2384 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2385 &sk->sk_v6_daddr : NULL,
2386#ifdef CONFIG_IPV6_SUBTREES
2387 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2388 &np->saddr :
2389#endif
2390 NULL);
2391}
2392
b55b76b2
DJ
2393/* Handle redirects */
2394struct ip6rd_flowi {
2395 struct flowi6 fl6;
2396 struct in6_addr gateway;
2397};
2398
2399static struct rt6_info *__ip6_route_redirect(struct net *net,
2400 struct fib6_table *table,
2401 struct flowi6 *fl6,
b75cc8f9 2402 const struct sk_buff *skb,
b55b76b2
DJ
2403 int flags)
2404{
2405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2406 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2407 struct fib6_info *rt;
b55b76b2
DJ
2408 struct fib6_node *fn;
2409
2410 /* Get the "current" route for this destination and
67c408cf 2411 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2412 *
2413 * RFC 4861 specifies that redirects should only be
2414 * accepted if they come from the nexthop to the target.
2415 * Due to the way the routes are chosen, this notion
2416 * is a bit fuzzy and one might need to check all possible
2417 * routes.
2418 */
2419
66f5d6ce 2420 rcu_read_lock();
b55b76b2
DJ
2421 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2422restart:
66f5d6ce 2423 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2424 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2425 continue;
14895687 2426 if (fib6_check_expired(rt))
b55b76b2 2427 continue;
93c2fb25 2428 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2429 break;
93c2fb25 2430 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2431 continue;
5e670d84 2432 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2433 continue;
2b760fcf
WW
2434 /* rt_cache's gateway might be different from its 'parent'
2435 * in the case of an ip redirect.
2436 * So we keep searching in the exception table if the gateway
2437 * is different.
2438 */
5e670d84 2439 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2440 rt_cache = rt6_find_cached_rt(rt,
2441 &fl6->daddr,
2442 &fl6->saddr);
2443 if (rt_cache &&
2444 ipv6_addr_equal(&rdfl->gateway,
2445 &rt_cache->rt6i_gateway)) {
23fb93a4 2446 ret = rt_cache;
2b760fcf
WW
2447 break;
2448 }
b55b76b2 2449 continue;
2b760fcf 2450 }
b55b76b2
DJ
2451 break;
2452 }
2453
2454 if (!rt)
421842ed 2455 rt = net->ipv6.fib6_null_entry;
93c2fb25 2456 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2457 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2458 goto out;
2459 }
2460
421842ed 2461 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2462 fn = fib6_backtrack(fn, &fl6->saddr);
2463 if (fn)
2464 goto restart;
b55b76b2 2465 }
a3c00e46 2466
b0a1ba59 2467out:
23fb93a4
DA
2468 if (ret)
2469 dst_hold(&ret->dst);
2470 else
2471 ret = ip6_create_rt_rcu(rt);
b55b76b2 2472
66f5d6ce 2473 rcu_read_unlock();
b55b76b2 2474
23fb93a4
DA
2475 trace_fib6_table_lookup(net, ret, table, fl6);
2476 return ret;
b55b76b2
DJ
2477};
2478
2479static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2480 const struct flowi6 *fl6,
2481 const struct sk_buff *skb,
2482 const struct in6_addr *gateway)
b55b76b2
DJ
2483{
2484 int flags = RT6_LOOKUP_F_HAS_SADDR;
2485 struct ip6rd_flowi rdfl;
2486
2487 rdfl.fl6 = *fl6;
2488 rdfl.gateway = *gateway;
2489
b75cc8f9 2490 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2491 flags, __ip6_route_redirect);
2492}
2493
e2d118a1
LC
2494void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2495 kuid_t uid)
3a5ad2ee
DM
2496{
2497 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2498 struct dst_entry *dst;
2499 struct flowi6 fl6;
2500
2501 memset(&fl6, 0, sizeof(fl6));
e374c618 2502 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2503 fl6.flowi6_oif = oif;
2504 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2505 fl6.daddr = iph->daddr;
2506 fl6.saddr = iph->saddr;
6502ca52 2507 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2508 fl6.flowi6_uid = uid;
3a5ad2ee 2509
b75cc8f9 2510 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2511 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2512 dst_release(dst);
2513}
2514EXPORT_SYMBOL_GPL(ip6_redirect);
2515
c92a59ec
DJ
2516void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2517 u32 mark)
2518{
2519 const struct ipv6hdr *iph = ipv6_hdr(skb);
2520 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2521 struct dst_entry *dst;
2522 struct flowi6 fl6;
2523
2524 memset(&fl6, 0, sizeof(fl6));
e374c618 2525 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2526 fl6.flowi6_oif = oif;
2527 fl6.flowi6_mark = mark;
c92a59ec
DJ
2528 fl6.daddr = msg->dest;
2529 fl6.saddr = iph->daddr;
e2d118a1 2530 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2531
b75cc8f9 2532 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2533 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2534 dst_release(dst);
2535}
2536
3a5ad2ee
DM
2537void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2538{
e2d118a1
LC
2539 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2540 sk->sk_uid);
3a5ad2ee
DM
2541}
2542EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2543
0dbaee3b 2544static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2545{
0dbaee3b
DM
2546 struct net_device *dev = dst->dev;
2547 unsigned int mtu = dst_mtu(dst);
2548 struct net *net = dev_net(dev);
2549
1da177e4
LT
2550 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2551
5578689a
DL
2552 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2553 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2554
2555 /*
1ab1457c
YH
2556 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2557 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2558 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2559 * rely only on pmtu discovery"
2560 */
2561 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2562 mtu = IPV6_MAXPLEN;
2563 return mtu;
2564}
2565
ebb762f2 2566static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2567{
d33e4553 2568 struct inet6_dev *idev;
d4ead6b3 2569 unsigned int mtu;
4b32b5ad
MKL
2570
2571 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2572 if (mtu)
30f78d8e 2573 goto out;
618f9bc7
SK
2574
2575 mtu = IPV6_MIN_MTU;
d33e4553
DM
2576
2577 rcu_read_lock();
2578 idev = __in6_dev_get(dst->dev);
2579 if (idev)
2580 mtu = idev->cnf.mtu6;
2581 rcu_read_unlock();
2582
30f78d8e 2583out:
14972cbd
RP
2584 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2585
2586 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2587}
2588
3b00944c 2589struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2590 struct flowi6 *fl6)
1da177e4 2591{
87a11578 2592 struct dst_entry *dst;
1da177e4
LT
2593 struct rt6_info *rt;
2594 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2595 struct net *net = dev_net(dev);
1da177e4 2596
38308473 2597 if (unlikely(!idev))
122bdf67 2598 return ERR_PTR(-ENODEV);
1da177e4 2599
ad706862 2600 rt = ip6_dst_alloc(net, dev, 0);
38308473 2601 if (unlikely(!rt)) {
1da177e4 2602 in6_dev_put(idev);
87a11578 2603 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2604 goto out;
2605 }
2606
8e2ec639 2607 rt->dst.flags |= DST_HOST;
588753f1 2608 rt->dst.input = ip6_input;
8e2ec639 2609 rt->dst.output = ip6_output;
550bab42 2610 rt->rt6i_gateway = fl6->daddr;
87a11578 2611 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2612 rt->rt6i_dst.plen = 128;
2613 rt->rt6i_idev = idev;
14edd87d 2614 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2615
4c981e28 2616 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2617 * do proper release of the net_device
2618 */
2619 rt6_uncached_list_add(rt);
81eb8447 2620 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2621
87a11578
DM
2622 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2623
1da177e4 2624out:
87a11578 2625 return dst;
1da177e4
LT
2626}
2627
569d3645 2628static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2629{
86393e52 2630 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2631 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2632 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2633 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2634 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2635 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2636 int entries;
7019b78e 2637
fc66f95c 2638 entries = dst_entries_get_fast(ops);
49a18d86 2639 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2640 entries <= rt_max_size)
1da177e4
LT
2641 goto out;
2642
6891a346 2643 net->ipv6.ip6_rt_gc_expire++;
14956643 2644 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2645 entries = dst_entries_get_slow(ops);
2646 if (entries < ops->gc_thresh)
7019b78e 2647 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2648out:
7019b78e 2649 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2650 return entries > rt_max_size;
1da177e4
LT
2651}
2652
8d1c802b 2653static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2654 struct fib6_config *cfg)
e715b6d3 2655{
263243d6 2656 struct dst_metrics *p;
e715b6d3 2657
263243d6
ED
2658 if (!cfg->fc_mx)
2659 return 0;
ea697639 2660
263243d6
ED
2661 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2662 if (unlikely(!p))
2663 return -ENOMEM;
e715b6d3 2664
263243d6
ED
2665 refcount_set(&p->refcnt, 1);
2666 rt->fib6_metrics = p;
e715b6d3 2667
263243d6 2668 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2669}
1da177e4 2670
8c14586f
DA
2671static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2672 struct fib6_config *cfg,
f4797b33
DA
2673 const struct in6_addr *gw_addr,
2674 u32 tbid, int flags)
8c14586f
DA
2675{
2676 struct flowi6 fl6 = {
2677 .flowi6_oif = cfg->fc_ifindex,
2678 .daddr = *gw_addr,
2679 .saddr = cfg->fc_prefsrc,
2680 };
2681 struct fib6_table *table;
2682 struct rt6_info *rt;
8c14586f 2683
f4797b33 2684 table = fib6_get_table(net, tbid);
8c14586f
DA
2685 if (!table)
2686 return NULL;
2687
2688 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2689 flags |= RT6_LOOKUP_F_HAS_SADDR;
2690
f4797b33 2691 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2692 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2693
2694 /* if table lookup failed, fall back to full lookup */
2695 if (rt == net->ipv6.ip6_null_entry) {
2696 ip6_rt_put(rt);
2697 rt = NULL;
2698 }
2699
2700 return rt;
2701}
2702
fc1e64e1
DA
2703static int ip6_route_check_nh_onlink(struct net *net,
2704 struct fib6_config *cfg,
9fbb704c 2705 const struct net_device *dev,
fc1e64e1
DA
2706 struct netlink_ext_ack *extack)
2707{
44750f84 2708 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2709 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2710 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2711 struct rt6_info *grt;
2712 int err;
2713
2714 err = 0;
2715 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2716 if (grt) {
58e354c0
DA
2717 if (!grt->dst.error &&
2718 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2719 NL_SET_ERR_MSG(extack,
2720 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2721 err = -EINVAL;
2722 }
2723
2724 ip6_rt_put(grt);
2725 }
2726
2727 return err;
2728}
2729
1edce99f
DA
2730static int ip6_route_check_nh(struct net *net,
2731 struct fib6_config *cfg,
2732 struct net_device **_dev,
2733 struct inet6_dev **idev)
2734{
2735 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2736 struct net_device *dev = _dev ? *_dev : NULL;
2737 struct rt6_info *grt = NULL;
2738 int err = -EHOSTUNREACH;
2739
2740 if (cfg->fc_table) {
f4797b33
DA
2741 int flags = RT6_LOOKUP_F_IFACE;
2742
2743 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2744 cfg->fc_table, flags);
1edce99f
DA
2745 if (grt) {
2746 if (grt->rt6i_flags & RTF_GATEWAY ||
2747 (dev && dev != grt->dst.dev)) {
2748 ip6_rt_put(grt);
2749 grt = NULL;
2750 }
2751 }
2752 }
2753
2754 if (!grt)
b75cc8f9 2755 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2756
2757 if (!grt)
2758 goto out;
2759
2760 if (dev) {
2761 if (dev != grt->dst.dev) {
2762 ip6_rt_put(grt);
2763 goto out;
2764 }
2765 } else {
2766 *_dev = dev = grt->dst.dev;
2767 *idev = grt->rt6i_idev;
2768 dev_hold(dev);
2769 in6_dev_hold(grt->rt6i_idev);
2770 }
2771
2772 if (!(grt->rt6i_flags & RTF_GATEWAY))
2773 err = 0;
2774
2775 ip6_rt_put(grt);
2776
2777out:
2778 return err;
2779}
2780
9fbb704c
DA
2781static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2782 struct net_device **_dev, struct inet6_dev **idev,
2783 struct netlink_ext_ack *extack)
2784{
2785 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2786 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2787 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2788 const struct net_device *dev = *_dev;
232378e8 2789 bool need_addr_check = !dev;
9fbb704c
DA
2790 int err = -EINVAL;
2791
2792 /* if gw_addr is local we will fail to detect this in case
2793 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2794 * will return already-added prefix route via interface that
2795 * prefix route was assigned to, which might be non-loopback.
2796 */
232378e8
DA
2797 if (dev &&
2798 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2799 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2800 goto out;
2801 }
2802
2803 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2804 /* IPv6 strictly inhibits using not link-local
2805 * addresses as nexthop address.
2806 * Otherwise, router will not able to send redirects.
2807 * It is very good, but in some (rare!) circumstances
2808 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2809 * some exceptions. --ANK
2810 * We allow IPv4-mapped nexthops to support RFC4798-type
2811 * addressing
2812 */
2813 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2814 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2815 goto out;
2816 }
2817
2818 if (cfg->fc_flags & RTNH_F_ONLINK)
2819 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2820 else
2821 err = ip6_route_check_nh(net, cfg, _dev, idev);
2822
2823 if (err)
2824 goto out;
2825 }
2826
2827 /* reload in case device was changed */
2828 dev = *_dev;
2829
2830 err = -EINVAL;
2831 if (!dev) {
2832 NL_SET_ERR_MSG(extack, "Egress device not specified");
2833 goto out;
2834 } else if (dev->flags & IFF_LOOPBACK) {
2835 NL_SET_ERR_MSG(extack,
2836 "Egress device can not be loopback device for this route");
2837 goto out;
2838 }
232378e8
DA
2839
2840 /* if we did not check gw_addr above, do so now that the
2841 * egress device has been resolved.
2842 */
2843 if (need_addr_check &&
2844 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2845 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2846 goto out;
2847 }
2848
9fbb704c
DA
2849 err = 0;
2850out:
2851 return err;
2852}
2853
8d1c802b 2854static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2855 gfp_t gfp_flags,
333c4301 2856 struct netlink_ext_ack *extack)
1da177e4 2857{
5578689a 2858 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2859 struct fib6_info *rt = NULL;
1da177e4
LT
2860 struct net_device *dev = NULL;
2861 struct inet6_dev *idev = NULL;
c71099ac 2862 struct fib6_table *table;
1da177e4 2863 int addr_type;
8c5b83f0 2864 int err = -EINVAL;
1da177e4 2865
557c44be 2866 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2867 if (cfg->fc_flags & RTF_PCPU) {
2868 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2869 goto out;
d5d531cb 2870 }
557c44be 2871
2ea2352e
WW
2872 /* RTF_CACHE is an internal flag; can not be set by userspace */
2873 if (cfg->fc_flags & RTF_CACHE) {
2874 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2875 goto out;
2876 }
2877
e8478e80
DA
2878 if (cfg->fc_type > RTN_MAX) {
2879 NL_SET_ERR_MSG(extack, "Invalid route type");
2880 goto out;
2881 }
2882
d5d531cb
DA
2883 if (cfg->fc_dst_len > 128) {
2884 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2885 goto out;
2886 }
2887 if (cfg->fc_src_len > 128) {
2888 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2889 goto out;
d5d531cb 2890 }
1da177e4 2891#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2892 if (cfg->fc_src_len) {
2893 NL_SET_ERR_MSG(extack,
2894 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2895 goto out;
d5d531cb 2896 }
1da177e4 2897#endif
86872cb5 2898 if (cfg->fc_ifindex) {
1da177e4 2899 err = -ENODEV;
5578689a 2900 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2901 if (!dev)
2902 goto out;
2903 idev = in6_dev_get(dev);
2904 if (!idev)
2905 goto out;
2906 }
2907
86872cb5
TG
2908 if (cfg->fc_metric == 0)
2909 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2910
fc1e64e1
DA
2911 if (cfg->fc_flags & RTNH_F_ONLINK) {
2912 if (!dev) {
2913 NL_SET_ERR_MSG(extack,
2914 "Nexthop device required for onlink");
2915 err = -ENODEV;
2916 goto out;
2917 }
2918
2919 if (!(dev->flags & IFF_UP)) {
2920 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2921 err = -ENETDOWN;
2922 goto out;
2923 }
2924 }
2925
d71314b4 2926 err = -ENOBUFS;
38308473
DM
2927 if (cfg->fc_nlinfo.nlh &&
2928 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2929 table = fib6_get_table(net, cfg->fc_table);
38308473 2930 if (!table) {
f3213831 2931 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2932 table = fib6_new_table(net, cfg->fc_table);
2933 }
2934 } else {
2935 table = fib6_new_table(net, cfg->fc_table);
2936 }
38308473
DM
2937
2938 if (!table)
c71099ac 2939 goto out;
c71099ac 2940
93531c67
DA
2941 err = -ENOMEM;
2942 rt = fib6_info_alloc(gfp_flags);
2943 if (!rt)
1da177e4 2944 goto out;
93531c67
DA
2945
2946 if (cfg->fc_flags & RTF_ADDRCONF)
2947 rt->dst_nocount = true;
1da177e4 2948
d4ead6b3
DA
2949 err = ip6_convert_metrics(net, rt, cfg);
2950 if (err < 0)
2951 goto out;
2952
1716a961 2953 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2954 fib6_set_expires(rt, jiffies +
1716a961
G
2955 clock_t_to_jiffies(cfg->fc_expires));
2956 else
14895687 2957 fib6_clean_expires(rt);
1da177e4 2958
86872cb5
TG
2959 if (cfg->fc_protocol == RTPROT_UNSPEC)
2960 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 2961 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
2962
2963 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2964
19e42e45
RP
2965 if (cfg->fc_encap) {
2966 struct lwtunnel_state *lwtstate;
2967
30357d7d 2968 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2969 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2970 &lwtstate, extack);
19e42e45
RP
2971 if (err)
2972 goto out;
5e670d84 2973 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2974 }
2975
93c2fb25
DA
2976 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2977 rt->fib6_dst.plen = cfg->fc_dst_len;
2978 if (rt->fib6_dst.plen == 128)
3b6761d1 2979 rt->dst_host = true;
e5fd387a 2980
1da177e4 2981#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
2982 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
2983 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
2984#endif
2985
93c2fb25 2986 rt->fib6_metric = cfg->fc_metric;
5e670d84 2987 rt->fib6_nh.nh_weight = 1;
1da177e4 2988
e8478e80
DA
2989 rt->fib6_type = cfg->fc_type;
2990
1da177e4
LT
2991 /* We cannot add true routes via loopback here,
2992 they would result in kernel looping; promote them to reject routes
2993 */
86872cb5 2994 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2995 (dev && (dev->flags & IFF_LOOPBACK) &&
2996 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2997 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2998 /* hold loopback dev/idev if we haven't done so. */
5578689a 2999 if (dev != net->loopback_dev) {
1da177e4
LT
3000 if (dev) {
3001 dev_put(dev);
3002 in6_dev_put(idev);
3003 }
5578689a 3004 dev = net->loopback_dev;
1da177e4
LT
3005 dev_hold(dev);
3006 idev = in6_dev_get(dev);
3007 if (!idev) {
3008 err = -ENODEV;
3009 goto out;
3010 }
3011 }
93c2fb25 3012 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3013 goto install_route;
3014 }
3015
86872cb5 3016 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3017 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3018 if (err)
48ed7b26 3019 goto out;
1da177e4 3020
93531c67 3021 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3022 }
3023
3024 err = -ENODEV;
38308473 3025 if (!dev)
1da177e4
LT
3026 goto out;
3027
428604fb
LB
3028 if (idev->cnf.disable_ipv6) {
3029 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3030 err = -EACCES;
3031 goto out;
3032 }
3033
955ec4cb
DA
3034 if (!(dev->flags & IFF_UP)) {
3035 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3036 err = -ENETDOWN;
3037 goto out;
3038 }
3039
c3968a85
DW
3040 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3041 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3042 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3043 err = -EINVAL;
3044 goto out;
3045 }
93c2fb25
DA
3046 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3047 rt->fib6_prefsrc.plen = 128;
c3968a85 3048 } else
93c2fb25 3049 rt->fib6_prefsrc.plen = 0;
c3968a85 3050
93c2fb25 3051 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3052
3053install_route:
93c2fb25 3054 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3055 !netif_carrier_ok(dev))
5e670d84
DA
3056 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3057 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3058 rt->fib6_nh.nh_dev = dev;
93c2fb25 3059 rt->fib6_table = table;
63152fc0 3060
c346dca1 3061 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3062
dcd1f572
DA
3063 if (idev)
3064 in6_dev_put(idev);
3065
8c5b83f0 3066 return rt;
6b9ea5a6
RP
3067out:
3068 if (dev)
3069 dev_put(dev);
3070 if (idev)
3071 in6_dev_put(idev);
6b9ea5a6 3072
93531c67 3073 fib6_info_release(rt);
8c5b83f0 3074 return ERR_PTR(err);
6b9ea5a6
RP
3075}
3076
acb54e3c
DA
3077int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3078 struct netlink_ext_ack *extack)
6b9ea5a6 3079{
8d1c802b 3080 struct fib6_info *rt;
6b9ea5a6
RP
3081 int err;
3082
acb54e3c 3083 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3084 if (IS_ERR(rt))
3085 return PTR_ERR(rt);
6b9ea5a6 3086
d4ead6b3 3087 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3088 fib6_info_release(rt);
6b9ea5a6 3089
1da177e4
LT
3090 return err;
3091}
3092
8d1c802b 3093static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3094{
afb1d4b5 3095 struct net *net = info->nl_net;
c71099ac 3096 struct fib6_table *table;
afb1d4b5 3097 int err;
1da177e4 3098
421842ed 3099 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3100 err = -ENOENT;
3101 goto out;
3102 }
6c813a72 3103
93c2fb25 3104 table = rt->fib6_table;
66f5d6ce 3105 spin_lock_bh(&table->tb6_lock);
86872cb5 3106 err = fib6_del(rt, info);
66f5d6ce 3107 spin_unlock_bh(&table->tb6_lock);
1da177e4 3108
6825a26c 3109out:
93531c67 3110 fib6_info_release(rt);
1da177e4
LT
3111 return err;
3112}
3113
8d1c802b 3114int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3115{
afb1d4b5
DA
3116 struct nl_info info = { .nl_net = net };
3117
528c4ceb 3118 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3119}
3120
8d1c802b 3121static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3122{
3123 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3124 struct net *net = info->nl_net;
16a16cd3 3125 struct sk_buff *skb = NULL;
0ae81335 3126 struct fib6_table *table;
e3330039 3127 int err = -ENOENT;
0ae81335 3128
421842ed 3129 if (rt == net->ipv6.fib6_null_entry)
e3330039 3130 goto out_put;
93c2fb25 3131 table = rt->fib6_table;
66f5d6ce 3132 spin_lock_bh(&table->tb6_lock);
0ae81335 3133
93c2fb25 3134 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3135 struct fib6_info *sibling, *next_sibling;
0ae81335 3136
16a16cd3
DA
3137 /* prefer to send a single notification with all hops */
3138 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3139 if (skb) {
3140 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3141
d4ead6b3 3142 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3143 NULL, NULL, 0, RTM_DELROUTE,
3144 info->portid, seq, 0) < 0) {
3145 kfree_skb(skb);
3146 skb = NULL;
3147 } else
3148 info->skip_notify = 1;
3149 }
3150
0ae81335 3151 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3152 &rt->fib6_siblings,
3153 fib6_siblings) {
0ae81335
DA
3154 err = fib6_del(sibling, info);
3155 if (err)
e3330039 3156 goto out_unlock;
0ae81335
DA
3157 }
3158 }
3159
3160 err = fib6_del(rt, info);
e3330039 3161out_unlock:
66f5d6ce 3162 spin_unlock_bh(&table->tb6_lock);
e3330039 3163out_put:
93531c67 3164 fib6_info_release(rt);
16a16cd3
DA
3165
3166 if (skb) {
e3330039 3167 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3168 info->nlh, gfp_any());
3169 }
0ae81335
DA
3170 return err;
3171}
3172
23fb93a4
DA
3173static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3174{
3175 int rc = -ESRCH;
3176
3177 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3178 goto out;
3179
3180 if (cfg->fc_flags & RTF_GATEWAY &&
3181 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3182 goto out;
3183 if (dst_hold_safe(&rt->dst))
3184 rc = rt6_remove_exception_rt(rt);
3185out:
3186 return rc;
3187}
3188
333c4301
DA
3189static int ip6_route_del(struct fib6_config *cfg,
3190 struct netlink_ext_ack *extack)
1da177e4 3191{
8d1c802b 3192 struct rt6_info *rt_cache;
c71099ac 3193 struct fib6_table *table;
8d1c802b 3194 struct fib6_info *rt;
1da177e4 3195 struct fib6_node *fn;
1da177e4
LT
3196 int err = -ESRCH;
3197
5578689a 3198 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3199 if (!table) {
3200 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3201 return err;
d5d531cb 3202 }
c71099ac 3203
66f5d6ce 3204 rcu_read_lock();
1da177e4 3205
c71099ac 3206 fn = fib6_locate(&table->tb6_root,
86872cb5 3207 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3208 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3209 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3210
1da177e4 3211 if (fn) {
66f5d6ce 3212 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3213 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3214 int rc;
3215
2b760fcf
WW
3216 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3217 &cfg->fc_src);
23fb93a4
DA
3218 if (rt_cache) {
3219 rc = ip6_del_cached_rt(rt_cache, cfg);
3220 if (rc != -ESRCH)
3221 return rc;
3222 }
3223 continue;
2b760fcf 3224 }
86872cb5 3225 if (cfg->fc_ifindex &&
5e670d84
DA
3226 (!rt->fib6_nh.nh_dev ||
3227 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3228 continue;
86872cb5 3229 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3230 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3231 continue;
93c2fb25 3232 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3233 continue;
93c2fb25 3234 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3235 continue;
93531c67 3236 fib6_info_hold(rt);
66f5d6ce 3237 rcu_read_unlock();
1da177e4 3238
0ae81335
DA
3239 /* if gateway was specified only delete the one hop */
3240 if (cfg->fc_flags & RTF_GATEWAY)
3241 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3242
3243 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3244 }
3245 }
66f5d6ce 3246 rcu_read_unlock();
1da177e4
LT
3247
3248 return err;
3249}
3250
6700c270 3251static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3252{
a6279458 3253 struct netevent_redirect netevent;
e8599ff4 3254 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3255 struct ndisc_options ndopts;
3256 struct inet6_dev *in6_dev;
3257 struct neighbour *neigh;
a68886a6 3258 struct fib6_info *from;
71bcdba0 3259 struct rd_msg *msg;
6e157b6a
DM
3260 int optlen, on_link;
3261 u8 *lladdr;
e8599ff4 3262
29a3cad5 3263 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3264 optlen -= sizeof(*msg);
e8599ff4
DM
3265
3266 if (optlen < 0) {
6e157b6a 3267 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3268 return;
3269 }
3270
71bcdba0 3271 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3272
71bcdba0 3273 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3274 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3275 return;
3276 }
3277
6e157b6a 3278 on_link = 0;
71bcdba0 3279 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3280 on_link = 1;
71bcdba0 3281 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3282 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3283 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3284 return;
3285 }
3286
3287 in6_dev = __in6_dev_get(skb->dev);
3288 if (!in6_dev)
3289 return;
3290 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3291 return;
3292
3293 /* RFC2461 8.1:
3294 * The IP source address of the Redirect MUST be the same as the current
3295 * first-hop router for the specified ICMP Destination Address.
3296 */
3297
f997c55c 3298 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3299 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3300 return;
3301 }
6e157b6a
DM
3302
3303 lladdr = NULL;
e8599ff4
DM
3304 if (ndopts.nd_opts_tgt_lladdr) {
3305 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3306 skb->dev);
3307 if (!lladdr) {
3308 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3309 return;
3310 }
3311 }
3312
6e157b6a 3313 rt = (struct rt6_info *) dst;
ec13ad1d 3314 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3315 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3316 return;
6e157b6a 3317 }
e8599ff4 3318
6e157b6a
DM
3319 /* Redirect received -> path was valid.
3320 * Look, redirects are sent only in response to data packets,
3321 * so that this nexthop apparently is reachable. --ANK
3322 */
0dec879f 3323 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3324
71bcdba0 3325 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3326 if (!neigh)
3327 return;
a6279458 3328
1da177e4
LT
3329 /*
3330 * We have finally decided to accept it.
3331 */
3332
f997c55c 3333 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3334 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3335 NEIGH_UPDATE_F_OVERRIDE|
3336 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3337 NEIGH_UPDATE_F_ISROUTER)),
3338 NDISC_REDIRECT, &ndopts);
1da177e4 3339
4d85cd0c 3340 rcu_read_lock();
a68886a6
DA
3341 from = rcu_dereference(rt->from);
3342 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
4d85cd0c 3343 rcu_read_unlock();
38308473 3344 if (!nrt)
1da177e4
LT
3345 goto out;
3346
3347 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3348 if (on_link)
3349 nrt->rt6i_flags &= ~RTF_GATEWAY;
3350
4e3fd7a0 3351 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3352
2b760fcf
WW
3353 /* No need to remove rt from the exception table if rt is
3354 * a cached route because rt6_insert_exception() will
3355 * takes care of it
3356 */
d4ead6b3 3357 if (rt6_insert_exception(nrt, rt->from)) {
2b760fcf
WW
3358 dst_release_immediate(&nrt->dst);
3359 goto out;
3360 }
1da177e4 3361
d8d1f30b
CG
3362 netevent.old = &rt->dst;
3363 netevent.new = &nrt->dst;
71bcdba0 3364 netevent.daddr = &msg->dest;
60592833 3365 netevent.neigh = neigh;
8d71740c
TT
3366 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3367
1da177e4 3368out:
e8599ff4 3369 neigh_release(neigh);
6e157b6a
DM
3370}
3371
70ceb4f5 3372#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3373static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3374 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3375 const struct in6_addr *gwaddr,
3376 struct net_device *dev)
70ceb4f5 3377{
830218c1
DA
3378 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3379 int ifindex = dev->ifindex;
70ceb4f5 3380 struct fib6_node *fn;
8d1c802b 3381 struct fib6_info *rt = NULL;
c71099ac
TG
3382 struct fib6_table *table;
3383
830218c1 3384 table = fib6_get_table(net, tb_id);
38308473 3385 if (!table)
c71099ac 3386 return NULL;
70ceb4f5 3387
66f5d6ce 3388 rcu_read_lock();
38fbeeee 3389 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3390 if (!fn)
3391 goto out;
3392
66f5d6ce 3393 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3394 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3395 continue;
93c2fb25 3396 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3397 continue;
5e670d84 3398 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3399 continue;
8d1c802b 3400 fib6_info_hold(rt);
70ceb4f5
YH
3401 break;
3402 }
3403out:
66f5d6ce 3404 rcu_read_unlock();
70ceb4f5
YH
3405 return rt;
3406}
3407
8d1c802b 3408static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3409 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3410 const struct in6_addr *gwaddr,
3411 struct net_device *dev,
95c96174 3412 unsigned int pref)
70ceb4f5 3413{
86872cb5 3414 struct fib6_config cfg = {
238fc7ea 3415 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3416 .fc_ifindex = dev->ifindex,
86872cb5
TG
3417 .fc_dst_len = prefixlen,
3418 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3419 RTF_UP | RTF_PREF(pref),
b91d5329 3420 .fc_protocol = RTPROT_RA,
e8478e80 3421 .fc_type = RTN_UNICAST,
15e47304 3422 .fc_nlinfo.portid = 0,
efa2cea0
DL
3423 .fc_nlinfo.nlh = NULL,
3424 .fc_nlinfo.nl_net = net,
86872cb5
TG
3425 };
3426
830218c1 3427 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3428 cfg.fc_dst = *prefix;
3429 cfg.fc_gateway = *gwaddr;
70ceb4f5 3430
e317da96
YH
3431 /* We should treat it as a default route if prefix length is 0. */
3432 if (!prefixlen)
86872cb5 3433 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3434
acb54e3c 3435 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3436
830218c1 3437 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3438}
3439#endif
3440
8d1c802b 3441struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3442 const struct in6_addr *addr,
3443 struct net_device *dev)
1ab1457c 3444{
830218c1 3445 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3446 struct fib6_info *rt;
c71099ac 3447 struct fib6_table *table;
1da177e4 3448
afb1d4b5 3449 table = fib6_get_table(net, tb_id);
38308473 3450 if (!table)
c71099ac 3451 return NULL;
1da177e4 3452
66f5d6ce
WW
3453 rcu_read_lock();
3454 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3455 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3456 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3457 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3458 break;
3459 }
3460 if (rt)
8d1c802b 3461 fib6_info_hold(rt);
66f5d6ce 3462 rcu_read_unlock();
1da177e4
LT
3463 return rt;
3464}
3465
8d1c802b 3466struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3467 const struct in6_addr *gwaddr,
ebacaaa0
YH
3468 struct net_device *dev,
3469 unsigned int pref)
1da177e4 3470{
86872cb5 3471 struct fib6_config cfg = {
ca254490 3472 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3473 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3474 .fc_ifindex = dev->ifindex,
3475 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3476 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3477 .fc_protocol = RTPROT_RA,
e8478e80 3478 .fc_type = RTN_UNICAST,
15e47304 3479 .fc_nlinfo.portid = 0,
5578689a 3480 .fc_nlinfo.nlh = NULL,
afb1d4b5 3481 .fc_nlinfo.nl_net = net,
86872cb5 3482 };
1da177e4 3483
4e3fd7a0 3484 cfg.fc_gateway = *gwaddr;
1da177e4 3485
acb54e3c 3486 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3487 struct fib6_table *table;
3488
3489 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3490 if (table)
3491 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3492 }
1da177e4 3493
afb1d4b5 3494 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3495}
3496
afb1d4b5
DA
3497static void __rt6_purge_dflt_routers(struct net *net,
3498 struct fib6_table *table)
1da177e4 3499{
8d1c802b 3500 struct fib6_info *rt;
1da177e4
LT
3501
3502restart:
66f5d6ce
WW
3503 rcu_read_lock();
3504 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3505 struct net_device *dev = fib6_info_nh_dev(rt);
3506 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3507
93c2fb25 3508 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
dcd1f572 3509 (!idev || idev->cnf.accept_ra != 2)) {
93531c67
DA
3510 fib6_info_hold(rt);
3511 rcu_read_unlock();
3512 ip6_del_rt(net, rt);
1da177e4
LT
3513 goto restart;
3514 }
3515 }
66f5d6ce 3516 rcu_read_unlock();
830218c1
DA
3517
3518 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3519}
3520
3521void rt6_purge_dflt_routers(struct net *net)
3522{
3523 struct fib6_table *table;
3524 struct hlist_head *head;
3525 unsigned int h;
3526
3527 rcu_read_lock();
3528
3529 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3530 head = &net->ipv6.fib_table_hash[h];
3531 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3532 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3533 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3534 }
3535 }
3536
3537 rcu_read_unlock();
1da177e4
LT
3538}
3539
5578689a
DL
3540static void rtmsg_to_fib6_config(struct net *net,
3541 struct in6_rtmsg *rtmsg,
86872cb5
TG
3542 struct fib6_config *cfg)
3543{
3544 memset(cfg, 0, sizeof(*cfg));
3545
ca254490
DA
3546 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3547 : RT6_TABLE_MAIN;
86872cb5
TG
3548 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3549 cfg->fc_metric = rtmsg->rtmsg_metric;
3550 cfg->fc_expires = rtmsg->rtmsg_info;
3551 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3552 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3553 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3554 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3555
5578689a 3556 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3557
4e3fd7a0
AD
3558 cfg->fc_dst = rtmsg->rtmsg_dst;
3559 cfg->fc_src = rtmsg->rtmsg_src;
3560 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3561}
3562
5578689a 3563int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3564{
86872cb5 3565 struct fib6_config cfg;
1da177e4
LT
3566 struct in6_rtmsg rtmsg;
3567 int err;
3568
67ba4152 3569 switch (cmd) {
1da177e4
LT
3570 case SIOCADDRT: /* Add a route */
3571 case SIOCDELRT: /* Delete a route */
af31f412 3572 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3573 return -EPERM;
3574 err = copy_from_user(&rtmsg, arg,
3575 sizeof(struct in6_rtmsg));
3576 if (err)
3577 return -EFAULT;
86872cb5 3578
5578689a 3579 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3580
1da177e4
LT
3581 rtnl_lock();
3582 switch (cmd) {
3583 case SIOCADDRT:
acb54e3c 3584 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3585 break;
3586 case SIOCDELRT:
333c4301 3587 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3588 break;
3589 default:
3590 err = -EINVAL;
3591 }
3592 rtnl_unlock();
3593
3594 return err;
3ff50b79 3595 }
1da177e4
LT
3596
3597 return -EINVAL;
3598}
3599
3600/*
3601 * Drop the packet on the floor
3602 */
3603
d5fdd6ba 3604static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3605{
612f09e8 3606 int type;
adf30907 3607 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3608 switch (ipstats_mib_noroutes) {
3609 case IPSTATS_MIB_INNOROUTES:
0660e03f 3610 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3611 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3612 IP6_INC_STATS(dev_net(dst->dev),
3613 __in6_dev_get_safely(skb->dev),
3bd653c8 3614 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3615 break;
3616 }
3617 /* FALLTHROUGH */
3618 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3619 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3620 ipstats_mib_noroutes);
612f09e8
YH
3621 break;
3622 }
3ffe533c 3623 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3624 kfree_skb(skb);
3625 return 0;
3626}
3627
9ce8ade0
TG
3628static int ip6_pkt_discard(struct sk_buff *skb)
3629{
612f09e8 3630 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3631}
3632
ede2059d 3633static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3634{
adf30907 3635 skb->dev = skb_dst(skb)->dev;
612f09e8 3636 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3637}
3638
9ce8ade0
TG
3639static int ip6_pkt_prohibit(struct sk_buff *skb)
3640{
612f09e8 3641 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3642}
3643
ede2059d 3644static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3645{
adf30907 3646 skb->dev = skb_dst(skb)->dev;
612f09e8 3647 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3648}
3649
1da177e4
LT
3650/*
3651 * Allocate a dst for local (unicast / anycast) address.
3652 */
3653
360a9887
DA
3654struct fib6_info *addrconf_f6i_alloc(struct net *net,
3655 struct inet6_dev *idev,
3656 const struct in6_addr *addr,
3657 bool anycast, gfp_t gfp_flags)
1da177e4 3658{
ca254490 3659 u32 tb_id;
4832c30d 3660 struct net_device *dev = idev->dev;
360a9887 3661 struct fib6_info *f6i;
5f02ce24 3662
360a9887
DA
3663 f6i = fib6_info_alloc(gfp_flags);
3664 if (!f6i)
1da177e4
LT
3665 return ERR_PTR(-ENOMEM);
3666
360a9887 3667 f6i->dst_nocount = true;
360a9887
DA
3668 f6i->dst_host = true;
3669 f6i->fib6_protocol = RTPROT_KERNEL;
3670 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3671 if (anycast) {
360a9887
DA
3672 f6i->fib6_type = RTN_ANYCAST;
3673 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3674 } else {
360a9887
DA
3675 f6i->fib6_type = RTN_LOCAL;
3676 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3677 }
1da177e4 3678
360a9887 3679 f6i->fib6_nh.nh_gw = *addr;
93531c67 3680 dev_hold(dev);
360a9887
DA
3681 f6i->fib6_nh.nh_dev = dev;
3682 f6i->fib6_dst.addr = *addr;
3683 f6i->fib6_dst.plen = 128;
ca254490 3684 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3685 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3686
360a9887 3687 return f6i;
1da177e4
LT
3688}
3689
c3968a85
DW
3690/* remove deleted ip from prefsrc entries */
/* Argument block handed to fib6_remove_prefsrc() through fib6_clean_all() */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace, used to spot the null entry */
	struct in6_addr *addr;		/* address compared against fib6_prefsrc */
};
3696
8d1c802b 3697static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3698{
3699 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3700 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3701 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3702
5e670d84 3703 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3704 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3705 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3706 spin_lock_bh(&rt6_exception_lock);
c3968a85 3707 /* remove prefsrc entry */
93c2fb25 3708 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3709 /* need to update cache as well */
3710 rt6_exceptions_remove_prefsrc(rt);
3711 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3712 }
3713 return 0;
3714}
3715
3716void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3717{
3718 struct net *net = dev_net(ifp->idev->dev);
3719 struct arg_dev_net_ip adni = {
3720 .dev = ifp->idev->dev,
3721 .net = net,
3722 .addr = &ifp->addr,
3723 };
0c3584d5 3724 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3725}
3726
be7a010d 3727#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3728
3729/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3730static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3731{
3732 struct in6_addr *gateway = (struct in6_addr *)arg;
3733
93c2fb25 3734 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3735 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3736 return -1;
3737 }
b16cb459
WW
3738
3739 /* Further clean up cached routes in exception table.
3740 * This is needed because cached route may have a different
3741 * gateway than its 'parent' in the case of an ip redirect.
3742 */
3743 rt6_exceptions_clean_tohost(rt, gateway);
3744
be7a010d
DJ
3745 return 0;
3746}
3747
3748void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3749{
3750 fib6_clean_all(net, fib6_clean_tohost, gateway);
3751}
3752
2127d95a
IS
/* Argument block for the fib6_ifup()/fib6_ifdown() walkers */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* fib6_ifup: nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown: NETDEV_* event code */
	};
};
3760
8d1c802b 3761static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3762{
8d1c802b 3763 struct fib6_info *iter;
d7dedee1
IS
3764 struct fib6_node *fn;
3765
93c2fb25
DA
3766 fn = rcu_dereference_protected(rt->fib6_node,
3767 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3768 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3769 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3770 while (iter) {
93c2fb25 3771 if (iter->fib6_metric == rt->fib6_metric &&
d7dedee1
IS
3772 rt6_qualify_for_ecmp(iter))
3773 return iter;
3774 iter = rcu_dereference_protected(iter->rt6_next,
93c2fb25 3775 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3776 }
3777
3778 return NULL;
3779}
3780
8d1c802b 3781static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3782{
5e670d84
DA
3783 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3784 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3785 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3786 return true;
3787
3788 return false;
3789}
3790
8d1c802b 3791static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3792{
8d1c802b 3793 struct fib6_info *iter;
d7dedee1
IS
3794 int total = 0;
3795
3796 if (!rt6_is_dead(rt))
5e670d84 3797 total += rt->fib6_nh.nh_weight;
d7dedee1 3798
93c2fb25 3799 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3800 if (!rt6_is_dead(iter))
5e670d84 3801 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3802 }
3803
3804 return total;
3805}
3806
8d1c802b 3807static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3808{
3809 int upper_bound = -1;
3810
3811 if (!rt6_is_dead(rt)) {
5e670d84 3812 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3813 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3814 total) - 1;
3815 }
5e670d84 3816 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3817}
3818
8d1c802b 3819static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3820{
8d1c802b 3821 struct fib6_info *iter;
d7dedee1
IS
3822 int weight = 0;
3823
3824 rt6_upper_bound_set(rt, &weight, total);
3825
93c2fb25 3826 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3827 rt6_upper_bound_set(iter, &weight, total);
3828}
3829
8d1c802b 3830void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3831{
8d1c802b 3832 struct fib6_info *first;
d7dedee1
IS
3833 int total;
3834
3835 /* In case the entire multipath route was marked for flushing,
3836 * then there is no need to rebalance upon the removal of every
3837 * sibling route.
3838 */
93c2fb25 3839 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3840 return;
3841
3842 /* During lookup routes are evaluated in order, so we need to
3843 * make sure upper bounds are assigned from the first sibling
3844 * onwards.
3845 */
3846 first = rt6_multipath_first_sibling(rt);
3847 if (WARN_ON_ONCE(!first))
3848 return;
3849
3850 total = rt6_multipath_total_weight(first);
3851 rt6_multipath_upper_bound_set(first, total);
3852}
3853
8d1c802b 3854static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3855{
3856 const struct arg_netdev_event *arg = p_arg;
7aef6859 3857 struct net *net = dev_net(arg->dev);
2127d95a 3858
421842ed 3859 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3860 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3861 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3862 rt6_multipath_rebalance(rt);
1de178ed 3863 }
2127d95a
IS
3864
3865 return 0;
3866}
3867
3868void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3869{
3870 struct arg_netdev_event arg = {
3871 .dev = dev,
6802f3ad
IS
3872 {
3873 .nh_flags = nh_flags,
3874 },
2127d95a
IS
3875 };
3876
3877 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3878 arg.nh_flags |= RTNH_F_LINKDOWN;
3879
3880 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3881}
3882
8d1c802b 3883static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3884 const struct net_device *dev)
3885{
8d1c802b 3886 struct fib6_info *iter;
1de178ed 3887
5e670d84 3888 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3889 return true;
93c2fb25 3890 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3891 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3892 return true;
3893
3894 return false;
3895}
3896
8d1c802b 3897static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3898{
8d1c802b 3899 struct fib6_info *iter;
1de178ed
IS
3900
3901 rt->should_flush = 1;
93c2fb25 3902 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3903 iter->should_flush = 1;
3904}
3905
8d1c802b 3906static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3907 const struct net_device *down_dev)
3908{
8d1c802b 3909 struct fib6_info *iter;
1de178ed
IS
3910 unsigned int dead = 0;
3911
5e670d84
DA
3912 if (rt->fib6_nh.nh_dev == down_dev ||
3913 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3914 dead++;
93c2fb25 3915 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3916 if (iter->fib6_nh.nh_dev == down_dev ||
3917 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3918 dead++;
3919
3920 return dead;
3921}
3922
8d1c802b 3923static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3924 const struct net_device *dev,
3925 unsigned int nh_flags)
3926{
8d1c802b 3927 struct fib6_info *iter;
1de178ed 3928
5e670d84
DA
3929 if (rt->fib6_nh.nh_dev == dev)
3930 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3931 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3932 if (iter->fib6_nh.nh_dev == dev)
3933 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3934}
3935
a1a22c12 3936/* called with write lock held for table with rt */
8d1c802b 3937static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 3938{
4c981e28
IS
3939 const struct arg_netdev_event *arg = p_arg;
3940 const struct net_device *dev = arg->dev;
7aef6859 3941 struct net *net = dev_net(dev);
8ed67789 3942
421842ed 3943 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3944 return 0;
3945
3946 switch (arg->event) {
3947 case NETDEV_UNREGISTER:
5e670d84 3948 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3949 case NETDEV_DOWN:
1de178ed 3950 if (rt->should_flush)
27c6fa73 3951 return -1;
93c2fb25 3952 if (!rt->fib6_nsiblings)
5e670d84 3953 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3954 if (rt6_multipath_uses_dev(rt, dev)) {
3955 unsigned int count;
3956
3957 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 3958 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
3959 rt6_multipath_flush(rt);
3960 return -1;
3961 }
3962 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3963 RTNH_F_LINKDOWN);
7aef6859 3964 fib6_update_sernum(net, rt);
d7dedee1 3965 rt6_multipath_rebalance(rt);
1de178ed
IS
3966 }
3967 return -2;
27c6fa73 3968 case NETDEV_CHANGE:
5e670d84 3969 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 3970 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3971 break;
5e670d84 3972 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3973 rt6_multipath_rebalance(rt);
27c6fa73 3974 break;
2b241361 3975 }
c159d30c 3976
1da177e4
LT
3977 return 0;
3978}
3979
27c6fa73 3980void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3981{
4c981e28 3982 struct arg_netdev_event arg = {
8ed67789 3983 .dev = dev,
6802f3ad
IS
3984 {
3985 .event = event,
3986 },
8ed67789
DL
3987 };
3988
4c981e28
IS
3989 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3990}
3991
3992void rt6_disable_ip(struct net_device *dev, unsigned long event)
3993{
3994 rt6_sync_down_dev(dev, event);
3995 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3996 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3997}
3998
/* Argument block passed through fib6_clean_all() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU value */
};
4003
8d1c802b 4004static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4005{
4006 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4007 struct inet6_dev *idev;
4008
4009 /* In IPv6 pmtu discovery is not optional,
4010 so that RTAX_MTU lock cannot disable it.
4011 We still use this lock to block changes
4012 caused by addrconf/ndisc.
4013 */
4014
4015 idev = __in6_dev_get(arg->dev);
38308473 4016 if (!idev)
1da177e4
LT
4017 return 0;
4018
4019 /* For administrative MTU increase, there is no way to discover
4020 IPv6 PMTU increase, so PMTU increase should be updated here.
4021 Since RFC 1981 doesn't include administrative MTU increase
4022 update PMTU increase is a MUST. (i.e. jumbo frame)
4023 */
5e670d84 4024 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4025 !fib6_metric_locked(rt, RTAX_MTU)) {
4026 u32 mtu = rt->fib6_pmtu;
4027
4028 if (mtu >= arg->mtu ||
4029 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4030 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4031
f5bbe7ee 4032 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4033 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4034 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4035 }
1da177e4
LT
4036 return 0;
4037}
4038
95c96174 4039void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4040{
c71099ac
TG
4041 struct rt6_mtu_change_arg arg = {
4042 .dev = dev,
4043 .mtu = mtu,
4044 };
1da177e4 4045
0c3584d5 4046 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4047}
4048
ef7c79ed 4049static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4050 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4051 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4052 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4053 [RTA_PRIORITY] = { .type = NLA_U32 },
4054 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4055 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4056 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4057 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4058 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4059 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4060 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4061 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4062};
4063
4064static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4065 struct fib6_config *cfg,
4066 struct netlink_ext_ack *extack)
1da177e4 4067{
86872cb5
TG
4068 struct rtmsg *rtm;
4069 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4070 unsigned int pref;
86872cb5 4071 int err;
1da177e4 4072
fceb6435
JB
4073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4074 NULL);
86872cb5
TG
4075 if (err < 0)
4076 goto errout;
1da177e4 4077
86872cb5
TG
4078 err = -EINVAL;
4079 rtm = nlmsg_data(nlh);
4080 memset(cfg, 0, sizeof(*cfg));
4081
4082 cfg->fc_table = rtm->rtm_table;
4083 cfg->fc_dst_len = rtm->rtm_dst_len;
4084 cfg->fc_src_len = rtm->rtm_src_len;
4085 cfg->fc_flags = RTF_UP;
4086 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4087 cfg->fc_type = rtm->rtm_type;
86872cb5 4088
ef2c7d7b
ND
4089 if (rtm->rtm_type == RTN_UNREACHABLE ||
4090 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4091 rtm->rtm_type == RTN_PROHIBIT ||
4092 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4093 cfg->fc_flags |= RTF_REJECT;
4094
ab79ad14
4095 if (rtm->rtm_type == RTN_LOCAL)
4096 cfg->fc_flags |= RTF_LOCAL;
4097
1f56a01f
MKL
4098 if (rtm->rtm_flags & RTM_F_CLONED)
4099 cfg->fc_flags |= RTF_CACHE;
4100
fc1e64e1
DA
4101 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4102
15e47304 4103 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4104 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4105 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4106
4107 if (tb[RTA_GATEWAY]) {
67b61f6c 4108 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4109 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4110 }
86872cb5
TG
4111
4112 if (tb[RTA_DST]) {
4113 int plen = (rtm->rtm_dst_len + 7) >> 3;
4114
4115 if (nla_len(tb[RTA_DST]) < plen)
4116 goto errout;
4117
4118 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4119 }
86872cb5
TG
4120
4121 if (tb[RTA_SRC]) {
4122 int plen = (rtm->rtm_src_len + 7) >> 3;
4123
4124 if (nla_len(tb[RTA_SRC]) < plen)
4125 goto errout;
4126
4127 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4128 }
86872cb5 4129
c3968a85 4130 if (tb[RTA_PREFSRC])
67b61f6c 4131 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4132
86872cb5
TG
4133 if (tb[RTA_OIF])
4134 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4135
4136 if (tb[RTA_PRIORITY])
4137 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4138
4139 if (tb[RTA_METRICS]) {
4140 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4141 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4142 }
86872cb5
TG
4143
4144 if (tb[RTA_TABLE])
4145 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4146
51ebd318
ND
4147 if (tb[RTA_MULTIPATH]) {
4148 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4149 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4150
4151 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4152 cfg->fc_mp_len, extack);
9ed59592
DA
4153 if (err < 0)
4154 goto errout;
51ebd318
ND
4155 }
4156
c78ba6d6
LR
4157 if (tb[RTA_PREF]) {
4158 pref = nla_get_u8(tb[RTA_PREF]);
4159 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4160 pref != ICMPV6_ROUTER_PREF_HIGH)
4161 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4162 cfg->fc_flags |= RTF_PREF(pref);
4163 }
4164
19e42e45
RP
4165 if (tb[RTA_ENCAP])
4166 cfg->fc_encap = tb[RTA_ENCAP];
4167
9ed59592 4168 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4169 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4170
c255bd68 4171 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4172 if (err < 0)
4173 goto errout;
4174 }
4175
32bc201e
XL
4176 if (tb[RTA_EXPIRES]) {
4177 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4178
4179 if (addrconf_finite_timeout(timeout)) {
4180 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4181 cfg->fc_flags |= RTF_EXPIRES;
4182 }
4183 }
4184
86872cb5
TG
4185 err = 0;
4186errout:
4187 return err;
1da177e4
LT
4188}
4189
6b9ea5a6 4190struct rt6_nh {
8d1c802b 4191 struct fib6_info *fib6_info;
6b9ea5a6 4192 struct fib6_config r_cfg;
6b9ea5a6
RP
4193 struct list_head next;
4194};
4195
4196static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4197{
4198 struct rt6_nh *nh;
4199
4200 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4201 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4202 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4203 nh->r_cfg.fc_ifindex);
4204 }
4205}
4206
d4ead6b3
DA
4207static int ip6_route_info_append(struct net *net,
4208 struct list_head *rt6_nh_list,
8d1c802b
DA
4209 struct fib6_info *rt,
4210 struct fib6_config *r_cfg)
6b9ea5a6
RP
4211{
4212 struct rt6_nh *nh;
6b9ea5a6
RP
4213 int err = -EEXIST;
4214
4215 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4216 /* check if fib6_info already exists */
4217 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4218 return err;
4219 }
4220
4221 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4222 if (!nh)
4223 return -ENOMEM;
8d1c802b 4224 nh->fib6_info = rt;
d4ead6b3 4225 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4226 if (err) {
4227 kfree(nh);
4228 return err;
4229 }
4230 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4231 list_add_tail(&nh->next, rt6_nh_list);
4232
4233 return 0;
4234}
4235
8d1c802b
DA
4236static void ip6_route_mpath_notify(struct fib6_info *rt,
4237 struct fib6_info *rt_last,
3b1137fe
DA
4238 struct nl_info *info,
4239 __u16 nlflags)
4240{
4241 /* if this is an APPEND route, then rt points to the first route
4242 * inserted and rt_last points to last route inserted. Userspace
4243 * wants a consistent dump of the route which starts at the first
4244 * nexthop. Since sibling routes are always added at the end of
4245 * the list, find the first sibling of the last route appended
4246 */
93c2fb25
DA
4247 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4248 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4249 struct fib6_info,
93c2fb25 4250 fib6_siblings);
3b1137fe
DA
4251 }
4252
4253 if (rt)
4254 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4255}
4256
333c4301
DA
4257static int ip6_route_multipath_add(struct fib6_config *cfg,
4258 struct netlink_ext_ack *extack)
51ebd318 4259{
8d1c802b 4260 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4261 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4262 struct fib6_config r_cfg;
4263 struct rtnexthop *rtnh;
8d1c802b 4264 struct fib6_info *rt;
6b9ea5a6
RP
4265 struct rt6_nh *err_nh;
4266 struct rt6_nh *nh, *nh_safe;
3b1137fe 4267 __u16 nlflags;
51ebd318
ND
4268 int remaining;
4269 int attrlen;
6b9ea5a6
RP
4270 int err = 1;
4271 int nhn = 0;
4272 int replace = (cfg->fc_nlinfo.nlh &&
4273 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4274 LIST_HEAD(rt6_nh_list);
51ebd318 4275
3b1137fe
DA
4276 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4277 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4278 nlflags |= NLM_F_APPEND;
4279
35f1b4e9 4280 remaining = cfg->fc_mp_len;
51ebd318 4281 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4282
6b9ea5a6 4283 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4284 * fib6_info structs per nexthop
6b9ea5a6 4285 */
51ebd318
ND
4286 while (rtnh_ok(rtnh, remaining)) {
4287 memcpy(&r_cfg, cfg, sizeof(*cfg));
4288 if (rtnh->rtnh_ifindex)
4289 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4290
4291 attrlen = rtnh_attrlen(rtnh);
4292 if (attrlen > 0) {
4293 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4294
4295 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4296 if (nla) {
67b61f6c 4297 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4298 r_cfg.fc_flags |= RTF_GATEWAY;
4299 }
19e42e45
RP
4300 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4301 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4302 if (nla)
4303 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4304 }
6b9ea5a6 4305
68e2ffde 4306 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4307 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4308 if (IS_ERR(rt)) {
4309 err = PTR_ERR(rt);
4310 rt = NULL;
6b9ea5a6 4311 goto cleanup;
8c5b83f0 4312 }
6b9ea5a6 4313
5e670d84 4314 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4315
d4ead6b3
DA
4316 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4317 rt, &r_cfg);
51ebd318 4318 if (err) {
93531c67 4319 fib6_info_release(rt);
6b9ea5a6
RP
4320 goto cleanup;
4321 }
4322
4323 rtnh = rtnh_next(rtnh, &remaining);
4324 }
4325
3b1137fe
DA
4326 /* for add and replace send one notification with all nexthops.
4327 * Skip the notification in fib6_add_rt2node and send one with
4328 * the full route when done
4329 */
4330 info->skip_notify = 1;
4331
6b9ea5a6
RP
4332 err_nh = NULL;
4333 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4334 rt_last = nh->fib6_info;
4335 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4336 fib6_info_release(nh->fib6_info);
93531c67 4337
3b1137fe
DA
4338 /* save reference to first route for notification */
4339 if (!rt_notif && !err)
8d1c802b 4340 rt_notif = nh->fib6_info;
3b1137fe 4341
8d1c802b
DA
4342 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4343 nh->fib6_info = NULL;
6b9ea5a6
RP
4344 if (err) {
4345 if (replace && nhn)
4346 ip6_print_replace_route_err(&rt6_nh_list);
4347 err_nh = nh;
4348 goto add_errout;
51ebd318 4349 }
6b9ea5a6 4350
1a72418b 4351 /* Because each route is added like a single route we remove
27596472
MK
4352 * these flags after the first nexthop: if there is a collision,
4353 * we have already failed to add the first nexthop:
4354 * fib6_add_rt2node() has rejected it; when replacing, old
4355 * nexthops have been replaced by first new, the rest should
4356 * be added to it.
1a72418b 4357 */
27596472
MK
4358 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4359 NLM_F_REPLACE);
6b9ea5a6
RP
4360 nhn++;
4361 }
4362
3b1137fe
DA
4363 /* success ... tell user about new route */
4364 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4365 goto cleanup;
4366
4367add_errout:
3b1137fe
DA
4368 /* send notification for routes that were added so that
4369 * the delete notifications sent by ip6_route_del are
4370 * coherent
4371 */
4372 if (rt_notif)
4373 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4374
6b9ea5a6
RP
4375 /* Delete routes that were already added */
4376 list_for_each_entry(nh, &rt6_nh_list, next) {
4377 if (err_nh == nh)
4378 break;
333c4301 4379 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4380 }
4381
4382cleanup:
4383 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4384 if (nh->fib6_info)
4385 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4386 list_del(&nh->next);
4387 kfree(nh);
4388 }
4389
4390 return err;
4391}
4392
333c4301
DA
4393static int ip6_route_multipath_del(struct fib6_config *cfg,
4394 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4395{
4396 struct fib6_config r_cfg;
4397 struct rtnexthop *rtnh;
4398 int remaining;
4399 int attrlen;
4400 int err = 1, last_err = 0;
4401
4402 remaining = cfg->fc_mp_len;
4403 rtnh = (struct rtnexthop *)cfg->fc_mp;
4404
4405 /* Parse a Multipath Entry */
4406 while (rtnh_ok(rtnh, remaining)) {
4407 memcpy(&r_cfg, cfg, sizeof(*cfg));
4408 if (rtnh->rtnh_ifindex)
4409 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4410
4411 attrlen = rtnh_attrlen(rtnh);
4412 if (attrlen > 0) {
4413 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4414
4415 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4416 if (nla) {
4417 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4418 r_cfg.fc_flags |= RTF_GATEWAY;
4419 }
4420 }
333c4301 4421 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4422 if (err)
4423 last_err = err;
4424
51ebd318
ND
4425 rtnh = rtnh_next(rtnh, &remaining);
4426 }
4427
4428 return last_err;
4429}
4430
c21ef3e3
DA
4431static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4432 struct netlink_ext_ack *extack)
1da177e4 4433{
86872cb5
TG
4434 struct fib6_config cfg;
4435 int err;
1da177e4 4436
333c4301 4437 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4438 if (err < 0)
4439 return err;
4440
51ebd318 4441 if (cfg.fc_mp)
333c4301 4442 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4443 else {
4444 cfg.fc_delete_all_nh = 1;
333c4301 4445 return ip6_route_del(&cfg, extack);
0ae81335 4446 }
1da177e4
LT
4447}
4448
c21ef3e3
DA
4449static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4450 struct netlink_ext_ack *extack)
1da177e4 4451{
86872cb5
TG
4452 struct fib6_config cfg;
4453 int err;
1da177e4 4454
333c4301 4455 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4456 if (err < 0)
4457 return err;
4458
51ebd318 4459 if (cfg.fc_mp)
333c4301 4460 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4461 else
acb54e3c 4462 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4463}
4464
8d1c802b 4465static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4466{
beb1afac
DA
4467 int nexthop_len = 0;
4468
93c2fb25 4469 if (rt->fib6_nsiblings) {
beb1afac
DA
4470 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4471 + NLA_ALIGN(sizeof(struct rtnexthop))
4472 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4473 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4474
93c2fb25 4475 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4476 }
4477
339bf98f
TG
4478 return NLMSG_ALIGN(sizeof(struct rtmsg))
4479 + nla_total_size(16) /* RTA_SRC */
4480 + nla_total_size(16) /* RTA_DST */
4481 + nla_total_size(16) /* RTA_GATEWAY */
4482 + nla_total_size(16) /* RTA_PREFSRC */
4483 + nla_total_size(4) /* RTA_TABLE */
4484 + nla_total_size(4) /* RTA_IIF */
4485 + nla_total_size(4) /* RTA_OIF */
4486 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4487 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4488 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4489 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4490 + nla_total_size(1) /* RTA_PREF */
5e670d84 4491 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4492 + nexthop_len;
4493}
4494
8d1c802b 4495static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4496 unsigned int *flags, bool skip_oif)
beb1afac 4497{
5e670d84 4498 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4499 *flags |= RTNH_F_DEAD;
4500
5e670d84 4501 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4502 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4503
4504 rcu_read_lock();
4505 if (fib6_ignore_linkdown(rt))
beb1afac 4506 *flags |= RTNH_F_DEAD;
dcd1f572 4507 rcu_read_unlock();
beb1afac
DA
4508 }
4509
93c2fb25 4510 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4511 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4512 goto nla_put_failure;
4513 }
4514
5e670d84
DA
4515 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4516 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4517 *flags |= RTNH_F_OFFLOAD;
4518
5be083ce 4519 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4520 if (!skip_oif && rt->fib6_nh.nh_dev &&
4521 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4522 goto nla_put_failure;
4523
5e670d84
DA
4524 if (rt->fib6_nh.nh_lwtstate &&
4525 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4526 goto nla_put_failure;
4527
4528 return 0;
4529
4530nla_put_failure:
4531 return -EMSGSIZE;
4532}
4533
5be083ce 4534/* add multipath next hop */
8d1c802b 4535static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4536{
5e670d84 4537 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4538 struct rtnexthop *rtnh;
4539 unsigned int flags = 0;
4540
4541 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4542 if (!rtnh)
4543 goto nla_put_failure;
4544
5e670d84
DA
4545 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4546 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4547
5be083ce 4548 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4549 goto nla_put_failure;
4550
4551 rtnh->rtnh_flags = flags;
4552
4553 /* length of rtnetlink header + attributes */
4554 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4555
4556 return 0;
4557
4558nla_put_failure:
4559 return -EMSGSIZE;
339bf98f
TG
4560}
4561
d4ead6b3 4562static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4563 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4564 struct in6_addr *dest, struct in6_addr *src,
15e47304 4565 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4566 unsigned int flags)
1da177e4
LT
4567{
4568 struct rtmsg *rtm;
2d7202bf 4569 struct nlmsghdr *nlh;
d4ead6b3
DA
4570 long expires = 0;
4571 u32 *pmetrics;
9e762a4a 4572 u32 table;
1da177e4 4573
15e47304 4574 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4575 if (!nlh)
26932566 4576 return -EMSGSIZE;
2d7202bf
TG
4577
4578 rtm = nlmsg_data(nlh);
1da177e4 4579 rtm->rtm_family = AF_INET6;
93c2fb25
DA
4580 rtm->rtm_dst_len = rt->fib6_dst.plen;
4581 rtm->rtm_src_len = rt->fib6_src.plen;
1da177e4 4582 rtm->rtm_tos = 0;
93c2fb25
DA
4583 if (rt->fib6_table)
4584 table = rt->fib6_table->tb6_id;
c71099ac 4585 else
9e762a4a
PM
4586 table = RT6_TABLE_UNSPEC;
4587 rtm->rtm_table = table;
c78679e8
DM
4588 if (nla_put_u32(skb, RTA_TABLE, table))
4589 goto nla_put_failure;
e8478e80
DA
4590
4591 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4592 rtm->rtm_flags = 0;
4593 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4594 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4595
93c2fb25 4596 if (rt->fib6_flags & RTF_CACHE)
1da177e4
LT
4597 rtm->rtm_flags |= RTM_F_CLONED;
4598
d4ead6b3
DA
4599 if (dest) {
4600 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4601 goto nla_put_failure;
1ab1457c 4602 rtm->rtm_dst_len = 128;
1da177e4 4603 } else if (rtm->rtm_dst_len)
93c2fb25 4604 if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
c78679e8 4605 goto nla_put_failure;
1da177e4
LT
4606#ifdef CONFIG_IPV6_SUBTREES
4607 if (src) {
930345ea 4608 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4609 goto nla_put_failure;
1ab1457c 4610 rtm->rtm_src_len = 128;
c78679e8 4611 } else if (rtm->rtm_src_len &&
93c2fb25 4612 nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
c78679e8 4613 goto nla_put_failure;
1da177e4 4614#endif
7bc570c8
YH
4615 if (iif) {
4616#ifdef CONFIG_IPV6_MROUTE
93c2fb25 4617 if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
fd61c6ba
DA
4618 int err = ip6mr_get_route(net, skb, rtm, portid);
4619
4620 if (err == 0)
4621 return 0;
4622 if (err < 0)
4623 goto nla_put_failure;
7bc570c8
YH
4624 } else
4625#endif
c78679e8
DM
4626 if (nla_put_u32(skb, RTA_IIF, iif))
4627 goto nla_put_failure;
d4ead6b3 4628 } else if (dest) {
1da177e4 4629 struct in6_addr saddr_buf;
d4ead6b3 4630 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4631 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4632 goto nla_put_failure;
1da177e4 4633 }
2d7202bf 4634
93c2fb25 4635 if (rt->fib6_prefsrc.plen) {
c3968a85 4636 struct in6_addr saddr_buf;
93c2fb25 4637 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4638 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4639 goto nla_put_failure;
c3968a85
DW
4640 }
4641
d4ead6b3
DA
4642 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4643 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4644 goto nla_put_failure;
4645
93c2fb25 4646 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4647 goto nla_put_failure;
8253947e 4648
beb1afac
DA
4649 /* For multipath routes, walk the siblings list and add
4650 * each as a nexthop within RTA_MULTIPATH.
4651 */
93c2fb25 4652 if (rt->fib6_nsiblings) {
8d1c802b 4653 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4654 struct nlattr *mp;
4655
4656 mp = nla_nest_start(skb, RTA_MULTIPATH);
4657 if (!mp)
4658 goto nla_put_failure;
4659
4660 if (rt6_add_nexthop(skb, rt) < 0)
4661 goto nla_put_failure;
4662
4663 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4664 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4665 if (rt6_add_nexthop(skb, sibling) < 0)
4666 goto nla_put_failure;
4667 }
4668
4669 nla_nest_end(skb, mp);
4670 } else {
5be083ce 4671 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4672 goto nla_put_failure;
4673 }
4674
93c2fb25 4675 if (rt->fib6_flags & RTF_EXPIRES) {
14895687
DA
4676 expires = dst ? dst->expires : rt->expires;
4677 expires -= jiffies;
4678 }
69cdf8f9 4679
d4ead6b3 4680 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4681 goto nla_put_failure;
2d7202bf 4682
93c2fb25 4683 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
c78ba6d6
LR
4684 goto nla_put_failure;
4685
19e42e45 4686
053c095a
JB
4687 nlmsg_end(skb, nlh);
4688 return 0;
2d7202bf
TG
4689
4690nla_put_failure:
26932566
PM
4691 nlmsg_cancel(skb, nlh);
4692 return -EMSGSIZE;
1da177e4
LT
4693}
4694
8d1c802b 4695int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4696{
4697 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4698 struct net *net = arg->net;
4699
421842ed 4700 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4701 return 0;
1da177e4 4702
2d7202bf
TG
4703 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4704 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4705
4706 /* user wants prefix routes only */
4707 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4708 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4709 /* success since this is not a prefix route */
4710 return 1;
4711 }
4712 }
1da177e4 4713
d4ead6b3
DA
4714 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4715 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4716 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4717}
4718
c21ef3e3
DA
4719static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4720 struct netlink_ext_ack *extack)
1da177e4 4721{
3b1e0a65 4722 struct net *net = sock_net(in_skb->sk);
ab364a6f 4723 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4724 int err, iif = 0, oif = 0;
a68886a6 4725 struct fib6_info *from;
18c3a61c 4726 struct dst_entry *dst;
ab364a6f 4727 struct rt6_info *rt;
1da177e4 4728 struct sk_buff *skb;
ab364a6f 4729 struct rtmsg *rtm;
4c9483b2 4730 struct flowi6 fl6;
18c3a61c 4731 bool fibmatch;
1da177e4 4732
fceb6435 4733 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4734 extack);
ab364a6f
TG
4735 if (err < 0)
4736 goto errout;
1da177e4 4737
ab364a6f 4738 err = -EINVAL;
4c9483b2 4739 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4740 rtm = nlmsg_data(nlh);
4741 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4742 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4743
ab364a6f
TG
4744 if (tb[RTA_SRC]) {
4745 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4746 goto errout;
4747
4e3fd7a0 4748 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4749 }
4750
4751 if (tb[RTA_DST]) {
4752 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4753 goto errout;
4754
4e3fd7a0 4755 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4756 }
4757
4758 if (tb[RTA_IIF])
4759 iif = nla_get_u32(tb[RTA_IIF]);
4760
4761 if (tb[RTA_OIF])
72331bc0 4762 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4763
2e47b291
LC
4764 if (tb[RTA_MARK])
4765 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4766
622ec2c9
LC
4767 if (tb[RTA_UID])
4768 fl6.flowi6_uid = make_kuid(current_user_ns(),
4769 nla_get_u32(tb[RTA_UID]));
4770 else
4771 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4772
1da177e4
LT
4773 if (iif) {
4774 struct net_device *dev;
72331bc0
SL
4775 int flags = 0;
4776
121622db
FW
4777 rcu_read_lock();
4778
4779 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4780 if (!dev) {
121622db 4781 rcu_read_unlock();
1da177e4 4782 err = -ENODEV;
ab364a6f 4783 goto errout;
1da177e4 4784 }
72331bc0
SL
4785
4786 fl6.flowi6_iif = iif;
4787
4788 if (!ipv6_addr_any(&fl6.saddr))
4789 flags |= RT6_LOOKUP_F_HAS_SADDR;
4790
b75cc8f9 4791 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4792
4793 rcu_read_unlock();
72331bc0
SL
4794 } else {
4795 fl6.flowi6_oif = oif;
4796
58acfd71 4797 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4798 }
4799
18c3a61c
RP
4800
4801 rt = container_of(dst, struct rt6_info, dst);
4802 if (rt->dst.error) {
4803 err = rt->dst.error;
4804 ip6_rt_put(rt);
4805 goto errout;
1da177e4
LT
4806 }
4807
9d6acb3b
WC
4808 if (rt == net->ipv6.ip6_null_entry) {
4809 err = rt->dst.error;
4810 ip6_rt_put(rt);
4811 goto errout;
4812 }
4813
ab364a6f 4814 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4815 if (!skb) {
94e187c0 4816 ip6_rt_put(rt);
ab364a6f
TG
4817 err = -ENOBUFS;
4818 goto errout;
4819 }
1da177e4 4820
d8d1f30b 4821 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4822
4823 rcu_read_lock();
4824 from = rcu_dereference(rt->from);
4825
18c3a61c 4826 if (fibmatch)
a68886a6 4827 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4828 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4829 nlh->nlmsg_seq, 0);
4830 else
a68886a6
DA
4831 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4832 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4833 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4834 0);
a68886a6
DA
4835 rcu_read_unlock();
4836
1da177e4 4837 if (err < 0) {
ab364a6f
TG
4838 kfree_skb(skb);
4839 goto errout;
1da177e4
LT
4840 }
4841
15e47304 4842 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4843errout:
1da177e4 4844 return err;
1da177e4
LT
4845}
4846
8d1c802b 4847void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4848 unsigned int nlm_flags)
1da177e4
LT
4849{
4850 struct sk_buff *skb;
5578689a 4851 struct net *net = info->nl_net;
528c4ceb
DL
4852 u32 seq;
4853 int err;
4854
4855 err = -ENOBUFS;
38308473 4856 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4857
19e42e45 4858 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4859 if (!skb)
21713ebc
TG
4860 goto errout;
4861
d4ead6b3
DA
4862 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4863 event, info->portid, seq, nlm_flags);
26932566
PM
4864 if (err < 0) {
4865 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4866 WARN_ON(err == -EMSGSIZE);
4867 kfree_skb(skb);
4868 goto errout;
4869 }
15e47304 4870 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4871 info->nlh, gfp_any());
4872 return;
21713ebc
TG
4873errout:
4874 if (err < 0)
5578689a 4875 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4876}
4877
8ed67789 4878static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4879 unsigned long event, void *ptr)
8ed67789 4880{
351638e7 4881 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4882 struct net *net = dev_net(dev);
8ed67789 4883
242d3a49
WC
4884 if (!(dev->flags & IFF_LOOPBACK))
4885 return NOTIFY_OK;
4886
4887 if (event == NETDEV_REGISTER) {
421842ed 4888 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 4889 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4890 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4891#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4892 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4893 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4894 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4895 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4896#endif
76da0704
WC
4897 } else if (event == NETDEV_UNREGISTER &&
4898 dev->reg_state != NETREG_UNREGISTERED) {
4899 /* NETDEV_UNREGISTER could be fired for multiple times by
4900 * netdev_wait_allrefs(). Make sure we only call this once.
4901 */
12d94a80 4902 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4904 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4905 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4906#endif
4907 }
4908
4909 return NOTIFY_OK;
4910}
4911
1da177e4
LT
4912/*
4913 * /proc
4914 */
4915
4916#ifdef CONFIG_PROC_FS
4917
33120b30 4918static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4919 .open = ipv6_route_open,
4920 .read = seq_read,
4921 .llseek = seq_lseek,
8d2ca1d7 4922 .release = seq_release_net,
33120b30
AD
4923};
4924
1da177e4
LT
4925static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4926{
69ddb805 4927 struct net *net = (struct net *)seq->private;
1da177e4 4928 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4929 net->ipv6.rt6_stats->fib_nodes,
4930 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4931 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4932 net->ipv6.rt6_stats->fib_rt_entries,
4933 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4934 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4935 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4936
4937 return 0;
4938}
4939
4940static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4941{
de05c557 4942 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4943}
4944
9a32144e 4945static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4946 .open = rt6_stats_seq_open,
4947 .read = seq_read,
4948 .llseek = seq_lseek,
b6fcbdb4 4949 .release = single_release_net,
1da177e4
LT
4950};
4951#endif /* CONFIG_PROC_FS */
4952
4953#ifdef CONFIG_SYSCTL
4954
1da177e4 4955static
fe2c6338 4956int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4957 void __user *buffer, size_t *lenp, loff_t *ppos)
4958{
c486da34
LAG
4959 struct net *net;
4960 int delay;
4961 if (!write)
1da177e4 4962 return -EINVAL;
c486da34
LAG
4963
4964 net = (struct net *)ctl->extra1;
4965 delay = net->ipv6.sysctl.flush_delay;
4966 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4967 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4968 return 0;
1da177e4
LT
4969}
4970
fe2c6338 4971struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4972 {
1da177e4 4973 .procname = "flush",
4990509f 4974 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4975 .maxlen = sizeof(int),
89c8b3a1 4976 .mode = 0200,
6d9f239a 4977 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4978 },
4979 {
1da177e4 4980 .procname = "gc_thresh",
9a7ec3a9 4981 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4982 .maxlen = sizeof(int),
4983 .mode = 0644,
6d9f239a 4984 .proc_handler = proc_dointvec,
1da177e4
LT
4985 },
4986 {
1da177e4 4987 .procname = "max_size",
4990509f 4988 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4989 .maxlen = sizeof(int),
4990 .mode = 0644,
6d9f239a 4991 .proc_handler = proc_dointvec,
1da177e4
LT
4992 },
4993 {
1da177e4 4994 .procname = "gc_min_interval",
4990509f 4995 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4996 .maxlen = sizeof(int),
4997 .mode = 0644,
6d9f239a 4998 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4999 },
5000 {
1da177e4 5001 .procname = "gc_timeout",
4990509f 5002 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5003 .maxlen = sizeof(int),
5004 .mode = 0644,
6d9f239a 5005 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5006 },
5007 {
1da177e4 5008 .procname = "gc_interval",
4990509f 5009 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5010 .maxlen = sizeof(int),
5011 .mode = 0644,
6d9f239a 5012 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5013 },
5014 {
1da177e4 5015 .procname = "gc_elasticity",
4990509f 5016 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5017 .maxlen = sizeof(int),
5018 .mode = 0644,
f3d3f616 5019 .proc_handler = proc_dointvec,
1da177e4
LT
5020 },
5021 {
1da177e4 5022 .procname = "mtu_expires",
4990509f 5023 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5024 .maxlen = sizeof(int),
5025 .mode = 0644,
6d9f239a 5026 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5027 },
5028 {
1da177e4 5029 .procname = "min_adv_mss",
4990509f 5030 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5031 .maxlen = sizeof(int),
5032 .mode = 0644,
f3d3f616 5033 .proc_handler = proc_dointvec,
1da177e4
LT
5034 },
5035 {
1da177e4 5036 .procname = "gc_min_interval_ms",
4990509f 5037 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5038 .maxlen = sizeof(int),
5039 .mode = 0644,
6d9f239a 5040 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5041 },
f8572d8f 5042 { }
1da177e4
LT
5043};
5044
2c8c1e72 5045struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5046{
5047 struct ctl_table *table;
5048
5049 table = kmemdup(ipv6_route_table_template,
5050 sizeof(ipv6_route_table_template),
5051 GFP_KERNEL);
5ee09105
YH
5052
5053 if (table) {
5054 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5055 table[0].extra1 = net;
86393e52 5056 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5057 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5058 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5059 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5060 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5061 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5062 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5063 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5064 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5065
5066 /* Don't export sysctls to unprivileged users */
5067 if (net->user_ns != &init_user_ns)
5068 table[0].procname = NULL;
5ee09105
YH
5069 }
5070
760f2d01
DL
5071 return table;
5072}
1da177e4
LT
5073#endif
5074
2c8c1e72 5075static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5076{
633d424b 5077 int ret = -ENOMEM;
8ed67789 5078
86393e52
AD
5079 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5080 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5081
fc66f95c
ED
5082 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5083 goto out_ip6_dst_ops;
5084
421842ed
DA
5085 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5086 sizeof(*net->ipv6.fib6_null_entry),
5087 GFP_KERNEL);
5088 if (!net->ipv6.fib6_null_entry)
5089 goto out_ip6_dst_entries;
5090
8ed67789
DL
5091 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5092 sizeof(*net->ipv6.ip6_null_entry),
5093 GFP_KERNEL);
5094 if (!net->ipv6.ip6_null_entry)
421842ed 5095 goto out_fib6_null_entry;
d8d1f30b 5096 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5097 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5098 ip6_template_metrics, true);
8ed67789
DL
5099
5100#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5101 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5102 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5103 sizeof(*net->ipv6.ip6_prohibit_entry),
5104 GFP_KERNEL);
68fffc67
PZ
5105 if (!net->ipv6.ip6_prohibit_entry)
5106 goto out_ip6_null_entry;
d8d1f30b 5107 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5108 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5109 ip6_template_metrics, true);
8ed67789
DL
5110
5111 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5112 sizeof(*net->ipv6.ip6_blk_hole_entry),
5113 GFP_KERNEL);
68fffc67
PZ
5114 if (!net->ipv6.ip6_blk_hole_entry)
5115 goto out_ip6_prohibit_entry;
d8d1f30b 5116 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5117 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5118 ip6_template_metrics, true);
8ed67789
DL
5119#endif
5120
b339a47c
PZ
5121 net->ipv6.sysctl.flush_delay = 0;
5122 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5123 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5124 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5125 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5126 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5127 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5128 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5129
6891a346
BT
5130 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5131
8ed67789
DL
5132 ret = 0;
5133out:
5134 return ret;
f2fc6a54 5135
68fffc67
PZ
5136#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5137out_ip6_prohibit_entry:
5138 kfree(net->ipv6.ip6_prohibit_entry);
5139out_ip6_null_entry:
5140 kfree(net->ipv6.ip6_null_entry);
5141#endif
421842ed
DA
5142out_fib6_null_entry:
5143 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5144out_ip6_dst_entries:
5145 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5146out_ip6_dst_ops:
f2fc6a54 5147 goto out;
cdb18761
DL
5148}
5149
2c8c1e72 5150static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5151{
421842ed 5152 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5153 kfree(net->ipv6.ip6_null_entry);
5154#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5155 kfree(net->ipv6.ip6_prohibit_entry);
5156 kfree(net->ipv6.ip6_blk_hole_entry);
5157#endif
41bb78b4 5158 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5159}
5160
d189634e
TG
5161static int __net_init ip6_route_net_init_late(struct net *net)
5162{
5163#ifdef CONFIG_PROC_FS
d4beaa66 5164 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5165 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5166#endif
5167 return 0;
5168}
5169
5170static void __net_exit ip6_route_net_exit_late(struct net *net)
5171{
5172#ifdef CONFIG_PROC_FS
ece31ffd
G
5173 remove_proc_entry("ipv6_route", net->proc_net);
5174 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5175#endif
5176}
5177
cdb18761
DL
5178static struct pernet_operations ip6_route_net_ops = {
5179 .init = ip6_route_net_init,
5180 .exit = ip6_route_net_exit,
5181};
5182
c3426b47
DM
5183static int __net_init ipv6_inetpeer_init(struct net *net)
5184{
5185 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5186
5187 if (!bp)
5188 return -ENOMEM;
5189 inet_peer_base_init(bp);
5190 net->ipv6.peers = bp;
5191 return 0;
5192}
5193
5194static void __net_exit ipv6_inetpeer_exit(struct net *net)
5195{
5196 struct inet_peer_base *bp = net->ipv6.peers;
5197
5198 net->ipv6.peers = NULL;
56a6b248 5199 inetpeer_invalidate_tree(bp);
c3426b47
DM
5200 kfree(bp);
5201}
5202
2b823f72 5203static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5204 .init = ipv6_inetpeer_init,
5205 .exit = ipv6_inetpeer_exit,
5206};
5207
d189634e
TG
5208static struct pernet_operations ip6_route_net_late_ops = {
5209 .init = ip6_route_net_init_late,
5210 .exit = ip6_route_net_exit_late,
5211};
5212
8ed67789
DL
5213static struct notifier_block ip6_route_dev_notifier = {
5214 .notifier_call = ip6_route_dev_notify,
242d3a49 5215 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5216};
5217
2f460933
WC
5218void __init ip6_route_init_special_entries(void)
5219{
5220 /* Registering of the loopback is done before this portion of code,
5221 * the loopback reference in rt6_info will not be taken, do it
5222 * manually for init_net */
421842ed 5223 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5224 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5225 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5226 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5227 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5228 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5229 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5230 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5231 #endif
5232}
5233
433d49c3 5234int __init ip6_route_init(void)
1da177e4 5235{
433d49c3 5236 int ret;
8d0b94af 5237 int cpu;
433d49c3 5238
9a7ec3a9
DL
5239 ret = -ENOMEM;
5240 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5241 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5242 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5243 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5244 goto out;
14e50e57 5245
fc66f95c 5246 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5247 if (ret)
bdb3289f 5248 goto out_kmem_cache;
bdb3289f 5249
c3426b47
DM
5250 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5251 if (ret)
e8803b6c 5252 goto out_dst_entries;
2a0c451a 5253
7e52b33b
DM
5254 ret = register_pernet_subsys(&ip6_route_net_ops);
5255 if (ret)
5256 goto out_register_inetpeer;
c3426b47 5257
5dc121e9
AE
5258 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5259
e8803b6c 5260 ret = fib6_init();
433d49c3 5261 if (ret)
8ed67789 5262 goto out_register_subsys;
433d49c3 5263
433d49c3
DL
5264 ret = xfrm6_init();
5265 if (ret)
e8803b6c 5266 goto out_fib6_init;
c35b7e72 5267
433d49c3
DL
5268 ret = fib6_rules_init();
5269 if (ret)
5270 goto xfrm6_init;
7e5449c2 5271
d189634e
TG
5272 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5273 if (ret)
5274 goto fib6_rules_init;
5275
16feebcf
FW
5276 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5277 inet6_rtm_newroute, NULL, 0);
5278 if (ret < 0)
5279 goto out_register_late_subsys;
5280
5281 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5282 inet6_rtm_delroute, NULL, 0);
5283 if (ret < 0)
5284 goto out_register_late_subsys;
5285
5286 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5287 inet6_rtm_getroute, NULL,
5288 RTNL_FLAG_DOIT_UNLOCKED);
5289 if (ret < 0)
d189634e 5290 goto out_register_late_subsys;
c127ea2c 5291
8ed67789 5292 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5293 if (ret)
d189634e 5294 goto out_register_late_subsys;
8ed67789 5295
8d0b94af
MKL
5296 for_each_possible_cpu(cpu) {
5297 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5298
5299 INIT_LIST_HEAD(&ul->head);
5300 spin_lock_init(&ul->lock);
5301 }
5302
433d49c3
DL
5303out:
5304 return ret;
5305
d189634e 5306out_register_late_subsys:
16feebcf 5307 rtnl_unregister_all(PF_INET6);
d189634e 5308 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5309fib6_rules_init:
433d49c3
DL
5310 fib6_rules_cleanup();
5311xfrm6_init:
433d49c3 5312 xfrm6_fini();
2a0c451a
TG
5313out_fib6_init:
5314 fib6_gc_cleanup();
8ed67789
DL
5315out_register_subsys:
5316 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5317out_register_inetpeer:
5318 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5319out_dst_entries:
5320 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5321out_kmem_cache:
f2fc6a54 5322 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5323 goto out;
1da177e4
LT
5324}
5325
5326void ip6_route_cleanup(void)
5327{
8ed67789 5328 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5329 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5330 fib6_rules_cleanup();
1da177e4 5331 xfrm6_fini();
1da177e4 5332 fib6_gc_cleanup();
c3426b47 5333 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5334 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5335 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5336 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5337}