net/ipv4: Update fib_table_lookup tracepoint
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
b811580d 67#include <trace/events/fib6.h>
1da177e4 68
7c0f6ba6 69#include <linux/uaccess.h>
1da177e4
LT
70
71#ifdef CONFIG_SYSCTL
72#include <linux/sysctl.h>
73#endif
74
/* Result codes of the neighbour check done while scoring a route.
 * All failure codes are negative; rt6_score_route() passes them straight
 * back to its caller, which decides between skipping the route
 * (FAIL_HARD) and requesting round-robin (FAIL_DO_RR).
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
81
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
8d1c802b
DA
100static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
101static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 102static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 103 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 104 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
8d1c802b 107static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
108 struct in6_addr *daddr,
109 struct in6_addr *saddr);
1da177e4 110
70ceb4f5 111#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 112static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 113 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
114 const struct in6_addr *gwaddr,
115 struct net_device *dev,
95c96174 116 unsigned int pref);
8d1c802b 117static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev);
70ceb4f5
YH
121#endif
122
8d0b94af
MKL
123struct uncached_list {
124 spinlock_t lock;
125 struct list_head head;
126};
127
128static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
129
510c321b 130void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
131{
132 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
133
8d0b94af
MKL
134 rt->rt6i_uncached_list = ul;
135
136 spin_lock_bh(&ul->lock);
137 list_add_tail(&rt->rt6i_uncached, &ul->head);
138 spin_unlock_bh(&ul->lock);
139}
140
510c321b 141void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
142{
143 if (!list_empty(&rt->rt6i_uncached)) {
144 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 145 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
81eb8447 149 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
150 spin_unlock_bh(&ul->lock);
151 }
152}
153
154static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
155{
156 struct net_device *loopback_dev = net->loopback_dev;
157 int cpu;
158
e332bc67
EB
159 if (dev == loopback_dev)
160 return;
161
8d0b94af
MKL
162 for_each_possible_cpu(cpu) {
163 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
164 struct rt6_info *rt;
165
166 spin_lock_bh(&ul->lock);
167 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
168 struct inet6_dev *rt_idev = rt->rt6i_idev;
169 struct net_device *rt_dev = rt->dst.dev;
170
e332bc67 171 if (rt_idev->dev == dev) {
8d0b94af
MKL
172 rt->rt6i_idev = in6_dev_get(loopback_dev);
173 in6_dev_put(rt_idev);
174 }
175
e332bc67 176 if (rt_dev == dev) {
8d0b94af
MKL
177 rt->dst.dev = loopback_dev;
178 dev_hold(rt->dst.dev);
179 dev_put(rt_dev);
180 }
181 }
182 spin_unlock_bh(&ul->lock);
183 }
184}
185
f8a1b43b 186static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
187 struct sk_buff *skb,
188 const void *daddr)
39232973 189{
a7563f34 190 if (!ipv6_addr_any(p))
39232973 191 return (const void *) p;
f894cbf8
DM
192 else if (skb)
193 return &ipv6_hdr(skb)->daddr;
39232973
DM
194 return daddr;
195}
196
f8a1b43b
DA
197struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
198 struct net_device *dev,
199 struct sk_buff *skb,
200 const void *daddr)
d3aaeb38 201{
39232973
DM
202 struct neighbour *n;
203
f8a1b43b
DA
204 daddr = choose_neigh_daddr(gw, skb, daddr);
205 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
206 if (n)
207 return n;
f8a1b43b
DA
208 return neigh_create(&nd_tbl, daddr, dev);
209}
210
211static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
212 struct sk_buff *skb,
213 const void *daddr)
214{
215 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
216
217 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
218}
219
63fca65d
JA
220static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
221{
222 struct net_device *dev = dst->dev;
223 struct rt6_info *rt = (struct rt6_info *)dst;
224
f8a1b43b 225 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
226 if (!daddr)
227 return;
228 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
229 return;
230 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
231 return;
232 __ipv6_confirm_neigh(dev, daddr);
233}
234
9a7ec3a9 235static struct dst_ops ip6_dst_ops_template = {
1da177e4 236 .family = AF_INET6,
1da177e4
LT
237 .gc = ip6_dst_gc,
238 .gc_thresh = 1024,
239 .check = ip6_dst_check,
0dbaee3b 240 .default_advmss = ip6_default_advmss,
ebb762f2 241 .mtu = ip6_mtu,
d4ead6b3 242 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
243 .destroy = ip6_dst_destroy,
244 .ifdown = ip6_dst_ifdown,
245 .negative_advice = ip6_negative_advice,
246 .link_failure = ip6_link_failure,
247 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 248 .redirect = rt6_do_redirect,
9f8955cc 249 .local_out = __ip6_local_out,
f8a1b43b 250 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 251 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
252};
253
ebb762f2 254static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 255{
618f9bc7
SK
256 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
257
258 return mtu ? : dst->dev->mtu;
ec831ea7
RD
259}
260
6700c270
DM
261static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
262 struct sk_buff *skb, u32 mtu)
14e50e57
DM
263{
264}
265
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* redirects are ignored */
}
270
14e50e57
DM
271static struct dst_ops ip6_dst_blackhole_ops = {
272 .family = AF_INET6,
14e50e57
DM
273 .destroy = ip6_dst_destroy,
274 .check = ip6_dst_check,
ebb762f2 275 .mtu = ip6_blackhole_mtu,
214f45c9 276 .default_advmss = ip6_default_advmss,
14e50e57 277 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 278 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 279 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 280 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
281};
282
62fa8a84 283static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 284 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
285};
286
8d1c802b 287static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
288 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
289 .fib6_protocol = RTPROT_KERNEL,
290 .fib6_metric = ~(u32)0,
291 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
292 .fib6_type = RTN_UNREACHABLE,
293 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
294};
295
fb0af4c7 296static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
297 .dst = {
298 .__refcnt = ATOMIC_INIT(1),
299 .__use = 1,
2c20cbd7 300 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 301 .error = -ENETUNREACH,
d8d1f30b
CG
302 .input = ip6_pkt_discard,
303 .output = ip6_pkt_discard_out,
1da177e4
LT
304 },
305 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
306};
307
101367c2
TG
308#ifdef CONFIG_IPV6_MULTIPLE_TABLES
309
fb0af4c7 310static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
311 .dst = {
312 .__refcnt = ATOMIC_INIT(1),
313 .__use = 1,
2c20cbd7 314 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 315 .error = -EACCES,
d8d1f30b
CG
316 .input = ip6_pkt_prohibit,
317 .output = ip6_pkt_prohibit_out,
101367c2
TG
318 },
319 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
320};
321
fb0af4c7 322static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
323 .dst = {
324 .__refcnt = ATOMIC_INIT(1),
325 .__use = 1,
2c20cbd7 326 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 327 .error = -EINVAL,
d8d1f30b 328 .input = dst_discard,
ede2059d 329 .output = dst_discard_out,
101367c2
TG
330 },
331 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
332};
333
334#endif
335
ebfa45f0
MKL
336static void rt6_info_init(struct rt6_info *rt)
337{
338 struct dst_entry *dst = &rt->dst;
339
340 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
341 INIT_LIST_HEAD(&rt->rt6i_uncached);
342}
343
1da177e4 344/* allocate dst with ip6_dst_ops */
93531c67
DA
345struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
346 int flags)
1da177e4 347{
97bab73f 348 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 349 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 350
81eb8447 351 if (rt) {
ebfa45f0 352 rt6_info_init(rt);
81eb8447
WW
353 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
354 }
8104891b 355
cf911662 356 return rt;
1da177e4 357}
9ab179d8 358EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 359
1da177e4
LT
360static void ip6_dst_destroy(struct dst_entry *dst)
361{
362 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 363 struct fib6_info *from;
8d0b94af 364 struct inet6_dev *idev;
1da177e4 365
4b32b5ad 366 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
367 rt6_uncached_list_del(rt);
368
369 idev = rt->rt6i_idev;
38308473 370 if (idev) {
1da177e4
LT
371 rt->rt6i_idev = NULL;
372 in6_dev_put(idev);
1ab1457c 373 }
d4ead6b3 374
a68886a6
DA
375 rcu_read_lock();
376 from = rcu_dereference(rt->from);
377 rcu_assign_pointer(rt->from, NULL);
93531c67 378 fib6_info_release(from);
a68886a6 379 rcu_read_unlock();
b3419363
DM
380}
381
1da177e4
LT
382static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
383 int how)
384{
385 struct rt6_info *rt = (struct rt6_info *)dst;
386 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 387 struct net_device *loopback_dev =
c346dca1 388 dev_net(dev)->loopback_dev;
1da177e4 389
e5645f51
WW
390 if (idev && idev->dev != loopback_dev) {
391 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
392 if (loopback_idev) {
393 rt->rt6i_idev = loopback_idev;
394 in6_dev_put(idev);
97cac082 395 }
1da177e4
LT
396 }
397}
398
5973fb1e
MKL
399static bool __rt6_check_expired(const struct rt6_info *rt)
400{
401 if (rt->rt6i_flags & RTF_EXPIRES)
402 return time_after(jiffies, rt->dst.expires);
403 else
404 return false;
405}
406
a50feda5 407static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 408{
a68886a6
DA
409 struct fib6_info *from;
410
411 from = rcu_dereference(rt->from);
412
1716a961
G
413 if (rt->rt6i_flags & RTF_EXPIRES) {
414 if (time_after(jiffies, rt->dst.expires))
a50feda5 415 return true;
a68886a6 416 } else if (from) {
1e2ea8ad 417 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 418 fib6_check_expired(from);
1716a961 419 }
a50feda5 420 return false;
1da177e4
LT
421}
422
3b290a31
DA
423struct fib6_info *fib6_multipath_select(const struct net *net,
424 struct fib6_info *match,
425 struct flowi6 *fl6, int oif,
426 const struct sk_buff *skb,
427 int strict)
51ebd318 428{
8d1c802b 429 struct fib6_info *sibling, *next_sibling;
51ebd318 430
b673d6cc
JS
431 /* We might have already computed the hash for ICMPv6 errors. In such
432 * case it will always be non-zero. Otherwise now is the time to do it.
433 */
434 if (!fl6->mp_hash)
b4bac172 435 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 436
5e670d84 437 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
438 return match;
439
93c2fb25
DA
440 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
441 fib6_siblings) {
5e670d84
DA
442 int nh_upper_bound;
443
444 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
445 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
446 continue;
447 if (rt6_score_route(sibling, oif, strict) < 0)
448 break;
449 match = sibling;
450 break;
451 }
452
51ebd318
ND
453 return match;
454}
455
1da177e4 456/*
66f5d6ce 457 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
458 */
459
8d1c802b
DA
460static inline struct fib6_info *rt6_device_match(struct net *net,
461 struct fib6_info *rt,
b71d1d42 462 const struct in6_addr *saddr,
1da177e4 463 int oif,
d420895e 464 int flags)
1da177e4 465{
8d1c802b 466 struct fib6_info *sprt;
1da177e4 467
5e670d84
DA
468 if (!oif && ipv6_addr_any(saddr) &&
469 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 470 return rt;
dd3abc4e 471
8fb11a9a 472 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 473 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 474
5e670d84 475 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
476 continue;
477
dd3abc4e 478 if (oif) {
1da177e4
LT
479 if (dev->ifindex == oif)
480 return sprt;
dd3abc4e
YH
481 } else {
482 if (ipv6_chk_addr(net, saddr, dev,
483 flags & RT6_LOOKUP_F_IFACE))
484 return sprt;
1da177e4 485 }
dd3abc4e 486 }
1da177e4 487
eea68cd3
DA
488 if (oif && flags & RT6_LOOKUP_F_IFACE)
489 return net->ipv6.fib6_null_entry;
8067bb8c 490
421842ed 491 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
492}
493
27097255 494#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
495struct __rt6_probe_work {
496 struct work_struct work;
497 struct in6_addr target;
498 struct net_device *dev;
499};
500
501static void rt6_probe_deferred(struct work_struct *w)
502{
503 struct in6_addr mcaddr;
504 struct __rt6_probe_work *work =
505 container_of(w, struct __rt6_probe_work, work);
506
507 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 508 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 509 dev_put(work->dev);
662f5533 510 kfree(work);
c2f17e82
HFS
511}
512
8d1c802b 513static void rt6_probe(struct fib6_info *rt)
27097255 514{
990edb42 515 struct __rt6_probe_work *work;
5e670d84 516 const struct in6_addr *nh_gw;
f2c31e32 517 struct neighbour *neigh;
5e670d84
DA
518 struct net_device *dev;
519
27097255
YH
520 /*
521 * Okay, this does not seem to be appropriate
522 * for now, however, we need to check if it
523 * is really so; aka Router Reachability Probing.
524 *
525 * Router Reachability Probe MUST be rate-limited
526 * to no more than one per minute.
527 */
93c2fb25 528 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 529 return;
5e670d84
DA
530
531 nh_gw = &rt->fib6_nh.nh_gw;
532 dev = rt->fib6_nh.nh_dev;
2152caea 533 rcu_read_lock_bh();
5e670d84 534 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 535 if (neigh) {
dcd1f572
DA
536 struct inet6_dev *idev;
537
8d6c31bf
MKL
538 if (neigh->nud_state & NUD_VALID)
539 goto out;
540
dcd1f572 541 idev = __in6_dev_get(dev);
990edb42 542 work = NULL;
2152caea 543 write_lock(&neigh->lock);
990edb42
MKL
544 if (!(neigh->nud_state & NUD_VALID) &&
545 time_after(jiffies,
dcd1f572 546 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
547 work = kmalloc(sizeof(*work), GFP_ATOMIC);
548 if (work)
549 __neigh_set_probe_once(neigh);
c2f17e82 550 }
2152caea 551 write_unlock(&neigh->lock);
990edb42
MKL
552 } else {
553 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 554 }
990edb42
MKL
555
556 if (work) {
557 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
558 work->target = *nh_gw;
559 dev_hold(dev);
560 work->dev = dev;
990edb42
MKL
561 schedule_work(&work->work);
562 }
563
8d6c31bf 564out:
2152caea 565 rcu_read_unlock_bh();
27097255
YH
566}
567#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* intentionally empty */
}
571#endif
572
1da177e4 573/*
554cfb7e 574 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 575 */
8d1c802b 576static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 577{
5e670d84
DA
578 const struct net_device *dev = rt->fib6_nh.nh_dev;
579
161980f4 580 if (!oif || dev->ifindex == oif)
554cfb7e 581 return 2;
161980f4 582 return 0;
554cfb7e 583}
1da177e4 584
8d1c802b 585static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 586{
afc154e9 587 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 588 struct neighbour *neigh;
f2c31e32 589
93c2fb25
DA
590 if (rt->fib6_flags & RTF_NONEXTHOP ||
591 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 592 return RT6_NUD_SUCCEED;
145a3621
YH
593
594 rcu_read_lock_bh();
5e670d84
DA
595 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
596 &rt->fib6_nh.nh_gw);
145a3621
YH
597 if (neigh) {
598 read_lock(&neigh->lock);
554cfb7e 599 if (neigh->nud_state & NUD_VALID)
afc154e9 600 ret = RT6_NUD_SUCCEED;
398bcbeb 601#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 602 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 603 ret = RT6_NUD_SUCCEED;
7e980569
JB
604 else
605 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 606#endif
145a3621 607 read_unlock(&neigh->lock);
afc154e9
HFS
608 } else {
609 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 610 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 611 }
145a3621
YH
612 rcu_read_unlock_bh();
613
a5a81f0b 614 return ret;
1da177e4
LT
615}
616
8d1c802b 617static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 618{
a5a81f0b 619 int m;
1ab1457c 620
4d0c5911 621 m = rt6_check_dev(rt, oif);
77d16f45 622 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 623 return RT6_NUD_FAIL_HARD;
ebacaaa0 624#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 625 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 626#endif
afc154e9
HFS
627 if (strict & RT6_LOOKUP_F_REACHABLE) {
628 int n = rt6_check_neigh(rt);
629 if (n < 0)
630 return n;
631 }
554cfb7e
YH
632 return m;
633}
634
dcd1f572
DA
635/* called with rc_read_lock held */
636static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
637{
638 const struct net_device *dev = fib6_info_nh_dev(f6i);
639 bool rc = false;
640
641 if (dev) {
642 const struct inet6_dev *idev = __in6_dev_get(dev);
643
644 rc = !!idev->cnf.ignore_routes_with_linkdown;
645 }
646
647 return rc;
648}
649
8d1c802b
DA
650static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
651 int *mpri, struct fib6_info *match,
afc154e9 652 bool *do_rr)
554cfb7e 653{
f11e6659 654 int m;
afc154e9 655 bool match_do_rr = false;
35103d11 656
5e670d84 657 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
658 goto out;
659
dcd1f572 660 if (fib6_ignore_linkdown(rt) &&
5e670d84 661 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 662 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 663 goto out;
f11e6659 664
14895687 665 if (fib6_check_expired(rt))
f11e6659
DM
666 goto out;
667
668 m = rt6_score_route(rt, oif, strict);
7e980569 669 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
670 match_do_rr = true;
671 m = 0; /* lowest valid score */
7e980569 672 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 673 goto out;
afc154e9
HFS
674 }
675
676 if (strict & RT6_LOOKUP_F_REACHABLE)
677 rt6_probe(rt);
f11e6659 678
7e980569 679 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 680 if (m > *mpri) {
afc154e9 681 *do_rr = match_do_rr;
f11e6659
DM
682 *mpri = m;
683 match = rt;
f11e6659 684 }
f11e6659
DM
685out:
686 return match;
687}
688
8d1c802b
DA
689static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
690 struct fib6_info *leaf,
691 struct fib6_info *rr_head,
afc154e9
HFS
692 u32 metric, int oif, int strict,
693 bool *do_rr)
f11e6659 694{
8d1c802b 695 struct fib6_info *rt, *match, *cont;
554cfb7e 696 int mpri = -1;
1da177e4 697
f11e6659 698 match = NULL;
9fbdcfaf 699 cont = NULL;
8fb11a9a 700 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 701 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
702 cont = rt;
703 break;
704 }
705
706 match = find_match(rt, oif, strict, &mpri, match, do_rr);
707 }
708
66f5d6ce 709 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 710 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 711 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
712 cont = rt;
713 break;
714 }
715
afc154e9 716 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
717 }
718
719 if (match || !cont)
720 return match;
721
8fb11a9a 722 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 723 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 724
f11e6659
DM
725 return match;
726}
1da177e4 727
8d1c802b 728static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 729 int oif, int strict)
f11e6659 730{
8d1c802b
DA
731 struct fib6_info *leaf = rcu_dereference(fn->leaf);
732 struct fib6_info *match, *rt0;
afc154e9 733 bool do_rr = false;
17ecf590 734 int key_plen;
1da177e4 735
421842ed
DA
736 if (!leaf || leaf == net->ipv6.fib6_null_entry)
737 return net->ipv6.fib6_null_entry;
8d1040e8 738
66f5d6ce 739 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 740 if (!rt0)
66f5d6ce 741 rt0 = leaf;
1da177e4 742
17ecf590
WW
743 /* Double check to make sure fn is not an intermediate node
744 * and fn->leaf does not points to its child's leaf
745 * (This might happen if all routes under fn are deleted from
746 * the tree and fib6_repair_tree() is called on the node.)
747 */
93c2fb25 748 key_plen = rt0->fib6_dst.plen;
17ecf590 749#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
750 if (rt0->fib6_src.plen)
751 key_plen = rt0->fib6_src.plen;
17ecf590
WW
752#endif
753 if (fn->fn_bit != key_plen)
421842ed 754 return net->ipv6.fib6_null_entry;
17ecf590 755
93c2fb25 756 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 757 &do_rr);
1da177e4 758
afc154e9 759 if (do_rr) {
8fb11a9a 760 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 761
554cfb7e 762 /* no entries matched; do round-robin */
93c2fb25 763 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 764 next = leaf;
f11e6659 765
66f5d6ce 766 if (next != rt0) {
93c2fb25 767 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 768 /* make sure next is not being deleted from the tree */
93c2fb25 769 if (next->fib6_node)
66f5d6ce 770 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 771 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 772 }
1da177e4 773 }
1da177e4 774
421842ed 775 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
776}
777
8d1c802b 778static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 779{
93c2fb25 780 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
781}
782
70ceb4f5
YH
783#ifdef CONFIG_IPV6_ROUTE_INFO
784int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 785 const struct in6_addr *gwaddr)
70ceb4f5 786{
c346dca1 787 struct net *net = dev_net(dev);
70ceb4f5
YH
788 struct route_info *rinfo = (struct route_info *) opt;
789 struct in6_addr prefix_buf, *prefix;
790 unsigned int pref;
4bed72e4 791 unsigned long lifetime;
8d1c802b 792 struct fib6_info *rt;
70ceb4f5
YH
793
794 if (len < sizeof(struct route_info)) {
795 return -EINVAL;
796 }
797
798 /* Sanity check for prefix_len and length */
799 if (rinfo->length > 3) {
800 return -EINVAL;
801 } else if (rinfo->prefix_len > 128) {
802 return -EINVAL;
803 } else if (rinfo->prefix_len > 64) {
804 if (rinfo->length < 2) {
805 return -EINVAL;
806 }
807 } else if (rinfo->prefix_len > 0) {
808 if (rinfo->length < 1) {
809 return -EINVAL;
810 }
811 }
812
813 pref = rinfo->route_pref;
814 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 815 return -EINVAL;
70ceb4f5 816
4bed72e4 817 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
818
819 if (rinfo->length == 3)
820 prefix = (struct in6_addr *)rinfo->prefix;
821 else {
822 /* this function is safe */
823 ipv6_addr_prefix(&prefix_buf,
824 (struct in6_addr *)rinfo->prefix,
825 rinfo->prefix_len);
826 prefix = &prefix_buf;
827 }
828
f104a567 829 if (rinfo->prefix_len == 0)
afb1d4b5 830 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
831 else
832 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 833 gwaddr, dev);
70ceb4f5
YH
834
835 if (rt && !lifetime) {
afb1d4b5 836 ip6_del_rt(net, rt);
70ceb4f5
YH
837 rt = NULL;
838 }
839
840 if (!rt && lifetime)
830218c1
DA
841 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
842 dev, pref);
70ceb4f5 843 else if (rt)
93c2fb25
DA
844 rt->fib6_flags = RTF_ROUTEINFO |
845 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
846
847 if (rt) {
1716a961 848 if (!addrconf_finite_timeout(lifetime))
14895687 849 fib6_clean_expires(rt);
1716a961 850 else
14895687 851 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 852
93531c67 853 fib6_info_release(rt);
70ceb4f5
YH
854 }
855 return 0;
856}
857#endif
858
ae90d867
DA
859/*
860 * Misc support functions
861 */
862
863/* called with rcu_lock held */
8d1c802b 864static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 865{
5e670d84 866 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 867
93c2fb25 868 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
869 /* for copies of local routes, dst->dev needs to be the
870 * device if it is a master device, the master device if
871 * device is enslaved, and the loopback as the default
872 */
873 if (netif_is_l3_slave(dev) &&
93c2fb25 874 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
875 dev = l3mdev_master_dev_rcu(dev);
876 else if (!netif_is_l3_master(dev))
877 dev = dev_net(dev)->loopback_dev;
878 /* last case is netif_is_l3_master(dev) is true in which
879 * case we want dev returned to be dev
880 */
881 }
882
883 return dev;
884}
885
6edb3c96
DA
886static const int fib6_prop[RTN_MAX + 1] = {
887 [RTN_UNSPEC] = 0,
888 [RTN_UNICAST] = 0,
889 [RTN_LOCAL] = 0,
890 [RTN_BROADCAST] = 0,
891 [RTN_ANYCAST] = 0,
892 [RTN_MULTICAST] = 0,
893 [RTN_BLACKHOLE] = -EINVAL,
894 [RTN_UNREACHABLE] = -EHOSTUNREACH,
895 [RTN_PROHIBIT] = -EACCES,
896 [RTN_THROW] = -EAGAIN,
897 [RTN_NAT] = -EINVAL,
898 [RTN_XRESOLVE] = -EINVAL,
899};
900
901static int ip6_rt_type_to_error(u8 fib6_type)
902{
903 return fib6_prop[fib6_type];
904}
905
8d1c802b 906static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
907{
908 unsigned short flags = 0;
909
910 if (rt->dst_nocount)
911 flags |= DST_NOCOUNT;
912 if (rt->dst_nopolicy)
913 flags |= DST_NOPOLICY;
914 if (rt->dst_host)
915 flags |= DST_HOST;
916
917 return flags;
918}
919
8d1c802b 920static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
921{
922 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
923
924 switch (ort->fib6_type) {
925 case RTN_BLACKHOLE:
926 rt->dst.output = dst_discard_out;
927 rt->dst.input = dst_discard;
928 break;
929 case RTN_PROHIBIT:
930 rt->dst.output = ip6_pkt_prohibit_out;
931 rt->dst.input = ip6_pkt_prohibit;
932 break;
933 case RTN_THROW:
934 case RTN_UNREACHABLE:
935 default:
936 rt->dst.output = ip6_pkt_discard_out;
937 rt->dst.input = ip6_pkt_discard;
938 break;
939 }
940}
941
8d1c802b 942static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 943{
3b6761d1
DA
944 rt->dst.flags |= fib6_info_dst_flags(ort);
945
93c2fb25 946 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
947 ip6_rt_init_dst_reject(rt, ort);
948 return;
949 }
950
951 rt->dst.error = 0;
952 rt->dst.output = ip6_output;
953
954 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96 955 rt->dst.input = ip6_input;
93c2fb25 956 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
957 rt->dst.input = ip6_mc_input;
958 } else {
959 rt->dst.input = ip6_forward;
960 }
961
962 if (ort->fib6_nh.nh_lwtstate) {
963 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
964 lwtunnel_set_redirect(&rt->dst);
965 }
966
967 rt->dst.lastuse = jiffies;
968}
969
8d1c802b 970static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 971{
ae90d867 972 rt->rt6i_flags &= ~RTF_EXPIRES;
93531c67 973 fib6_info_hold(from);
a68886a6 974 rcu_assign_pointer(rt->from, from);
d4ead6b3
DA
975 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
976 if (from->fib6_metrics != &dst_default_metrics) {
977 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
978 refcount_inc(&from->fib6_metrics->refcnt);
979 }
ae90d867
DA
980}
981
8d1c802b 982static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 983{
dcd1f572
DA
984 struct net_device *dev = fib6_info_nh_dev(ort);
985
6edb3c96
DA
986 ip6_rt_init_dst(rt, ort);
987
93c2fb25 988 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 989 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 990 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 991 rt->rt6i_flags = ort->fib6_flags;
ae90d867 992 rt6_set_from(rt, ort);
ae90d867 993#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 994 rt->rt6i_src = ort->fib6_src;
ae90d867 995#endif
93c2fb25 996 rt->rt6i_prefsrc = ort->fib6_prefsrc;
5e670d84 997 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
998}
999
a3c00e46
MKL
1000static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1001 struct in6_addr *saddr)
1002{
66f5d6ce 1003 struct fib6_node *pn, *sn;
a3c00e46
MKL
1004 while (1) {
1005 if (fn->fn_flags & RTN_TL_ROOT)
1006 return NULL;
66f5d6ce
WW
1007 pn = rcu_dereference(fn->parent);
1008 sn = FIB6_SUBTREE(pn);
1009 if (sn && sn != fn)
6454743b 1010 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1011 else
1012 fn = pn;
1013 if (fn->fn_flags & RTN_RTINFO)
1014 return fn;
1015 }
1016}
c71099ac 1017
d3843fe5
WW
1018static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1019 bool null_fallback)
1020{
1021 struct rt6_info *rt = *prt;
1022
1023 if (dst_hold_safe(&rt->dst))
1024 return true;
1025 if (null_fallback) {
1026 rt = net->ipv6.ip6_null_entry;
1027 dst_hold(&rt->dst);
1028 } else {
1029 rt = NULL;
1030 }
1031 *prt = rt;
1032 return false;
1033}
1034
dec9b0e2 1035/* called with rcu_lock held */
8d1c802b 1036static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1037{
3b6761d1 1038 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1039 struct net_device *dev = rt->fib6_nh.nh_dev;
1040 struct rt6_info *nrt;
1041
93531c67 1042 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1043 if (nrt)
1044 ip6_rt_copy_init(nrt, rt);
1045
1046 return nrt;
1047}
1048
8ed67789
DL
1049static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1050 struct fib6_table *table,
b75cc8f9
DA
1051 struct flowi6 *fl6,
1052 const struct sk_buff *skb,
1053 int flags)
1da177e4 1054{
8d1c802b 1055 struct fib6_info *f6i;
1da177e4 1056 struct fib6_node *fn;
23fb93a4 1057 struct rt6_info *rt;
1da177e4 1058
b6cdbc85
DA
1059 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1060 flags &= ~RT6_LOOKUP_F_IFACE;
1061
66f5d6ce 1062 rcu_read_lock();
6454743b 1063 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1064restart:
23fb93a4
DA
1065 f6i = rcu_dereference(fn->leaf);
1066 if (!f6i) {
1067 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1068 } else {
23fb93a4 1069 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1070 fl6->flowi6_oif, flags);
93c2fb25 1071 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1072 f6i = fib6_multipath_select(net, f6i, fl6,
1073 fl6->flowi6_oif, skb,
1074 flags);
66f5d6ce 1075 }
23fb93a4 1076 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1077 fn = fib6_backtrack(fn, &fl6->saddr);
1078 if (fn)
1079 goto restart;
1080 }
23fb93a4 1081
d4bea421
DA
1082 trace_fib6_table_lookup(net, f6i, table, fl6);
1083
2b760fcf 1084 /* Search through exception table */
23fb93a4
DA
1085 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1086 if (rt) {
dec9b0e2
DA
1087 if (ip6_hold_safe(net, &rt, true))
1088 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1089 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1090 rt = net->ipv6.ip6_null_entry;
1091 dst_hold(&rt->dst);
23fb93a4
DA
1092 } else {
1093 rt = ip6_create_rt_rcu(f6i);
1094 if (!rt) {
1095 rt = net->ipv6.ip6_null_entry;
1096 dst_hold(&rt->dst);
1097 }
dec9b0e2 1098 }
d3843fe5 1099
66f5d6ce 1100 rcu_read_unlock();
b811580d 1101
c71099ac 1102 return rt;
c71099ac
TG
1103}
1104
67ba4152 1105struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1106 const struct sk_buff *skb, int flags)
ea6e574e 1107{
b75cc8f9 1108 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1109}
1110EXPORT_SYMBOL_GPL(ip6_route_lookup);
1111
9acd9f3a 1112struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1113 const struct in6_addr *saddr, int oif,
1114 const struct sk_buff *skb, int strict)
c71099ac 1115{
4c9483b2
DM
1116 struct flowi6 fl6 = {
1117 .flowi6_oif = oif,
1118 .daddr = *daddr,
c71099ac
TG
1119 };
1120 struct dst_entry *dst;
77d16f45 1121 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1122
adaa70bb 1123 if (saddr) {
4c9483b2 1124 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1125 flags |= RT6_LOOKUP_F_HAS_SADDR;
1126 }
1127
b75cc8f9 1128 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1129 if (dst->error == 0)
1130 return (struct rt6_info *) dst;
1131
1132 dst_release(dst);
1133
1da177e4
LT
1134 return NULL;
1135}
7159039a
YH
1136EXPORT_SYMBOL(rt6_lookup);
1137
c71099ac 1138/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1139 * It takes new route entry, the addition fails by any reason the
1140 * route is released.
1141 * Caller must hold dst before calling it.
1da177e4
LT
1142 */
1143
8d1c802b 1144static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1145 struct netlink_ext_ack *extack)
1da177e4
LT
1146{
1147 int err;
c71099ac 1148 struct fib6_table *table;
1da177e4 1149
93c2fb25 1150 table = rt->fib6_table;
66f5d6ce 1151 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1152 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1153 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1154
1155 return err;
1156}
1157
8d1c802b 1158int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1159{
afb1d4b5 1160 struct nl_info info = { .nl_net = net, };
e715b6d3 1161
d4ead6b3 1162 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1163}
1164
8d1c802b 1165static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1166 const struct in6_addr *daddr,
1167 const struct in6_addr *saddr)
1da177e4 1168{
4832c30d 1169 struct net_device *dev;
1da177e4
LT
1170 struct rt6_info *rt;
1171
1172 /*
1173 * Clone the route.
1174 */
1175
4832c30d 1176 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1177 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
83a09abd
MKL
1178 if (!rt)
1179 return NULL;
1180
1181 ip6_rt_copy_init(rt, ort);
1182 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1183 rt->dst.flags |= DST_HOST;
1184 rt->rt6i_dst.addr = *daddr;
1185 rt->rt6i_dst.plen = 128;
1da177e4 1186
83a09abd 1187 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1188 if (ort->fib6_dst.plen != 128 &&
1189 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1190 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1191#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1192 if (rt->rt6i_src.plen && saddr) {
1193 rt->rt6i_src.addr = *saddr;
1194 rt->rt6i_src.plen = 128;
8b9df265 1195 }
83a09abd 1196#endif
95a9a5ba 1197 }
1da177e4 1198
95a9a5ba
YH
1199 return rt;
1200}
1da177e4 1201
8d1c802b 1202static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1203{
3b6761d1 1204 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1205 struct net_device *dev;
d52d3997
MKL
1206 struct rt6_info *pcpu_rt;
1207
4832c30d
DA
1208 rcu_read_lock();
1209 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1210 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1211 rcu_read_unlock();
d52d3997
MKL
1212 if (!pcpu_rt)
1213 return NULL;
1214 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1215 pcpu_rt->rt6i_flags |= RTF_PCPU;
1216 return pcpu_rt;
1217}
1218
66f5d6ce 1219/* It should be called with rcu_read_lock() acquired */
8d1c802b 1220static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1221{
a73e4195 1222 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1223
1224 p = this_cpu_ptr(rt->rt6i_pcpu);
1225 pcpu_rt = *p;
1226
d4ead6b3
DA
1227 if (pcpu_rt)
1228 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1229
a73e4195
MKL
1230 return pcpu_rt;
1231}
1232
afb1d4b5 1233static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1234 struct fib6_info *rt)
a73e4195
MKL
1235{
1236 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1237
1238 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1239 if (!pcpu_rt) {
9c7370a1
MKL
1240 dst_hold(&net->ipv6.ip6_null_entry->dst);
1241 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1242 }
1243
a94b9367
WW
1244 dst_hold(&pcpu_rt->dst);
1245 p = this_cpu_ptr(rt->rt6i_pcpu);
1246 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1247 BUG_ON(prev);
a94b9367 1248
d52d3997
MKL
1249 return pcpu_rt;
1250}
1251
/* Exception hash table implementation.
 *
 * rt6_exception_lock is the single lock used by the helpers in this
 * file that modify a route's exception buckets; several of them
 * document that the caller must already hold it.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1255
1256/* Remove rt6_ex from hash table and free the memory
1257 * Caller must hold rt6_exception_lock
1258 */
1259static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1260 struct rt6_exception *rt6_ex)
1261{
b2427e67 1262 struct net *net;
81eb8447 1263
35732d01
WW
1264 if (!bucket || !rt6_ex)
1265 return;
b2427e67
CIK
1266
1267 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1268 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1269 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1270 kfree_rcu(rt6_ex, rcu);
1271 WARN_ON_ONCE(!bucket->depth);
1272 bucket->depth--;
81eb8447 1273 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1274}
1275
1276/* Remove oldest rt6_ex in bucket and free the memory
1277 * Caller must hold rt6_exception_lock
1278 */
1279static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1280{
1281 struct rt6_exception *rt6_ex, *oldest = NULL;
1282
1283 if (!bucket)
1284 return;
1285
1286 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1287 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1288 oldest = rt6_ex;
1289 }
1290 rt6_remove_exception(bucket, oldest);
1291}
1292
1293static u32 rt6_exception_hash(const struct in6_addr *dst,
1294 const struct in6_addr *src)
1295{
1296 static u32 seed __read_mostly;
1297 u32 val;
1298
1299 net_get_random_once(&seed, sizeof(seed));
1300 val = jhash(dst, sizeof(*dst), seed);
1301
1302#ifdef CONFIG_IPV6_SUBTREES
1303 if (src)
1304 val = jhash(src, sizeof(*src), val);
1305#endif
1306 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1307}
1308
1309/* Helper function to find the cached rt in the hash table
1310 * and update bucket pointer to point to the bucket for this
1311 * (daddr, saddr) pair
1312 * Caller must hold rt6_exception_lock
1313 */
1314static struct rt6_exception *
1315__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1316 const struct in6_addr *daddr,
1317 const struct in6_addr *saddr)
1318{
1319 struct rt6_exception *rt6_ex;
1320 u32 hval;
1321
1322 if (!(*bucket) || !daddr)
1323 return NULL;
1324
1325 hval = rt6_exception_hash(daddr, saddr);
1326 *bucket += hval;
1327
1328 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1329 struct rt6_info *rt6 = rt6_ex->rt6i;
1330 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1331
1332#ifdef CONFIG_IPV6_SUBTREES
1333 if (matched && saddr)
1334 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1335#endif
1336 if (matched)
1337 return rt6_ex;
1338 }
1339 return NULL;
1340}
1341
1342/* Helper function to find the cached rt in the hash table
1343 * and update bucket pointer to point to the bucket for this
1344 * (daddr, saddr) pair
1345 * Caller must hold rcu_read_lock()
1346 */
1347static struct rt6_exception *
1348__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1349 const struct in6_addr *daddr,
1350 const struct in6_addr *saddr)
1351{
1352 struct rt6_exception *rt6_ex;
1353 u32 hval;
1354
1355 WARN_ON_ONCE(!rcu_read_lock_held());
1356
1357 if (!(*bucket) || !daddr)
1358 return NULL;
1359
1360 hval = rt6_exception_hash(daddr, saddr);
1361 *bucket += hval;
1362
1363 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1364 struct rt6_info *rt6 = rt6_ex->rt6i;
1365 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1366
1367#ifdef CONFIG_IPV6_SUBTREES
1368 if (matched && saddr)
1369 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1370#endif
1371 if (matched)
1372 return rt6_ex;
1373 }
1374 return NULL;
1375}
1376
8d1c802b 1377static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1378{
1379 unsigned int mtu;
1380
dcd1f572
DA
1381 if (rt->fib6_pmtu) {
1382 mtu = rt->fib6_pmtu;
1383 } else {
1384 struct net_device *dev = fib6_info_nh_dev(rt);
1385 struct inet6_dev *idev;
1386
1387 rcu_read_lock();
1388 idev = __in6_dev_get(dev);
1389 mtu = idev->cnf.mtu6;
1390 rcu_read_unlock();
1391 }
1392
d4ead6b3
DA
1393 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1394
1395 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1396}
1397
35732d01 1398static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1399 struct fib6_info *ort)
35732d01 1400{
5e670d84 1401 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1402 struct rt6_exception_bucket *bucket;
1403 struct in6_addr *src_key = NULL;
1404 struct rt6_exception *rt6_ex;
1405 int err = 0;
1406
35732d01
WW
1407 spin_lock_bh(&rt6_exception_lock);
1408
1409 if (ort->exception_bucket_flushed) {
1410 err = -EINVAL;
1411 goto out;
1412 }
1413
1414 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1415 lockdep_is_held(&rt6_exception_lock));
1416 if (!bucket) {
1417 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1418 GFP_ATOMIC);
1419 if (!bucket) {
1420 err = -ENOMEM;
1421 goto out;
1422 }
1423 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1424 }
1425
1426#ifdef CONFIG_IPV6_SUBTREES
1427 /* rt6i_src.plen != 0 indicates ort is in subtree
1428 * and exception table is indexed by a hash of
1429 * both rt6i_dst and rt6i_src.
1430 * Otherwise, the exception table is indexed by
1431 * a hash of only rt6i_dst.
1432 */
93c2fb25 1433 if (ort->fib6_src.plen)
35732d01
WW
1434 src_key = &nrt->rt6i_src.addr;
1435#endif
60006a48
WW
1436
1437 /* Update rt6i_prefsrc as it could be changed
1438 * in rt6_remove_prefsrc()
1439 */
93c2fb25 1440 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1441 /* rt6_mtu_change() might lower mtu on ort.
1442 * Only insert this exception route if its mtu
1443 * is less than ort's mtu value.
1444 */
d4ead6b3 1445 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1446 err = -EINVAL;
1447 goto out;
1448 }
60006a48 1449
35732d01
WW
1450 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1451 src_key);
1452 if (rt6_ex)
1453 rt6_remove_exception(bucket, rt6_ex);
1454
1455 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1456 if (!rt6_ex) {
1457 err = -ENOMEM;
1458 goto out;
1459 }
1460 rt6_ex->rt6i = nrt;
1461 rt6_ex->stamp = jiffies;
35732d01
WW
1462 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1463 bucket->depth++;
81eb8447 1464 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1465
1466 if (bucket->depth > FIB6_MAX_DEPTH)
1467 rt6_exception_remove_oldest(bucket);
1468
1469out:
1470 spin_unlock_bh(&rt6_exception_lock);
1471
1472 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1473 if (!err) {
93c2fb25 1474 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1475 fib6_update_sernum(net, ort);
93c2fb25 1476 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1477 fib6_force_start_gc(net);
1478 }
35732d01
WW
1479
1480 return err;
1481}
1482
8d1c802b 1483void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1484{
1485 struct rt6_exception_bucket *bucket;
1486 struct rt6_exception *rt6_ex;
1487 struct hlist_node *tmp;
1488 int i;
1489
1490 spin_lock_bh(&rt6_exception_lock);
1491 /* Prevent rt6_insert_exception() to recreate the bucket list */
1492 rt->exception_bucket_flushed = 1;
1493
1494 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1495 lockdep_is_held(&rt6_exception_lock));
1496 if (!bucket)
1497 goto out;
1498
1499 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1500 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1501 rt6_remove_exception(bucket, rt6_ex);
1502 WARN_ON_ONCE(bucket->depth);
1503 bucket++;
1504 }
1505
1506out:
1507 spin_unlock_bh(&rt6_exception_lock);
1508}
1509
1510/* Find cached rt in the hash table inside passed in rt
1511 * Caller has to hold rcu_read_lock()
1512 */
8d1c802b 1513static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1514 struct in6_addr *daddr,
1515 struct in6_addr *saddr)
1516{
1517 struct rt6_exception_bucket *bucket;
1518 struct in6_addr *src_key = NULL;
1519 struct rt6_exception *rt6_ex;
1520 struct rt6_info *res = NULL;
1521
1522 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1523
1524#ifdef CONFIG_IPV6_SUBTREES
1525 /* rt6i_src.plen != 0 indicates rt is in subtree
1526 * and exception table is indexed by a hash of
1527 * both rt6i_dst and rt6i_src.
1528 * Otherwise, the exception table is indexed by
1529 * a hash of only rt6i_dst.
1530 */
93c2fb25 1531 if (rt->fib6_src.plen)
35732d01
WW
1532 src_key = saddr;
1533#endif
1534 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1535
1536 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1537 res = rt6_ex->rt6i;
1538
1539 return res;
1540}
1541
1542/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1543static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1544{
35732d01
WW
1545 struct rt6_exception_bucket *bucket;
1546 struct in6_addr *src_key = NULL;
1547 struct rt6_exception *rt6_ex;
8a14e46f 1548 struct fib6_info *from;
35732d01
WW
1549 int err;
1550
091311de 1551 from = rcu_dereference(rt->from);
35732d01 1552 if (!from ||
442d713b 1553 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1554 return -EINVAL;
1555
1556 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1557 return -ENOENT;
1558
1559 spin_lock_bh(&rt6_exception_lock);
1560 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1561 lockdep_is_held(&rt6_exception_lock));
1562#ifdef CONFIG_IPV6_SUBTREES
1563 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1564 * and exception table is indexed by a hash of
1565 * both rt6i_dst and rt6i_src.
1566 * Otherwise, the exception table is indexed by
1567 * a hash of only rt6i_dst.
1568 */
93c2fb25 1569 if (from->fib6_src.plen)
35732d01
WW
1570 src_key = &rt->rt6i_src.addr;
1571#endif
1572 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1573 &rt->rt6i_dst.addr,
1574 src_key);
1575 if (rt6_ex) {
1576 rt6_remove_exception(bucket, rt6_ex);
1577 err = 0;
1578 } else {
1579 err = -ENOENT;
1580 }
1581
1582 spin_unlock_bh(&rt6_exception_lock);
1583 return err;
1584}
1585
1586/* Find rt6_ex which contains the passed in rt cache and
1587 * refresh its stamp
1588 */
1589static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1590{
35732d01 1591 struct rt6_exception_bucket *bucket;
8d1c802b 1592 struct fib6_info *from = rt->from;
35732d01
WW
1593 struct in6_addr *src_key = NULL;
1594 struct rt6_exception *rt6_ex;
1595
1596 if (!from ||
442d713b 1597 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1598 return;
1599
1600 rcu_read_lock();
1601 bucket = rcu_dereference(from->rt6i_exception_bucket);
1602
1603#ifdef CONFIG_IPV6_SUBTREES
1604 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1605 * and exception table is indexed by a hash of
1606 * both rt6i_dst and rt6i_src.
1607 * Otherwise, the exception table is indexed by
1608 * a hash of only rt6i_dst.
1609 */
93c2fb25 1610 if (from->fib6_src.plen)
35732d01
WW
1611 src_key = &rt->rt6i_src.addr;
1612#endif
1613 rt6_ex = __rt6_find_exception_rcu(&bucket,
1614 &rt->rt6i_dst.addr,
1615 src_key);
1616 if (rt6_ex)
1617 rt6_ex->stamp = jiffies;
1618
1619 rcu_read_unlock();
1620}
1621
8d1c802b 1622static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1623{
1624 struct rt6_exception_bucket *bucket;
1625 struct rt6_exception *rt6_ex;
1626 int i;
1627
1628 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1629 lockdep_is_held(&rt6_exception_lock));
1630
1631 if (bucket) {
1632 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1633 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1634 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1635 }
1636 bucket++;
1637 }
1638 }
1639}
1640
e9fa1495
SB
1641static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1642 struct rt6_info *rt, int mtu)
1643{
1644 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1645 * lowest MTU in the path: always allow updating the route PMTU to
1646 * reflect PMTU decreases.
1647 *
1648 * If the new MTU is higher, and the route PMTU is equal to the local
1649 * MTU, this means the old MTU is the lowest in the path, so allow
1650 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1651 * handle this.
1652 */
1653
1654 if (dst_mtu(&rt->dst) >= mtu)
1655 return true;
1656
1657 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1658 return true;
1659
1660 return false;
1661}
1662
1663static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1664 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1665{
1666 struct rt6_exception_bucket *bucket;
1667 struct rt6_exception *rt6_ex;
1668 int i;
1669
1670 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1671 lockdep_is_held(&rt6_exception_lock));
1672
e9fa1495
SB
1673 if (!bucket)
1674 return;
1675
1676 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1677 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1678 struct rt6_info *entry = rt6_ex->rt6i;
1679
1680 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1681 * route), the metrics of its rt->from have already
e9fa1495
SB
1682 * been updated.
1683 */
d4ead6b3 1684 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1685 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1686 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1687 }
e9fa1495 1688 bucket++;
f5bbe7ee
WW
1689 }
1690}
1691
b16cb459
WW
1692#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1693
8d1c802b 1694static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1695 struct in6_addr *gateway)
1696{
1697 struct rt6_exception_bucket *bucket;
1698 struct rt6_exception *rt6_ex;
1699 struct hlist_node *tmp;
1700 int i;
1701
1702 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1703 return;
1704
1705 spin_lock_bh(&rt6_exception_lock);
1706 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1707 lockdep_is_held(&rt6_exception_lock));
1708
1709 if (bucket) {
1710 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1711 hlist_for_each_entry_safe(rt6_ex, tmp,
1712 &bucket->chain, hlist) {
1713 struct rt6_info *entry = rt6_ex->rt6i;
1714
1715 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1716 RTF_CACHE_GATEWAY &&
1717 ipv6_addr_equal(gateway,
1718 &entry->rt6i_gateway)) {
1719 rt6_remove_exception(bucket, rt6_ex);
1720 }
1721 }
1722 bucket++;
1723 }
1724 }
1725
1726 spin_unlock_bh(&rt6_exception_lock);
1727}
1728
c757faa8
WW
1729static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1730 struct rt6_exception *rt6_ex,
1731 struct fib6_gc_args *gc_args,
1732 unsigned long now)
1733{
1734 struct rt6_info *rt = rt6_ex->rt6i;
1735
1859bac0
PA
1736 /* we are pruning and obsoleting aged-out and non gateway exceptions
1737 * even if others have still references to them, so that on next
1738 * dst_check() such references can be dropped.
1739 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1740 * expired, independently from their aging, as per RFC 8201 section 4
1741 */
31afeb42
WW
1742 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1743 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1744 RT6_TRACE("aging clone %p\n", rt);
1745 rt6_remove_exception(bucket, rt6_ex);
1746 return;
1747 }
1748 } else if (time_after(jiffies, rt->dst.expires)) {
1749 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1750 rt6_remove_exception(bucket, rt6_ex);
1751 return;
31afeb42
WW
1752 }
1753
1754 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1755 struct neighbour *neigh;
1756 __u8 neigh_flags = 0;
1757
1bfa26ff
ED
1758 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1759 if (neigh)
c757faa8 1760 neigh_flags = neigh->flags;
1bfa26ff 1761
c757faa8
WW
1762 if (!(neigh_flags & NTF_ROUTER)) {
1763 RT6_TRACE("purging route %p via non-router but gateway\n",
1764 rt);
1765 rt6_remove_exception(bucket, rt6_ex);
1766 return;
1767 }
1768 }
31afeb42 1769
c757faa8
WW
1770 gc_args->more++;
1771}
1772
8d1c802b 1773void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1774 struct fib6_gc_args *gc_args,
1775 unsigned long now)
1776{
1777 struct rt6_exception_bucket *bucket;
1778 struct rt6_exception *rt6_ex;
1779 struct hlist_node *tmp;
1780 int i;
1781
1782 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1783 return;
1784
1bfa26ff
ED
1785 rcu_read_lock_bh();
1786 spin_lock(&rt6_exception_lock);
c757faa8
WW
1787 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1788 lockdep_is_held(&rt6_exception_lock));
1789
1790 if (bucket) {
1791 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1792 hlist_for_each_entry_safe(rt6_ex, tmp,
1793 &bucket->chain, hlist) {
1794 rt6_age_examine_exception(bucket, rt6_ex,
1795 gc_args, now);
1796 }
1797 bucket++;
1798 }
1799 }
1bfa26ff
ED
1800 spin_unlock(&rt6_exception_lock);
1801 rcu_read_unlock_bh();
c757faa8
WW
1802}
1803
1d053da9
DA
1804/* must be called with rcu lock held */
1805struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1806 int oif, struct flowi6 *fl6, int strict)
1da177e4 1807{
367efcb9 1808 struct fib6_node *fn, *saved_fn;
8d1c802b 1809 struct fib6_info *f6i;
1da177e4 1810
6454743b 1811 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1812 saved_fn = fn;
1da177e4 1813
ca254490
DA
1814 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1815 oif = 0;
1816
a3c00e46 1817redo_rt6_select:
23fb93a4 1818 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1819 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1820 fn = fib6_backtrack(fn, &fl6->saddr);
1821 if (fn)
1822 goto redo_rt6_select;
367efcb9
MKL
1823 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1824 /* also consider unreachable route */
1825 strict &= ~RT6_LOOKUP_F_REACHABLE;
1826 fn = saved_fn;
1827 goto redo_rt6_select;
367efcb9 1828 }
a3c00e46
MKL
1829 }
1830
d4bea421
DA
1831 trace_fib6_table_lookup(net, f6i, table, fl6);
1832
1d053da9
DA
1833 return f6i;
1834}
1835
1836struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1837 int oif, struct flowi6 *fl6,
1838 const struct sk_buff *skb, int flags)
1839{
1840 struct fib6_info *f6i;
1841 struct rt6_info *rt;
1842 int strict = 0;
1843
1844 strict |= flags & RT6_LOOKUP_F_IFACE;
1845 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1846 if (net->ipv6.devconf_all->forwarding == 0)
1847 strict |= RT6_LOOKUP_F_REACHABLE;
1848
1849 rcu_read_lock();
1850
1851 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1852 if (f6i->fib6_nsiblings)
1853 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1854
23fb93a4 1855 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1856 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1857 rcu_read_unlock();
d3843fe5 1858 dst_hold(&rt->dst);
d3843fe5 1859 return rt;
23fb93a4
DA
1860 }
1861
1862 /*Search through exception table */
1863 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1864 if (rt) {
d4ead6b3 1865 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1866 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1867
66f5d6ce 1868 rcu_read_unlock();
d52d3997 1869 return rt;
3da59bd9 1870 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1871 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1872 /* Create a RTF_CACHE clone which will not be
1873 * owned by the fib6 tree. It is for the special case where
1874 * the daddr in the skb during the neighbor look-up is different
1875 * from the fl6->daddr used to look-up route here.
1876 */
3da59bd9
MKL
1877 struct rt6_info *uncached_rt;
1878
23fb93a4 1879 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
4d85cd0c
DA
1880
1881 rcu_read_unlock();
c71099ac 1882
1cfb71ee
WW
1883 if (uncached_rt) {
1884 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1885 * No need for another dst_hold()
1886 */
8d0b94af 1887 rt6_uncached_list_add(uncached_rt);
81eb8447 1888 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1889 } else {
3da59bd9 1890 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1891 dst_hold(&uncached_rt->dst);
1892 }
b811580d 1893
3da59bd9 1894 return uncached_rt;
d52d3997
MKL
1895 } else {
1896 /* Get a percpu copy */
1897
1898 struct rt6_info *pcpu_rt;
1899
951f788a 1900 local_bh_disable();
23fb93a4 1901 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1902
93531c67
DA
1903 if (!pcpu_rt)
1904 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1905
951f788a
ED
1906 local_bh_enable();
1907 rcu_read_unlock();
d4bea421 1908
d52d3997
MKL
1909 return pcpu_rt;
1910 }
1da177e4 1911}
9ff74384 1912EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1913
b75cc8f9
DA
1914static struct rt6_info *ip6_pol_route_input(struct net *net,
1915 struct fib6_table *table,
1916 struct flowi6 *fl6,
1917 const struct sk_buff *skb,
1918 int flags)
4acad72d 1919{
b75cc8f9 1920 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1921}
1922
d409b847
MB
1923struct dst_entry *ip6_route_input_lookup(struct net *net,
1924 struct net_device *dev,
b75cc8f9
DA
1925 struct flowi6 *fl6,
1926 const struct sk_buff *skb,
1927 int flags)
72331bc0
SL
1928{
1929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1930 flags |= RT6_LOOKUP_F_IFACE;
1931
b75cc8f9 1932 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1933}
d409b847 1934EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1935
23aebdac 1936static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1937 struct flow_keys *keys,
1938 struct flow_keys *flkeys)
23aebdac
JS
1939{
1940 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1941 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1942 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1943 const struct ipv6hdr *inner_iph;
1944 const struct icmp6hdr *icmph;
1945 struct ipv6hdr _inner_iph;
cea67a2d 1946 struct icmp6hdr _icmph;
23aebdac
JS
1947
1948 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1949 goto out;
1950
cea67a2d
ED
1951 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1952 sizeof(_icmph), &_icmph);
1953 if (!icmph)
1954 goto out;
1955
23aebdac
JS
1956 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1957 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1958 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1959 icmph->icmp6_type != ICMPV6_PARAMPROB)
1960 goto out;
1961
1962 inner_iph = skb_header_pointer(skb,
1963 skb_transport_offset(skb) + sizeof(*icmph),
1964 sizeof(_inner_iph), &_inner_iph);
1965 if (!inner_iph)
1966 goto out;
1967
1968 key_iph = inner_iph;
5e5d6fed 1969 _flkeys = NULL;
23aebdac 1970out:
5e5d6fed
RP
1971 if (_flkeys) {
1972 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1973 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1974 keys->tags.flow_label = _flkeys->tags.flow_label;
1975 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1976 } else {
1977 keys->addrs.v6addrs.src = key_iph->saddr;
1978 keys->addrs.v6addrs.dst = key_iph->daddr;
1979 keys->tags.flow_label = ip6_flowinfo(key_iph);
1980 keys->basic.ip_proto = key_iph->nexthdr;
1981 }
23aebdac
JS
1982}
1983
1984/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1985u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1986 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1987{
1988 struct flow_keys hash_keys;
9a2a537a 1989 u32 mhash;
23aebdac 1990
bbfa047a 1991 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1992 case 0:
1993 memset(&hash_keys, 0, sizeof(hash_keys));
1994 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1995 if (skb) {
1996 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1997 } else {
1998 hash_keys.addrs.v6addrs.src = fl6->saddr;
1999 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2000 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2001 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2002 }
2003 break;
2004 case 1:
2005 if (skb) {
2006 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2007 struct flow_keys keys;
2008
2009 /* short-circuit if we already have L4 hash present */
2010 if (skb->l4_hash)
2011 return skb_get_hash_raw(skb) >> 1;
2012
2013 memset(&hash_keys, 0, sizeof(hash_keys));
2014
2015 if (!flkeys) {
2016 skb_flow_dissect_flow_keys(skb, &keys, flag);
2017 flkeys = &keys;
2018 }
2019 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2020 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2021 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2022 hash_keys.ports.src = flkeys->ports.src;
2023 hash_keys.ports.dst = flkeys->ports.dst;
2024 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2025 } else {
2026 memset(&hash_keys, 0, sizeof(hash_keys));
2027 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2028 hash_keys.addrs.v6addrs.src = fl6->saddr;
2029 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2030 hash_keys.ports.src = fl6->fl6_sport;
2031 hash_keys.ports.dst = fl6->fl6_dport;
2032 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2033 }
2034 break;
23aebdac 2035 }
9a2a537a 2036 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2037
9a2a537a 2038 return mhash >> 1;
23aebdac
JS
2039}
2040
c71099ac
TG
2041void ip6_route_input(struct sk_buff *skb)
2042{
b71d1d42 2043 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2044 struct net *net = dev_net(skb->dev);
adaa70bb 2045 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2046 struct ip_tunnel_info *tun_info;
4c9483b2 2047 struct flowi6 fl6 = {
e0d56fdd 2048 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2049 .daddr = iph->daddr,
2050 .saddr = iph->saddr,
6502ca52 2051 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2052 .flowi6_mark = skb->mark,
2053 .flowi6_proto = iph->nexthdr,
c71099ac 2054 };
5e5d6fed 2055 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2056
904af04d 2057 tun_info = skb_tunnel_info(skb);
46fa062a 2058 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2059 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2060
2061 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2062 flkeys = &_flkeys;
2063
23aebdac 2064 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2065 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2066 skb_dst_drop(skb);
b75cc8f9
DA
2067 skb_dst_set(skb,
2068 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2069}
2070
b75cc8f9
DA
2071static struct rt6_info *ip6_pol_route_output(struct net *net,
2072 struct fib6_table *table,
2073 struct flowi6 *fl6,
2074 const struct sk_buff *skb,
2075 int flags)
1da177e4 2076{
b75cc8f9 2077 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2078}
2079
6f21c96a
PA
2080struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2081 struct flowi6 *fl6, int flags)
c71099ac 2082{
d46a9d67 2083 bool any_src;
c71099ac 2084
4c1feac5
DA
2085 if (rt6_need_strict(&fl6->daddr)) {
2086 struct dst_entry *dst;
2087
2088 dst = l3mdev_link_scope_lookup(net, fl6);
2089 if (dst)
2090 return dst;
2091 }
ca254490 2092
1fb9489b 2093 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2094
d46a9d67 2095 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2096 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2097 (fl6->flowi6_oif && any_src))
77d16f45 2098 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2099
d46a9d67 2100 if (!any_src)
adaa70bb 2101 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2102 else if (sk)
2103 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2104
b75cc8f9 2105 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2106}
6f21c96a 2107EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2108
2774c131 2109struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2110{
5c1e6aa3 2111 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2112 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2113 struct dst_entry *new = NULL;
2114
1dbe3252 2115 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2116 DST_OBSOLETE_DEAD, 0);
14e50e57 2117 if (rt) {
0a1f5962 2118 rt6_info_init(rt);
81eb8447 2119 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2120
0a1f5962 2121 new = &rt->dst;
14e50e57 2122 new->__use = 1;
352e512c 2123 new->input = dst_discard;
ede2059d 2124 new->output = dst_discard_out;
14e50e57 2125
0a1f5962 2126 dst_copy_metrics(new, &ort->dst);
14e50e57 2127
1dbe3252 2128 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2129 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2130 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2131
2132 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2133#ifdef CONFIG_IPV6_SUBTREES
2134 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2135#endif
14e50e57
DM
2136 }
2137
69ead7af
DM
2138 dst_release(dst_orig);
2139 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2140}
14e50e57 2141
1da177e4
LT
2142/*
2143 * Destination cache support functions
2144 */
2145
8d1c802b 2146static bool fib6_check(struct fib6_info *f6i, u32 cookie)
93531c67
DA
2147{
2148 u32 rt_cookie = 0;
2149
8ae86971 2150 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2151 return false;
2152
2153 if (fib6_check_expired(f6i))
2154 return false;
2155
2156 return true;
2157}
2158
a68886a6
DA
2159static struct dst_entry *rt6_check(struct rt6_info *rt,
2160 struct fib6_info *from,
2161 u32 cookie)
3da59bd9 2162{
36143645 2163 u32 rt_cookie = 0;
c5cff856 2164
a68886a6 2165 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2166 rt_cookie != cookie)
3da59bd9
MKL
2167 return NULL;
2168
2169 if (rt6_check_expired(rt))
2170 return NULL;
2171
2172 return &rt->dst;
2173}
2174
a68886a6
DA
2175static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2176 struct fib6_info *from,
2177 u32 cookie)
3da59bd9 2178{
5973fb1e
MKL
2179 if (!__rt6_check_expired(rt) &&
2180 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2181 fib6_check(from, cookie))
3da59bd9
MKL
2182 return &rt->dst;
2183 else
2184 return NULL;
2185}
2186
1da177e4
LT
2187static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2188{
a87b7dc9 2189 struct dst_entry *dst_ret;
a68886a6 2190 struct fib6_info *from;
1da177e4
LT
2191 struct rt6_info *rt;
2192
a87b7dc9
DA
2193 rt = container_of(dst, struct rt6_info, dst);
2194
2195 rcu_read_lock();
1da177e4 2196
6f3118b5
ND
2197 /* All IPV6 dsts are created with ->obsolete set to the value
2198 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2199 * into this function always.
2200 */
e3bc10bd 2201
a68886a6
DA
2202 from = rcu_dereference(rt->from);
2203
2204 if (from && (rt->rt6i_flags & RTF_PCPU ||
2205 unlikely(!list_empty(&rt->rt6i_uncached))))
2206 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2207 else
a68886a6 2208 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2209
2210 rcu_read_unlock();
2211
2212 return dst_ret;
1da177e4
LT
2213}
2214
2215static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2216{
2217 struct rt6_info *rt = (struct rt6_info *) dst;
2218
2219 if (rt) {
54c1a859 2220 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2221 rcu_read_lock();
54c1a859 2222 if (rt6_check_expired(rt)) {
93531c67 2223 rt6_remove_exception_rt(rt);
54c1a859
YH
2224 dst = NULL;
2225 }
c3c14da0 2226 rcu_read_unlock();
54c1a859 2227 } else {
1da177e4 2228 dst_release(dst);
54c1a859
YH
2229 dst = NULL;
2230 }
1da177e4 2231 }
54c1a859 2232 return dst;
1da177e4
LT
2233}
2234
2235static void ip6_link_failure(struct sk_buff *skb)
2236{
2237 struct rt6_info *rt;
2238
3ffe533c 2239 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2240
adf30907 2241 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2242 if (rt) {
8a14e46f 2243 rcu_read_lock();
1eb4f758 2244 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2245 if (dst_hold_safe(&rt->dst))
93531c67 2246 rt6_remove_exception_rt(rt);
a68886a6
DA
2247 } else {
2248 struct fib6_info *from;
c5cff856
WW
2249 struct fib6_node *fn;
2250
a68886a6
DA
2251 from = rcu_dereference(rt->from);
2252 if (from) {
2253 fn = rcu_dereference(from->fib6_node);
2254 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2255 fn->fn_sernum = -1;
2256 }
1eb4f758 2257 }
8a14e46f 2258 rcu_read_unlock();
1da177e4
LT
2259 }
2260}
2261
6a3e030f
DA
2262static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2263{
a68886a6
DA
2264 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2265 struct fib6_info *from;
2266
2267 rcu_read_lock();
2268 from = rcu_dereference(rt0->from);
2269 if (from)
2270 rt0->dst.expires = from->expires;
2271 rcu_read_unlock();
2272 }
6a3e030f
DA
2273
2274 dst_set_expires(&rt0->dst, timeout);
2275 rt0->rt6i_flags |= RTF_EXPIRES;
2276}
2277
45e4fd26
MKL
2278static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2279{
2280 struct net *net = dev_net(rt->dst.dev);
2281
d4ead6b3 2282 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2283 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2284 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2285}
2286
0d3f6d29
MKL
2287static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2288{
a68886a6
DA
2289 bool from_set;
2290
2291 rcu_read_lock();
2292 from_set = !!rcu_dereference(rt->from);
2293 rcu_read_unlock();
2294
0d3f6d29 2295 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2296 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2297}
2298
45e4fd26
MKL
2299static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2300 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2301{
0dec879f 2302 const struct in6_addr *daddr, *saddr;
67ba4152 2303 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2304
45e4fd26
MKL
2305 if (rt6->rt6i_flags & RTF_LOCAL)
2306 return;
81aded24 2307
19bda36c
XL
2308 if (dst_metric_locked(dst, RTAX_MTU))
2309 return;
2310
0dec879f
JA
2311 if (iph) {
2312 daddr = &iph->daddr;
2313 saddr = &iph->saddr;
2314 } else if (sk) {
2315 daddr = &sk->sk_v6_daddr;
2316 saddr = &inet6_sk(sk)->saddr;
2317 } else {
2318 daddr = NULL;
2319 saddr = NULL;
2320 }
2321 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2322 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2323 if (mtu >= dst_mtu(dst))
2324 return;
9d289715 2325
0d3f6d29 2326 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2327 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2328 /* update rt6_ex->stamp for cache */
2329 if (rt6->rt6i_flags & RTF_CACHE)
2330 rt6_update_exception_stamp_rt(rt6);
0dec879f 2331 } else if (daddr) {
a68886a6 2332 struct fib6_info *from;
45e4fd26
MKL
2333 struct rt6_info *nrt6;
2334
4d85cd0c 2335 rcu_read_lock();
a68886a6
DA
2336 from = rcu_dereference(rt6->from);
2337 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2338 if (nrt6) {
2339 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2340 if (rt6_insert_exception(nrt6, from))
2b760fcf 2341 dst_release_immediate(&nrt6->dst);
45e4fd26 2342 }
a68886a6 2343 rcu_read_unlock();
1da177e4
LT
2344 }
2345}
2346
45e4fd26
MKL
2347static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2348 struct sk_buff *skb, u32 mtu)
2349{
2350 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2351}
2352
42ae66c8 2353void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2354 int oif, u32 mark, kuid_t uid)
81aded24
DM
2355{
2356 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2357 struct dst_entry *dst;
2358 struct flowi6 fl6;
2359
2360 memset(&fl6, 0, sizeof(fl6));
2361 fl6.flowi6_oif = oif;
1b3c61dc 2362 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2363 fl6.daddr = iph->daddr;
2364 fl6.saddr = iph->saddr;
6502ca52 2365 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2366 fl6.flowi6_uid = uid;
81aded24
DM
2367
2368 dst = ip6_route_output(net, NULL, &fl6);
2369 if (!dst->error)
45e4fd26 2370 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2371 dst_release(dst);
2372}
2373EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2374
2375void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2376{
33c162a9
MKL
2377 struct dst_entry *dst;
2378
81aded24 2379 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2380 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2381
2382 dst = __sk_dst_get(sk);
2383 if (!dst || !dst->obsolete ||
2384 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2385 return;
2386
2387 bh_lock_sock(sk);
2388 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2389 ip6_datagram_dst_update(sk, false);
2390 bh_unlock_sock(sk);
81aded24
DM
2391}
2392EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2393
7d6850f7
AK
2394void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2395 const struct flowi6 *fl6)
2396{
2397#ifdef CONFIG_IPV6_SUBTREES
2398 struct ipv6_pinfo *np = inet6_sk(sk);
2399#endif
2400
2401 ip6_dst_store(sk, dst,
2402 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2403 &sk->sk_v6_daddr : NULL,
2404#ifdef CONFIG_IPV6_SUBTREES
2405 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2406 &np->saddr :
2407#endif
2408 NULL);
2409}
2410
b55b76b2
DJ
/* Handle redirects */

/* Lookup key used for redirect processing: a flow plus the address of
 * the router the redirect came from.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes
 * &rdfl.fl6 to fib6_rule_lookup(), and __ip6_route_redirect() casts the
 * struct flowi6 pointer it receives back to struct ip6rd_flowi in order
 * to read gateway.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
2416
2417static struct rt6_info *__ip6_route_redirect(struct net *net,
2418 struct fib6_table *table,
2419 struct flowi6 *fl6,
b75cc8f9 2420 const struct sk_buff *skb,
b55b76b2
DJ
2421 int flags)
2422{
2423 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2424 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2425 struct fib6_info *rt;
b55b76b2
DJ
2426 struct fib6_node *fn;
2427
2428 /* Get the "current" route for this destination and
67c408cf 2429 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2430 *
2431 * RFC 4861 specifies that redirects should only be
2432 * accepted if they come from the nexthop to the target.
2433 * Due to the way the routes are chosen, this notion
2434 * is a bit fuzzy and one might need to check all possible
2435 * routes.
2436 */
2437
66f5d6ce 2438 rcu_read_lock();
6454743b 2439 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2440restart:
66f5d6ce 2441 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2442 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2443 continue;
14895687 2444 if (fib6_check_expired(rt))
b55b76b2 2445 continue;
93c2fb25 2446 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2447 break;
93c2fb25 2448 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2449 continue;
5e670d84 2450 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2451 continue;
2b760fcf
WW
2452 /* rt_cache's gateway might be different from its 'parent'
2453 * in the case of an ip redirect.
2454 * So we keep searching in the exception table if the gateway
2455 * is different.
2456 */
5e670d84 2457 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2458 rt_cache = rt6_find_cached_rt(rt,
2459 &fl6->daddr,
2460 &fl6->saddr);
2461 if (rt_cache &&
2462 ipv6_addr_equal(&rdfl->gateway,
2463 &rt_cache->rt6i_gateway)) {
23fb93a4 2464 ret = rt_cache;
2b760fcf
WW
2465 break;
2466 }
b55b76b2 2467 continue;
2b760fcf 2468 }
b55b76b2
DJ
2469 break;
2470 }
2471
2472 if (!rt)
421842ed 2473 rt = net->ipv6.fib6_null_entry;
93c2fb25 2474 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2475 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2476 goto out;
2477 }
2478
421842ed 2479 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2480 fn = fib6_backtrack(fn, &fl6->saddr);
2481 if (fn)
2482 goto restart;
b55b76b2 2483 }
a3c00e46 2484
b0a1ba59 2485out:
23fb93a4
DA
2486 if (ret)
2487 dst_hold(&ret->dst);
2488 else
2489 ret = ip6_create_rt_rcu(rt);
b55b76b2 2490
66f5d6ce 2491 rcu_read_unlock();
b55b76b2 2492
d4bea421 2493 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2494 return ret;
b55b76b2
DJ
2495};
2496
2497static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2498 const struct flowi6 *fl6,
2499 const struct sk_buff *skb,
2500 const struct in6_addr *gateway)
b55b76b2
DJ
2501{
2502 int flags = RT6_LOOKUP_F_HAS_SADDR;
2503 struct ip6rd_flowi rdfl;
2504
2505 rdfl.fl6 = *fl6;
2506 rdfl.gateway = *gateway;
2507
b75cc8f9 2508 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2509 flags, __ip6_route_redirect);
2510}
2511
e2d118a1
LC
2512void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2513 kuid_t uid)
3a5ad2ee
DM
2514{
2515 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2516 struct dst_entry *dst;
2517 struct flowi6 fl6;
2518
2519 memset(&fl6, 0, sizeof(fl6));
e374c618 2520 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2521 fl6.flowi6_oif = oif;
2522 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2523 fl6.daddr = iph->daddr;
2524 fl6.saddr = iph->saddr;
6502ca52 2525 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2526 fl6.flowi6_uid = uid;
3a5ad2ee 2527
b75cc8f9 2528 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2529 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2530 dst_release(dst);
2531}
2532EXPORT_SYMBOL_GPL(ip6_redirect);
2533
c92a59ec
DJ
2534void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2535 u32 mark)
2536{
2537 const struct ipv6hdr *iph = ipv6_hdr(skb);
2538 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2539 struct dst_entry *dst;
2540 struct flowi6 fl6;
2541
2542 memset(&fl6, 0, sizeof(fl6));
e374c618 2543 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2544 fl6.flowi6_oif = oif;
2545 fl6.flowi6_mark = mark;
c92a59ec
DJ
2546 fl6.daddr = msg->dest;
2547 fl6.saddr = iph->daddr;
e2d118a1 2548 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2549
b75cc8f9 2550 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2551 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2552 dst_release(dst);
2553}
2554
3a5ad2ee
DM
2555void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2556{
e2d118a1
LC
2557 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2558 sk->sk_uid);
3a5ad2ee
DM
2559}
2560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2561
0dbaee3b 2562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2563{
0dbaee3b
DM
2564 struct net_device *dev = dst->dev;
2565 unsigned int mtu = dst_mtu(dst);
2566 struct net *net = dev_net(dev);
2567
1da177e4
LT
2568 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2569
5578689a
DL
2570 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2571 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2572
2573 /*
1ab1457c
YH
2574 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2575 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2576 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2577 * rely only on pmtu discovery"
2578 */
2579 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2580 mtu = IPV6_MAXPLEN;
2581 return mtu;
2582}
2583
ebb762f2 2584static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2585{
d33e4553 2586 struct inet6_dev *idev;
d4ead6b3 2587 unsigned int mtu;
4b32b5ad
MKL
2588
2589 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2590 if (mtu)
30f78d8e 2591 goto out;
618f9bc7
SK
2592
2593 mtu = IPV6_MIN_MTU;
d33e4553
DM
2594
2595 rcu_read_lock();
2596 idev = __in6_dev_get(dst->dev);
2597 if (idev)
2598 mtu = idev->cnf.mtu6;
2599 rcu_read_unlock();
2600
30f78d8e 2601out:
14972cbd
RP
2602 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2603
2604 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2605}
2606
901731b8
DA
2607/* MTU selection:
2608 * 1. mtu on route is locked - use it
2609 * 2. mtu from nexthop exception
2610 * 3. mtu from egress device
2611 *
2612 * based on ip6_dst_mtu_forward and exception logic of
2613 * rt6_find_cached_rt; called with rcu_read_lock
2614 */
2615u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2616 struct in6_addr *saddr)
2617{
2618 struct rt6_exception_bucket *bucket;
2619 struct rt6_exception *rt6_ex;
2620 struct in6_addr *src_key;
2621 struct inet6_dev *idev;
2622 u32 mtu = 0;
2623
2624 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2625 mtu = f6i->fib6_pmtu;
2626 if (mtu)
2627 goto out;
2628 }
2629
2630 src_key = NULL;
2631#ifdef CONFIG_IPV6_SUBTREES
2632 if (f6i->fib6_src.plen)
2633 src_key = saddr;
2634#endif
2635
2636 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2637 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2638 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2639 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2640
2641 if (likely(!mtu)) {
2642 struct net_device *dev = fib6_info_nh_dev(f6i);
2643
2644 mtu = IPV6_MIN_MTU;
2645 idev = __in6_dev_get(dev);
2646 if (idev && idev->cnf.mtu6 > mtu)
2647 mtu = idev->cnf.mtu6;
2648 }
2649
2650 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2651out:
2652 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2653}
2654
3b00944c 2655struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2656 struct flowi6 *fl6)
1da177e4 2657{
87a11578 2658 struct dst_entry *dst;
1da177e4
LT
2659 struct rt6_info *rt;
2660 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2661 struct net *net = dev_net(dev);
1da177e4 2662
38308473 2663 if (unlikely(!idev))
122bdf67 2664 return ERR_PTR(-ENODEV);
1da177e4 2665
ad706862 2666 rt = ip6_dst_alloc(net, dev, 0);
38308473 2667 if (unlikely(!rt)) {
1da177e4 2668 in6_dev_put(idev);
87a11578 2669 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2670 goto out;
2671 }
2672
8e2ec639 2673 rt->dst.flags |= DST_HOST;
588753f1 2674 rt->dst.input = ip6_input;
8e2ec639 2675 rt->dst.output = ip6_output;
550bab42 2676 rt->rt6i_gateway = fl6->daddr;
87a11578 2677 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2678 rt->rt6i_dst.plen = 128;
2679 rt->rt6i_idev = idev;
14edd87d 2680 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2681
4c981e28 2682 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2683 * do proper release of the net_device
2684 */
2685 rt6_uncached_list_add(rt);
81eb8447 2686 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2687
87a11578
DM
2688 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2689
1da177e4 2690out:
87a11578 2691 return dst;
1da177e4
LT
2692}
2693
569d3645 2694static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2695{
86393e52 2696 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2697 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2698 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2699 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2700 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2701 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2702 int entries;
7019b78e 2703
fc66f95c 2704 entries = dst_entries_get_fast(ops);
49a18d86 2705 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2706 entries <= rt_max_size)
1da177e4
LT
2707 goto out;
2708
6891a346 2709 net->ipv6.ip6_rt_gc_expire++;
14956643 2710 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2711 entries = dst_entries_get_slow(ops);
2712 if (entries < ops->gc_thresh)
7019b78e 2713 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2714out:
7019b78e 2715 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2716 return entries > rt_max_size;
1da177e4
LT
2717}
2718
8d1c802b 2719static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2720 struct fib6_config *cfg)
e715b6d3 2721{
263243d6 2722 struct dst_metrics *p;
e715b6d3 2723
263243d6
ED
2724 if (!cfg->fc_mx)
2725 return 0;
ea697639 2726
263243d6
ED
2727 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2728 if (unlikely(!p))
2729 return -ENOMEM;
e715b6d3 2730
263243d6
ED
2731 refcount_set(&p->refcnt, 1);
2732 rt->fib6_metrics = p;
e715b6d3 2733
263243d6 2734 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2735}
1da177e4 2736
8c14586f
DA
2737static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2738 struct fib6_config *cfg,
f4797b33
DA
2739 const struct in6_addr *gw_addr,
2740 u32 tbid, int flags)
8c14586f
DA
2741{
2742 struct flowi6 fl6 = {
2743 .flowi6_oif = cfg->fc_ifindex,
2744 .daddr = *gw_addr,
2745 .saddr = cfg->fc_prefsrc,
2746 };
2747 struct fib6_table *table;
2748 struct rt6_info *rt;
8c14586f 2749
f4797b33 2750 table = fib6_get_table(net, tbid);
8c14586f
DA
2751 if (!table)
2752 return NULL;
2753
2754 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2755 flags |= RT6_LOOKUP_F_HAS_SADDR;
2756
f4797b33 2757 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2758 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2759
2760 /* if table lookup failed, fall back to full lookup */
2761 if (rt == net->ipv6.ip6_null_entry) {
2762 ip6_rt_put(rt);
2763 rt = NULL;
2764 }
2765
2766 return rt;
2767}
2768
fc1e64e1
DA
2769static int ip6_route_check_nh_onlink(struct net *net,
2770 struct fib6_config *cfg,
9fbb704c 2771 const struct net_device *dev,
fc1e64e1
DA
2772 struct netlink_ext_ack *extack)
2773{
44750f84 2774 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2775 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2776 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2777 struct rt6_info *grt;
2778 int err;
2779
2780 err = 0;
2781 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2782 if (grt) {
58e354c0
DA
2783 if (!grt->dst.error &&
2784 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2785 NL_SET_ERR_MSG(extack,
2786 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2787 err = -EINVAL;
2788 }
2789
2790 ip6_rt_put(grt);
2791 }
2792
2793 return err;
2794}
2795
1edce99f
DA
2796static int ip6_route_check_nh(struct net *net,
2797 struct fib6_config *cfg,
2798 struct net_device **_dev,
2799 struct inet6_dev **idev)
2800{
2801 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2802 struct net_device *dev = _dev ? *_dev : NULL;
2803 struct rt6_info *grt = NULL;
2804 int err = -EHOSTUNREACH;
2805
2806 if (cfg->fc_table) {
f4797b33
DA
2807 int flags = RT6_LOOKUP_F_IFACE;
2808
2809 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2810 cfg->fc_table, flags);
1edce99f
DA
2811 if (grt) {
2812 if (grt->rt6i_flags & RTF_GATEWAY ||
2813 (dev && dev != grt->dst.dev)) {
2814 ip6_rt_put(grt);
2815 grt = NULL;
2816 }
2817 }
2818 }
2819
2820 if (!grt)
b75cc8f9 2821 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2822
2823 if (!grt)
2824 goto out;
2825
2826 if (dev) {
2827 if (dev != grt->dst.dev) {
2828 ip6_rt_put(grt);
2829 goto out;
2830 }
2831 } else {
2832 *_dev = dev = grt->dst.dev;
2833 *idev = grt->rt6i_idev;
2834 dev_hold(dev);
2835 in6_dev_hold(grt->rt6i_idev);
2836 }
2837
2838 if (!(grt->rt6i_flags & RTF_GATEWAY))
2839 err = 0;
2840
2841 ip6_rt_put(grt);
2842
2843out:
2844 return err;
2845}
2846
9fbb704c
DA
2847static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2848 struct net_device **_dev, struct inet6_dev **idev,
2849 struct netlink_ext_ack *extack)
2850{
2851 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2852 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2853 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2854 const struct net_device *dev = *_dev;
232378e8 2855 bool need_addr_check = !dev;
9fbb704c
DA
2856 int err = -EINVAL;
2857
2858 /* if gw_addr is local we will fail to detect this in case
2859 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2860 * will return already-added prefix route via interface that
2861 * prefix route was assigned to, which might be non-loopback.
2862 */
232378e8
DA
2863 if (dev &&
2864 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2865 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2866 goto out;
2867 }
2868
2869 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2870 /* IPv6 strictly inhibits using not link-local
2871 * addresses as nexthop address.
2872 * Otherwise, router will not able to send redirects.
2873 * It is very good, but in some (rare!) circumstances
2874 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2875 * some exceptions. --ANK
2876 * We allow IPv4-mapped nexthops to support RFC4798-type
2877 * addressing
2878 */
2879 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2880 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2881 goto out;
2882 }
2883
2884 if (cfg->fc_flags & RTNH_F_ONLINK)
2885 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2886 else
2887 err = ip6_route_check_nh(net, cfg, _dev, idev);
2888
2889 if (err)
2890 goto out;
2891 }
2892
2893 /* reload in case device was changed */
2894 dev = *_dev;
2895
2896 err = -EINVAL;
2897 if (!dev) {
2898 NL_SET_ERR_MSG(extack, "Egress device not specified");
2899 goto out;
2900 } else if (dev->flags & IFF_LOOPBACK) {
2901 NL_SET_ERR_MSG(extack,
2902 "Egress device can not be loopback device for this route");
2903 goto out;
2904 }
232378e8
DA
2905
2906 /* if we did not check gw_addr above, do so now that the
2907 * egress device has been resolved.
2908 */
2909 if (need_addr_check &&
2910 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2911 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2912 goto out;
2913 }
2914
9fbb704c
DA
2915 err = 0;
2916out:
2917 return err;
2918}
2919
8d1c802b 2920static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2921 gfp_t gfp_flags,
333c4301 2922 struct netlink_ext_ack *extack)
1da177e4 2923{
5578689a 2924 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2925 struct fib6_info *rt = NULL;
1da177e4
LT
2926 struct net_device *dev = NULL;
2927 struct inet6_dev *idev = NULL;
c71099ac 2928 struct fib6_table *table;
1da177e4 2929 int addr_type;
8c5b83f0 2930 int err = -EINVAL;
1da177e4 2931
557c44be 2932 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2933 if (cfg->fc_flags & RTF_PCPU) {
2934 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2935 goto out;
d5d531cb 2936 }
557c44be 2937
2ea2352e
WW
2938 /* RTF_CACHE is an internal flag; can not be set by userspace */
2939 if (cfg->fc_flags & RTF_CACHE) {
2940 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2941 goto out;
2942 }
2943
e8478e80
DA
2944 if (cfg->fc_type > RTN_MAX) {
2945 NL_SET_ERR_MSG(extack, "Invalid route type");
2946 goto out;
2947 }
2948
d5d531cb
DA
2949 if (cfg->fc_dst_len > 128) {
2950 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2951 goto out;
2952 }
2953 if (cfg->fc_src_len > 128) {
2954 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2955 goto out;
d5d531cb 2956 }
1da177e4 2957#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2958 if (cfg->fc_src_len) {
2959 NL_SET_ERR_MSG(extack,
2960 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2961 goto out;
d5d531cb 2962 }
1da177e4 2963#endif
86872cb5 2964 if (cfg->fc_ifindex) {
1da177e4 2965 err = -ENODEV;
5578689a 2966 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2967 if (!dev)
2968 goto out;
2969 idev = in6_dev_get(dev);
2970 if (!idev)
2971 goto out;
2972 }
2973
86872cb5
TG
2974 if (cfg->fc_metric == 0)
2975 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2976
fc1e64e1
DA
2977 if (cfg->fc_flags & RTNH_F_ONLINK) {
2978 if (!dev) {
2979 NL_SET_ERR_MSG(extack,
2980 "Nexthop device required for onlink");
2981 err = -ENODEV;
2982 goto out;
2983 }
2984
2985 if (!(dev->flags & IFF_UP)) {
2986 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2987 err = -ENETDOWN;
2988 goto out;
2989 }
2990 }
2991
d71314b4 2992 err = -ENOBUFS;
38308473
DM
2993 if (cfg->fc_nlinfo.nlh &&
2994 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2995 table = fib6_get_table(net, cfg->fc_table);
38308473 2996 if (!table) {
f3213831 2997 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2998 table = fib6_new_table(net, cfg->fc_table);
2999 }
3000 } else {
3001 table = fib6_new_table(net, cfg->fc_table);
3002 }
38308473
DM
3003
3004 if (!table)
c71099ac 3005 goto out;
c71099ac 3006
93531c67
DA
3007 err = -ENOMEM;
3008 rt = fib6_info_alloc(gfp_flags);
3009 if (!rt)
1da177e4 3010 goto out;
93531c67
DA
3011
3012 if (cfg->fc_flags & RTF_ADDRCONF)
3013 rt->dst_nocount = true;
1da177e4 3014
d4ead6b3
DA
3015 err = ip6_convert_metrics(net, rt, cfg);
3016 if (err < 0)
3017 goto out;
3018
1716a961 3019 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3020 fib6_set_expires(rt, jiffies +
1716a961
G
3021 clock_t_to_jiffies(cfg->fc_expires));
3022 else
14895687 3023 fib6_clean_expires(rt);
1da177e4 3024
86872cb5
TG
3025 if (cfg->fc_protocol == RTPROT_UNSPEC)
3026 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3027 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3028
3029 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3030
19e42e45
RP
3031 if (cfg->fc_encap) {
3032 struct lwtunnel_state *lwtstate;
3033
30357d7d 3034 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3035 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3036 &lwtstate, extack);
19e42e45
RP
3037 if (err)
3038 goto out;
5e670d84 3039 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3040 }
3041
93c2fb25
DA
3042 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3043 rt->fib6_dst.plen = cfg->fc_dst_len;
3044 if (rt->fib6_dst.plen == 128)
3b6761d1 3045 rt->dst_host = true;
e5fd387a 3046
1da177e4 3047#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3048 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3049 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3050#endif
3051
93c2fb25 3052 rt->fib6_metric = cfg->fc_metric;
5e670d84 3053 rt->fib6_nh.nh_weight = 1;
1da177e4 3054
e8478e80
DA
3055 rt->fib6_type = cfg->fc_type;
3056
1da177e4
LT
3057 /* We cannot add true routes via loopback here,
3058 they would result in kernel looping; promote them to reject routes
3059 */
86872cb5 3060 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3061 (dev && (dev->flags & IFF_LOOPBACK) &&
3062 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3063 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3064 /* hold loopback dev/idev if we haven't done so. */
5578689a 3065 if (dev != net->loopback_dev) {
1da177e4
LT
3066 if (dev) {
3067 dev_put(dev);
3068 in6_dev_put(idev);
3069 }
5578689a 3070 dev = net->loopback_dev;
1da177e4
LT
3071 dev_hold(dev);
3072 idev = in6_dev_get(dev);
3073 if (!idev) {
3074 err = -ENODEV;
3075 goto out;
3076 }
3077 }
93c2fb25 3078 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3079 goto install_route;
3080 }
3081
86872cb5 3082 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3083 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3084 if (err)
48ed7b26 3085 goto out;
1da177e4 3086
93531c67 3087 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3088 }
3089
3090 err = -ENODEV;
38308473 3091 if (!dev)
1da177e4
LT
3092 goto out;
3093
428604fb
LB
3094 if (idev->cnf.disable_ipv6) {
3095 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3096 err = -EACCES;
3097 goto out;
3098 }
3099
955ec4cb
DA
3100 if (!(dev->flags & IFF_UP)) {
3101 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3102 err = -ENETDOWN;
3103 goto out;
3104 }
3105
c3968a85
DW
3106 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3107 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3108 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3109 err = -EINVAL;
3110 goto out;
3111 }
93c2fb25
DA
3112 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3113 rt->fib6_prefsrc.plen = 128;
c3968a85 3114 } else
93c2fb25 3115 rt->fib6_prefsrc.plen = 0;
c3968a85 3116
93c2fb25 3117 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3118
3119install_route:
93c2fb25 3120 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3121 !netif_carrier_ok(dev))
5e670d84
DA
3122 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3123 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3124 rt->fib6_nh.nh_dev = dev;
93c2fb25 3125 rt->fib6_table = table;
63152fc0 3126
c346dca1 3127 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3128
dcd1f572
DA
3129 if (idev)
3130 in6_dev_put(idev);
3131
8c5b83f0 3132 return rt;
6b9ea5a6
RP
3133out:
3134 if (dev)
3135 dev_put(dev);
3136 if (idev)
3137 in6_dev_put(idev);
6b9ea5a6 3138
93531c67 3139 fib6_info_release(rt);
8c5b83f0 3140 return ERR_PTR(err);
6b9ea5a6
RP
3141}
3142
acb54e3c
DA
3143int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3144 struct netlink_ext_ack *extack)
6b9ea5a6 3145{
8d1c802b 3146 struct fib6_info *rt;
6b9ea5a6
RP
3147 int err;
3148
acb54e3c 3149 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3150 if (IS_ERR(rt))
3151 return PTR_ERR(rt);
6b9ea5a6 3152
d4ead6b3 3153 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3154 fib6_info_release(rt);
6b9ea5a6 3155
1da177e4
LT
3156 return err;
3157}
3158
8d1c802b 3159static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3160{
afb1d4b5 3161 struct net *net = info->nl_net;
c71099ac 3162 struct fib6_table *table;
afb1d4b5 3163 int err;
1da177e4 3164
421842ed 3165 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3166 err = -ENOENT;
3167 goto out;
3168 }
6c813a72 3169
93c2fb25 3170 table = rt->fib6_table;
66f5d6ce 3171 spin_lock_bh(&table->tb6_lock);
86872cb5 3172 err = fib6_del(rt, info);
66f5d6ce 3173 spin_unlock_bh(&table->tb6_lock);
1da177e4 3174
6825a26c 3175out:
93531c67 3176 fib6_info_release(rt);
1da177e4
LT
3177 return err;
3178}
3179
8d1c802b 3180int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3181{
afb1d4b5
DA
3182 struct nl_info info = { .nl_net = net };
3183
528c4ceb 3184 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3185}
3186
8d1c802b 3187static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3188{
3189 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3190 struct net *net = info->nl_net;
16a16cd3 3191 struct sk_buff *skb = NULL;
0ae81335 3192 struct fib6_table *table;
e3330039 3193 int err = -ENOENT;
0ae81335 3194
421842ed 3195 if (rt == net->ipv6.fib6_null_entry)
e3330039 3196 goto out_put;
93c2fb25 3197 table = rt->fib6_table;
66f5d6ce 3198 spin_lock_bh(&table->tb6_lock);
0ae81335 3199
93c2fb25 3200 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3201 struct fib6_info *sibling, *next_sibling;
0ae81335 3202
16a16cd3
DA
3203 /* prefer to send a single notification with all hops */
3204 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3205 if (skb) {
3206 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3207
d4ead6b3 3208 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3209 NULL, NULL, 0, RTM_DELROUTE,
3210 info->portid, seq, 0) < 0) {
3211 kfree_skb(skb);
3212 skb = NULL;
3213 } else
3214 info->skip_notify = 1;
3215 }
3216
0ae81335 3217 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3218 &rt->fib6_siblings,
3219 fib6_siblings) {
0ae81335
DA
3220 err = fib6_del(sibling, info);
3221 if (err)
e3330039 3222 goto out_unlock;
0ae81335
DA
3223 }
3224 }
3225
3226 err = fib6_del(rt, info);
e3330039 3227out_unlock:
66f5d6ce 3228 spin_unlock_bh(&table->tb6_lock);
e3330039 3229out_put:
93531c67 3230 fib6_info_release(rt);
16a16cd3
DA
3231
3232 if (skb) {
e3330039 3233 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3234 info->nlh, gfp_any());
3235 }
0ae81335
DA
3236 return err;
3237}
3238
23fb93a4
DA
3239static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3240{
3241 int rc = -ESRCH;
3242
3243 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3244 goto out;
3245
3246 if (cfg->fc_flags & RTF_GATEWAY &&
3247 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3248 goto out;
3249 if (dst_hold_safe(&rt->dst))
3250 rc = rt6_remove_exception_rt(rt);
3251out:
3252 return rc;
3253}
3254
333c4301
DA
3255static int ip6_route_del(struct fib6_config *cfg,
3256 struct netlink_ext_ack *extack)
1da177e4 3257{
8d1c802b 3258 struct rt6_info *rt_cache;
c71099ac 3259 struct fib6_table *table;
8d1c802b 3260 struct fib6_info *rt;
1da177e4 3261 struct fib6_node *fn;
1da177e4
LT
3262 int err = -ESRCH;
3263
5578689a 3264 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3265 if (!table) {
3266 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3267 return err;
d5d531cb 3268 }
c71099ac 3269
66f5d6ce 3270 rcu_read_lock();
1da177e4 3271
c71099ac 3272 fn = fib6_locate(&table->tb6_root,
86872cb5 3273 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3274 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3275 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3276
1da177e4 3277 if (fn) {
66f5d6ce 3278 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3279 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3280 int rc;
3281
2b760fcf
WW
3282 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3283 &cfg->fc_src);
23fb93a4
DA
3284 if (rt_cache) {
3285 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3286 if (rc != -ESRCH) {
3287 rcu_read_unlock();
23fb93a4 3288 return rc;
9e575010 3289 }
23fb93a4
DA
3290 }
3291 continue;
2b760fcf 3292 }
86872cb5 3293 if (cfg->fc_ifindex &&
5e670d84
DA
3294 (!rt->fib6_nh.nh_dev ||
3295 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3296 continue;
86872cb5 3297 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3298 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3299 continue;
93c2fb25 3300 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3301 continue;
93c2fb25 3302 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3303 continue;
93531c67 3304 fib6_info_hold(rt);
66f5d6ce 3305 rcu_read_unlock();
1da177e4 3306
0ae81335
DA
3307 /* if gateway was specified only delete the one hop */
3308 if (cfg->fc_flags & RTF_GATEWAY)
3309 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3310
3311 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3312 }
3313 }
66f5d6ce 3314 rcu_read_unlock();
1da177e4
LT
3315
3316 return err;
3317}
3318
6700c270 3319static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3320{
a6279458 3321 struct netevent_redirect netevent;
e8599ff4 3322 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3323 struct ndisc_options ndopts;
3324 struct inet6_dev *in6_dev;
3325 struct neighbour *neigh;
a68886a6 3326 struct fib6_info *from;
71bcdba0 3327 struct rd_msg *msg;
6e157b6a
DM
3328 int optlen, on_link;
3329 u8 *lladdr;
e8599ff4 3330
29a3cad5 3331 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3332 optlen -= sizeof(*msg);
e8599ff4
DM
3333
3334 if (optlen < 0) {
6e157b6a 3335 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3336 return;
3337 }
3338
71bcdba0 3339 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3340
71bcdba0 3341 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3342 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3343 return;
3344 }
3345
6e157b6a 3346 on_link = 0;
71bcdba0 3347 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3348 on_link = 1;
71bcdba0 3349 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3350 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3351 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3352 return;
3353 }
3354
3355 in6_dev = __in6_dev_get(skb->dev);
3356 if (!in6_dev)
3357 return;
3358 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3359 return;
3360
3361 /* RFC2461 8.1:
3362 * The IP source address of the Redirect MUST be the same as the current
3363 * first-hop router for the specified ICMP Destination Address.
3364 */
3365
f997c55c 3366 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3367 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3368 return;
3369 }
6e157b6a
DM
3370
3371 lladdr = NULL;
e8599ff4
DM
3372 if (ndopts.nd_opts_tgt_lladdr) {
3373 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3374 skb->dev);
3375 if (!lladdr) {
3376 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3377 return;
3378 }
3379 }
3380
6e157b6a 3381 rt = (struct rt6_info *) dst;
ec13ad1d 3382 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3383 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3384 return;
6e157b6a 3385 }
e8599ff4 3386
6e157b6a
DM
3387 /* Redirect received -> path was valid.
3388 * Look, redirects are sent only in response to data packets,
3389 * so that this nexthop apparently is reachable. --ANK
3390 */
0dec879f 3391 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3392
71bcdba0 3393 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3394 if (!neigh)
3395 return;
a6279458 3396
1da177e4
LT
3397 /*
3398 * We have finally decided to accept it.
3399 */
3400
f997c55c 3401 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3402 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3403 NEIGH_UPDATE_F_OVERRIDE|
3404 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3405 NEIGH_UPDATE_F_ISROUTER)),
3406 NDISC_REDIRECT, &ndopts);
1da177e4 3407
4d85cd0c 3408 rcu_read_lock();
a68886a6 3409 from = rcu_dereference(rt->from);
8a14e46f 3410 fib6_info_hold(from);
4d85cd0c 3411 rcu_read_unlock();
8a14e46f
DA
3412
3413 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3414 if (!nrt)
1da177e4
LT
3415 goto out;
3416
3417 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3418 if (on_link)
3419 nrt->rt6i_flags &= ~RTF_GATEWAY;
3420
4e3fd7a0 3421 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3422
2b760fcf
WW
3423 /* No need to remove rt from the exception table if rt is
3424 * a cached route because rt6_insert_exception() will
3425 * takes care of it
3426 */
8a14e46f 3427 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3428 dst_release_immediate(&nrt->dst);
3429 goto out;
3430 }
1da177e4 3431
d8d1f30b
CG
3432 netevent.old = &rt->dst;
3433 netevent.new = &nrt->dst;
71bcdba0 3434 netevent.daddr = &msg->dest;
60592833 3435 netevent.neigh = neigh;
8d71740c
TT
3436 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3437
1da177e4 3438out:
8a14e46f 3439 fib6_info_release(from);
e8599ff4 3440 neigh_release(neigh);
6e157b6a
DM
3441}
3442
70ceb4f5 3443#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3444static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3445 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3446 const struct in6_addr *gwaddr,
3447 struct net_device *dev)
70ceb4f5 3448{
830218c1
DA
3449 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3450 int ifindex = dev->ifindex;
70ceb4f5 3451 struct fib6_node *fn;
8d1c802b 3452 struct fib6_info *rt = NULL;
c71099ac
TG
3453 struct fib6_table *table;
3454
830218c1 3455 table = fib6_get_table(net, tb_id);
38308473 3456 if (!table)
c71099ac 3457 return NULL;
70ceb4f5 3458
66f5d6ce 3459 rcu_read_lock();
38fbeeee 3460 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3461 if (!fn)
3462 goto out;
3463
66f5d6ce 3464 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3465 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3466 continue;
93c2fb25 3467 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3468 continue;
5e670d84 3469 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3470 continue;
8d1c802b 3471 fib6_info_hold(rt);
70ceb4f5
YH
3472 break;
3473 }
3474out:
66f5d6ce 3475 rcu_read_unlock();
70ceb4f5
YH
3476 return rt;
3477}
3478
8d1c802b 3479static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3480 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3481 const struct in6_addr *gwaddr,
3482 struct net_device *dev,
95c96174 3483 unsigned int pref)
70ceb4f5 3484{
86872cb5 3485 struct fib6_config cfg = {
238fc7ea 3486 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3487 .fc_ifindex = dev->ifindex,
86872cb5
TG
3488 .fc_dst_len = prefixlen,
3489 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3490 RTF_UP | RTF_PREF(pref),
b91d5329 3491 .fc_protocol = RTPROT_RA,
e8478e80 3492 .fc_type = RTN_UNICAST,
15e47304 3493 .fc_nlinfo.portid = 0,
efa2cea0
DL
3494 .fc_nlinfo.nlh = NULL,
3495 .fc_nlinfo.nl_net = net,
86872cb5
TG
3496 };
3497
830218c1 3498 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3499 cfg.fc_dst = *prefix;
3500 cfg.fc_gateway = *gwaddr;
70ceb4f5 3501
e317da96
YH
3502 /* We should treat it as a default route if prefix length is 0. */
3503 if (!prefixlen)
86872cb5 3504 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3505
acb54e3c 3506 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3507
830218c1 3508 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3509}
3510#endif
3511
8d1c802b 3512struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3513 const struct in6_addr *addr,
3514 struct net_device *dev)
1ab1457c 3515{
830218c1 3516 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3517 struct fib6_info *rt;
c71099ac 3518 struct fib6_table *table;
1da177e4 3519
afb1d4b5 3520 table = fib6_get_table(net, tb_id);
38308473 3521 if (!table)
c71099ac 3522 return NULL;
1da177e4 3523
66f5d6ce
WW
3524 rcu_read_lock();
3525 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3526 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3527 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3528 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3529 break;
3530 }
3531 if (rt)
8d1c802b 3532 fib6_info_hold(rt);
66f5d6ce 3533 rcu_read_unlock();
1da177e4
LT
3534 return rt;
3535}
3536
8d1c802b 3537struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3538 const struct in6_addr *gwaddr,
ebacaaa0
YH
3539 struct net_device *dev,
3540 unsigned int pref)
1da177e4 3541{
86872cb5 3542 struct fib6_config cfg = {
ca254490 3543 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3544 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3545 .fc_ifindex = dev->ifindex,
3546 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3547 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3548 .fc_protocol = RTPROT_RA,
e8478e80 3549 .fc_type = RTN_UNICAST,
15e47304 3550 .fc_nlinfo.portid = 0,
5578689a 3551 .fc_nlinfo.nlh = NULL,
afb1d4b5 3552 .fc_nlinfo.nl_net = net,
86872cb5 3553 };
1da177e4 3554
4e3fd7a0 3555 cfg.fc_gateway = *gwaddr;
1da177e4 3556
acb54e3c 3557 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3558 struct fib6_table *table;
3559
3560 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3561 if (table)
3562 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3563 }
1da177e4 3564
afb1d4b5 3565 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3566}
3567
afb1d4b5
DA
3568static void __rt6_purge_dflt_routers(struct net *net,
3569 struct fib6_table *table)
1da177e4 3570{
8d1c802b 3571 struct fib6_info *rt;
1da177e4
LT
3572
3573restart:
66f5d6ce
WW
3574 rcu_read_lock();
3575 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3576 struct net_device *dev = fib6_info_nh_dev(rt);
3577 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3578
93c2fb25 3579 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
dcd1f572 3580 (!idev || idev->cnf.accept_ra != 2)) {
93531c67
DA
3581 fib6_info_hold(rt);
3582 rcu_read_unlock();
3583 ip6_del_rt(net, rt);
1da177e4
LT
3584 goto restart;
3585 }
3586 }
66f5d6ce 3587 rcu_read_unlock();
830218c1
DA
3588
3589 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3590}
3591
3592void rt6_purge_dflt_routers(struct net *net)
3593{
3594 struct fib6_table *table;
3595 struct hlist_head *head;
3596 unsigned int h;
3597
3598 rcu_read_lock();
3599
3600 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3601 head = &net->ipv6.fib_table_hash[h];
3602 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3603 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3604 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3605 }
3606 }
3607
3608 rcu_read_unlock();
1da177e4
LT
3609}
3610
5578689a
DL
3611static void rtmsg_to_fib6_config(struct net *net,
3612 struct in6_rtmsg *rtmsg,
86872cb5
TG
3613 struct fib6_config *cfg)
3614{
3615 memset(cfg, 0, sizeof(*cfg));
3616
ca254490
DA
3617 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3618 : RT6_TABLE_MAIN;
86872cb5
TG
3619 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3620 cfg->fc_metric = rtmsg->rtmsg_metric;
3621 cfg->fc_expires = rtmsg->rtmsg_info;
3622 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3623 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3624 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3625 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3626
5578689a 3627 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3628
4e3fd7a0
AD
3629 cfg->fc_dst = rtmsg->rtmsg_dst;
3630 cfg->fc_src = rtmsg->rtmsg_src;
3631 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3632}
3633
5578689a 3634int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3635{
86872cb5 3636 struct fib6_config cfg;
1da177e4
LT
3637 struct in6_rtmsg rtmsg;
3638 int err;
3639
67ba4152 3640 switch (cmd) {
1da177e4
LT
3641 case SIOCADDRT: /* Add a route */
3642 case SIOCDELRT: /* Delete a route */
af31f412 3643 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3644 return -EPERM;
3645 err = copy_from_user(&rtmsg, arg,
3646 sizeof(struct in6_rtmsg));
3647 if (err)
3648 return -EFAULT;
86872cb5 3649
5578689a 3650 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3651
1da177e4
LT
3652 rtnl_lock();
3653 switch (cmd) {
3654 case SIOCADDRT:
acb54e3c 3655 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3656 break;
3657 case SIOCDELRT:
333c4301 3658 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3659 break;
3660 default:
3661 err = -EINVAL;
3662 }
3663 rtnl_unlock();
3664
3665 return err;
3ff50b79 3666 }
1da177e4
LT
3667
3668 return -EINVAL;
3669}
3670
3671/*
3672 * Drop the packet on the floor
3673 */
3674
d5fdd6ba 3675static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3676{
612f09e8 3677 int type;
adf30907 3678 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3679 switch (ipstats_mib_noroutes) {
3680 case IPSTATS_MIB_INNOROUTES:
0660e03f 3681 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3682 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3683 IP6_INC_STATS(dev_net(dst->dev),
3684 __in6_dev_get_safely(skb->dev),
3bd653c8 3685 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3686 break;
3687 }
3688 /* FALLTHROUGH */
3689 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3690 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3691 ipstats_mib_noroutes);
612f09e8
YH
3692 break;
3693 }
3ffe533c 3694 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3695 kfree_skb(skb);
3696 return 0;
3697}
3698
9ce8ade0
TG
3699static int ip6_pkt_discard(struct sk_buff *skb)
3700{
612f09e8 3701 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3702}
3703
ede2059d 3704static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3705{
adf30907 3706 skb->dev = skb_dst(skb)->dev;
612f09e8 3707 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3708}
3709
9ce8ade0
TG
3710static int ip6_pkt_prohibit(struct sk_buff *skb)
3711{
612f09e8 3712 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3713}
3714
ede2059d 3715static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3716{
adf30907 3717 skb->dev = skb_dst(skb)->dev;
612f09e8 3718 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3719}
3720
1da177e4
LT
3721/*
3722 * Allocate a dst for local (unicast / anycast) address.
3723 */
3724
360a9887
DA
3725struct fib6_info *addrconf_f6i_alloc(struct net *net,
3726 struct inet6_dev *idev,
3727 const struct in6_addr *addr,
3728 bool anycast, gfp_t gfp_flags)
1da177e4 3729{
ca254490 3730 u32 tb_id;
4832c30d 3731 struct net_device *dev = idev->dev;
360a9887 3732 struct fib6_info *f6i;
5f02ce24 3733
360a9887
DA
3734 f6i = fib6_info_alloc(gfp_flags);
3735 if (!f6i)
1da177e4
LT
3736 return ERR_PTR(-ENOMEM);
3737
360a9887 3738 f6i->dst_nocount = true;
360a9887
DA
3739 f6i->dst_host = true;
3740 f6i->fib6_protocol = RTPROT_KERNEL;
3741 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3742 if (anycast) {
360a9887
DA
3743 f6i->fib6_type = RTN_ANYCAST;
3744 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3745 } else {
360a9887
DA
3746 f6i->fib6_type = RTN_LOCAL;
3747 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3748 }
1da177e4 3749
360a9887 3750 f6i->fib6_nh.nh_gw = *addr;
93531c67 3751 dev_hold(dev);
360a9887
DA
3752 f6i->fib6_nh.nh_dev = dev;
3753 f6i->fib6_dst.addr = *addr;
3754 f6i->fib6_dst.plen = 128;
ca254490 3755 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3756 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3757
360a9887 3758 return f6i;
1da177e4
LT
3759}
3760
/*
 * Argument bundle for fib6_remove_prefsrc(): removes a deleted address
 * from the prefsrc of matching entries.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* the address being removed */
};
3767
8d1c802b 3768static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3769{
3770 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3771 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3772 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3773
5e670d84 3774 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3775 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3776 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3777 spin_lock_bh(&rt6_exception_lock);
c3968a85 3778 /* remove prefsrc entry */
93c2fb25 3779 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3780 /* need to update cache as well */
3781 rt6_exceptions_remove_prefsrc(rt);
3782 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3783 }
3784 return 0;
3785}
3786
3787void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3788{
3789 struct net *net = dev_net(ifp->idev->dev);
3790 struct arg_dev_net_ip adni = {
3791 .dev = ifp->idev->dev,
3792 .net = net,
3793 .addr = &ifp->addr,
3794 };
0c3584d5 3795 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3796}
3797
be7a010d 3798#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3799
3800/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3801static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3802{
3803 struct in6_addr *gateway = (struct in6_addr *)arg;
3804
93c2fb25 3805 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3806 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3807 return -1;
3808 }
b16cb459
WW
3809
3810 /* Further clean up cached routes in exception table.
3811 * This is needed because cached route may have a different
3812 * gateway than its 'parent' in the case of an ip redirect.
3813 */
3814 rt6_exceptions_clean_tohost(rt, gateway);
3815
be7a010d
DJ
3816 return 0;
3817}
3818
3819void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3820{
3821 fib6_clean_all(net, fib6_clean_tohost, gateway);
3822}
3823
/*
 * Argument bundle for the device up/down FIB walkers.  The union member in
 * use depends on the callback: fib6_ifup() reads nh_flags, fib6_ifdown()
 * reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;
	union {
		unsigned int	nh_flags;	/* nexthop flags to clear */
		unsigned long	event;		/* NETDEV_* event */
	};
};
3831
8d1c802b 3832static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3833{
8d1c802b 3834 struct fib6_info *iter;
d7dedee1
IS
3835 struct fib6_node *fn;
3836
93c2fb25
DA
3837 fn = rcu_dereference_protected(rt->fib6_node,
3838 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3839 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3840 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3841 while (iter) {
93c2fb25 3842 if (iter->fib6_metric == rt->fib6_metric &&
f34436a4 3843 iter->fib6_nsiblings)
d7dedee1 3844 return iter;
8fb11a9a 3845 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3846 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3847 }
3848
3849 return NULL;
3850}
3851
8d1c802b 3852static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3853{
5e670d84
DA
3854 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3855 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3856 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3857 return true;
3858
3859 return false;
3860}
3861
8d1c802b 3862static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3863{
8d1c802b 3864 struct fib6_info *iter;
d7dedee1
IS
3865 int total = 0;
3866
3867 if (!rt6_is_dead(rt))
5e670d84 3868 total += rt->fib6_nh.nh_weight;
d7dedee1 3869
93c2fb25 3870 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3871 if (!rt6_is_dead(iter))
5e670d84 3872 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3873 }
3874
3875 return total;
3876}
3877
8d1c802b 3878static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3879{
3880 int upper_bound = -1;
3881
3882 if (!rt6_is_dead(rt)) {
5e670d84 3883 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3884 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3885 total) - 1;
3886 }
5e670d84 3887 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3888}
3889
8d1c802b 3890static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3891{
8d1c802b 3892 struct fib6_info *iter;
d7dedee1
IS
3893 int weight = 0;
3894
3895 rt6_upper_bound_set(rt, &weight, total);
3896
93c2fb25 3897 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3898 rt6_upper_bound_set(iter, &weight, total);
3899}
3900
8d1c802b 3901void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3902{
8d1c802b 3903 struct fib6_info *first;
d7dedee1
IS
3904 int total;
3905
3906 /* In case the entire multipath route was marked for flushing,
3907 * then there is no need to rebalance upon the removal of every
3908 * sibling route.
3909 */
93c2fb25 3910 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3911 return;
3912
3913 /* During lookup routes are evaluated in order, so we need to
3914 * make sure upper bounds are assigned from the first sibling
3915 * onwards.
3916 */
3917 first = rt6_multipath_first_sibling(rt);
3918 if (WARN_ON_ONCE(!first))
3919 return;
3920
3921 total = rt6_multipath_total_weight(first);
3922 rt6_multipath_upper_bound_set(first, total);
3923}
3924
8d1c802b 3925static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3926{
3927 const struct arg_netdev_event *arg = p_arg;
7aef6859 3928 struct net *net = dev_net(arg->dev);
2127d95a 3929
421842ed 3930 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3931 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3932 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3933 rt6_multipath_rebalance(rt);
1de178ed 3934 }
2127d95a
IS
3935
3936 return 0;
3937}
3938
3939void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3940{
3941 struct arg_netdev_event arg = {
3942 .dev = dev,
6802f3ad
IS
3943 {
3944 .nh_flags = nh_flags,
3945 },
2127d95a
IS
3946 };
3947
3948 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3949 arg.nh_flags |= RTNH_F_LINKDOWN;
3950
3951 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3952}
3953
8d1c802b 3954static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3955 const struct net_device *dev)
3956{
8d1c802b 3957 struct fib6_info *iter;
1de178ed 3958
5e670d84 3959 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3960 return true;
93c2fb25 3961 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3962 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3963 return true;
3964
3965 return false;
3966}
3967
8d1c802b 3968static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3969{
8d1c802b 3970 struct fib6_info *iter;
1de178ed
IS
3971
3972 rt->should_flush = 1;
93c2fb25 3973 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3974 iter->should_flush = 1;
3975}
3976
8d1c802b 3977static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3978 const struct net_device *down_dev)
3979{
8d1c802b 3980 struct fib6_info *iter;
1de178ed
IS
3981 unsigned int dead = 0;
3982
5e670d84
DA
3983 if (rt->fib6_nh.nh_dev == down_dev ||
3984 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3985 dead++;
93c2fb25 3986 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3987 if (iter->fib6_nh.nh_dev == down_dev ||
3988 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3989 dead++;
3990
3991 return dead;
3992}
3993
8d1c802b 3994static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3995 const struct net_device *dev,
3996 unsigned int nh_flags)
3997{
8d1c802b 3998 struct fib6_info *iter;
1de178ed 3999
5e670d84
DA
4000 if (rt->fib6_nh.nh_dev == dev)
4001 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 4002 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4003 if (iter->fib6_nh.nh_dev == dev)
4004 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4005}
4006
a1a22c12 4007/* called with write lock held for table with rt */
8d1c802b 4008static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4009{
4c981e28
IS
4010 const struct arg_netdev_event *arg = p_arg;
4011 const struct net_device *dev = arg->dev;
7aef6859 4012 struct net *net = dev_net(dev);
8ed67789 4013
421842ed 4014 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4015 return 0;
4016
4017 switch (arg->event) {
4018 case NETDEV_UNREGISTER:
5e670d84 4019 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4020 case NETDEV_DOWN:
1de178ed 4021 if (rt->should_flush)
27c6fa73 4022 return -1;
93c2fb25 4023 if (!rt->fib6_nsiblings)
5e670d84 4024 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4025 if (rt6_multipath_uses_dev(rt, dev)) {
4026 unsigned int count;
4027
4028 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4029 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4030 rt6_multipath_flush(rt);
4031 return -1;
4032 }
4033 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4034 RTNH_F_LINKDOWN);
7aef6859 4035 fib6_update_sernum(net, rt);
d7dedee1 4036 rt6_multipath_rebalance(rt);
1de178ed
IS
4037 }
4038 return -2;
27c6fa73 4039 case NETDEV_CHANGE:
5e670d84 4040 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4041 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4042 break;
5e670d84 4043 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4044 rt6_multipath_rebalance(rt);
27c6fa73 4045 break;
2b241361 4046 }
c159d30c 4047
1da177e4
LT
4048 return 0;
4049}
4050
27c6fa73 4051void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4052{
4c981e28 4053 struct arg_netdev_event arg = {
8ed67789 4054 .dev = dev,
6802f3ad
IS
4055 {
4056 .event = event,
4057 },
8ed67789
DL
4058 };
4059
4c981e28
IS
4060 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4061}
4062
4063void rt6_disable_ip(struct net_device *dev, unsigned long event)
4064{
4065 rt6_sync_down_dev(dev, event);
4066 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4067 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4068}
4069
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
4074
8d1c802b 4075static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4076{
4077 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4078 struct inet6_dev *idev;
4079
4080 /* In IPv6 pmtu discovery is not optional,
4081 so that RTAX_MTU lock cannot disable it.
4082 We still use this lock to block changes
4083 caused by addrconf/ndisc.
4084 */
4085
4086 idev = __in6_dev_get(arg->dev);
38308473 4087 if (!idev)
1da177e4
LT
4088 return 0;
4089
4090 /* For administrative MTU increase, there is no way to discover
4091 IPv6 PMTU increase, so PMTU increase should be updated here.
4092 Since RFC 1981 doesn't include administrative MTU increase
4093 update PMTU increase is a MUST. (i.e. jumbo frame)
4094 */
5e670d84 4095 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4096 !fib6_metric_locked(rt, RTAX_MTU)) {
4097 u32 mtu = rt->fib6_pmtu;
4098
4099 if (mtu >= arg->mtu ||
4100 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4101 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4102
f5bbe7ee 4103 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4104 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4105 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4106 }
1da177e4
LT
4107 return 0;
4108}
4109
95c96174 4110void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4111{
c71099ac
TG
4112 struct rt6_mtu_change_arg arg = {
4113 .dev = dev,
4114 .mtu = mtu,
4115 };
1da177e4 4116
0c3584d5 4117 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4118}
4119
ef7c79ed 4120static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4121 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4122 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4123 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4124 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4125 [RTA_PRIORITY] = { .type = NLA_U32 },
4126 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4127 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4128 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4129 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4130 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4131 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4132 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4133 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4134 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4135 [RTA_IP_PROTO] = { .type = NLA_U8 },
4136 [RTA_SPORT] = { .type = NLA_U16 },
4137 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4138};
4139
4140static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4141 struct fib6_config *cfg,
4142 struct netlink_ext_ack *extack)
1da177e4 4143{
86872cb5
TG
4144 struct rtmsg *rtm;
4145 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4146 unsigned int pref;
86872cb5 4147 int err;
1da177e4 4148
fceb6435
JB
4149 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4150 NULL);
86872cb5
TG
4151 if (err < 0)
4152 goto errout;
1da177e4 4153
86872cb5
TG
4154 err = -EINVAL;
4155 rtm = nlmsg_data(nlh);
4156 memset(cfg, 0, sizeof(*cfg));
4157
4158 cfg->fc_table = rtm->rtm_table;
4159 cfg->fc_dst_len = rtm->rtm_dst_len;
4160 cfg->fc_src_len = rtm->rtm_src_len;
4161 cfg->fc_flags = RTF_UP;
4162 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4163 cfg->fc_type = rtm->rtm_type;
86872cb5 4164
ef2c7d7b
ND
4165 if (rtm->rtm_type == RTN_UNREACHABLE ||
4166 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4167 rtm->rtm_type == RTN_PROHIBIT ||
4168 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4169 cfg->fc_flags |= RTF_REJECT;
4170
ab79ad14
4171 if (rtm->rtm_type == RTN_LOCAL)
4172 cfg->fc_flags |= RTF_LOCAL;
4173
1f56a01f
MKL
4174 if (rtm->rtm_flags & RTM_F_CLONED)
4175 cfg->fc_flags |= RTF_CACHE;
4176
fc1e64e1
DA
4177 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4178
15e47304 4179 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4180 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4181 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4182
4183 if (tb[RTA_GATEWAY]) {
67b61f6c 4184 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4185 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4186 }
86872cb5
TG
4187
4188 if (tb[RTA_DST]) {
4189 int plen = (rtm->rtm_dst_len + 7) >> 3;
4190
4191 if (nla_len(tb[RTA_DST]) < plen)
4192 goto errout;
4193
4194 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4195 }
86872cb5
TG
4196
4197 if (tb[RTA_SRC]) {
4198 int plen = (rtm->rtm_src_len + 7) >> 3;
4199
4200 if (nla_len(tb[RTA_SRC]) < plen)
4201 goto errout;
4202
4203 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4204 }
86872cb5 4205
c3968a85 4206 if (tb[RTA_PREFSRC])
67b61f6c 4207 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4208
86872cb5
TG
4209 if (tb[RTA_OIF])
4210 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4211
4212 if (tb[RTA_PRIORITY])
4213 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4214
4215 if (tb[RTA_METRICS]) {
4216 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4217 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4218 }
86872cb5
TG
4219
4220 if (tb[RTA_TABLE])
4221 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4222
51ebd318
ND
4223 if (tb[RTA_MULTIPATH]) {
4224 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4225 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4226
4227 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4228 cfg->fc_mp_len, extack);
9ed59592
DA
4229 if (err < 0)
4230 goto errout;
51ebd318
ND
4231 }
4232
c78ba6d6
LR
4233 if (tb[RTA_PREF]) {
4234 pref = nla_get_u8(tb[RTA_PREF]);
4235 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4236 pref != ICMPV6_ROUTER_PREF_HIGH)
4237 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4238 cfg->fc_flags |= RTF_PREF(pref);
4239 }
4240
19e42e45
RP
4241 if (tb[RTA_ENCAP])
4242 cfg->fc_encap = tb[RTA_ENCAP];
4243
9ed59592 4244 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4245 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4246
c255bd68 4247 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4248 if (err < 0)
4249 goto errout;
4250 }
4251
32bc201e
XL
4252 if (tb[RTA_EXPIRES]) {
4253 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4254
4255 if (addrconf_finite_timeout(timeout)) {
4256 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4257 cfg->fc_flags |= RTF_EXPIRES;
4258 }
4259 }
4260
86872cb5
TG
4261 err = 0;
4262errout:
4263 return err;
1da177e4
LT
4264}
4265
6b9ea5a6 4266struct rt6_nh {
8d1c802b 4267 struct fib6_info *fib6_info;
6b9ea5a6 4268 struct fib6_config r_cfg;
6b9ea5a6
RP
4269 struct list_head next;
4270};
4271
4272static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4273{
4274 struct rt6_nh *nh;
4275
4276 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4277 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4278 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4279 nh->r_cfg.fc_ifindex);
4280 }
4281}
4282
d4ead6b3
DA
4283static int ip6_route_info_append(struct net *net,
4284 struct list_head *rt6_nh_list,
8d1c802b
DA
4285 struct fib6_info *rt,
4286 struct fib6_config *r_cfg)
6b9ea5a6
RP
4287{
4288 struct rt6_nh *nh;
6b9ea5a6
RP
4289 int err = -EEXIST;
4290
4291 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4292 /* check if fib6_info already exists */
4293 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4294 return err;
4295 }
4296
4297 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4298 if (!nh)
4299 return -ENOMEM;
8d1c802b 4300 nh->fib6_info = rt;
d4ead6b3 4301 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4302 if (err) {
4303 kfree(nh);
4304 return err;
4305 }
4306 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4307 list_add_tail(&nh->next, rt6_nh_list);
4308
4309 return 0;
4310}
4311
8d1c802b
DA
4312static void ip6_route_mpath_notify(struct fib6_info *rt,
4313 struct fib6_info *rt_last,
3b1137fe
DA
4314 struct nl_info *info,
4315 __u16 nlflags)
4316{
4317 /* if this is an APPEND route, then rt points to the first route
4318 * inserted and rt_last points to last route inserted. Userspace
4319 * wants a consistent dump of the route which starts at the first
4320 * nexthop. Since sibling routes are always added at the end of
4321 * the list, find the first sibling of the last route appended
4322 */
93c2fb25
DA
4323 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4324 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4325 struct fib6_info,
93c2fb25 4326 fib6_siblings);
3b1137fe
DA
4327 }
4328
4329 if (rt)
4330 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4331}
4332
333c4301
DA
4333static int ip6_route_multipath_add(struct fib6_config *cfg,
4334 struct netlink_ext_ack *extack)
51ebd318 4335{
8d1c802b 4336 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4337 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4338 struct fib6_config r_cfg;
4339 struct rtnexthop *rtnh;
8d1c802b 4340 struct fib6_info *rt;
6b9ea5a6
RP
4341 struct rt6_nh *err_nh;
4342 struct rt6_nh *nh, *nh_safe;
3b1137fe 4343 __u16 nlflags;
51ebd318
ND
4344 int remaining;
4345 int attrlen;
6b9ea5a6
RP
4346 int err = 1;
4347 int nhn = 0;
4348 int replace = (cfg->fc_nlinfo.nlh &&
4349 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4350 LIST_HEAD(rt6_nh_list);
51ebd318 4351
3b1137fe
DA
4352 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4353 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4354 nlflags |= NLM_F_APPEND;
4355
35f1b4e9 4356 remaining = cfg->fc_mp_len;
51ebd318 4357 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4358
6b9ea5a6 4359 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4360 * fib6_info structs per nexthop
6b9ea5a6 4361 */
51ebd318
ND
4362 while (rtnh_ok(rtnh, remaining)) {
4363 memcpy(&r_cfg, cfg, sizeof(*cfg));
4364 if (rtnh->rtnh_ifindex)
4365 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4366
4367 attrlen = rtnh_attrlen(rtnh);
4368 if (attrlen > 0) {
4369 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4370
4371 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4372 if (nla) {
67b61f6c 4373 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4374 r_cfg.fc_flags |= RTF_GATEWAY;
4375 }
19e42e45
RP
4376 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4377 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4378 if (nla)
4379 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4380 }
6b9ea5a6 4381
68e2ffde 4382 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4383 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4384 if (IS_ERR(rt)) {
4385 err = PTR_ERR(rt);
4386 rt = NULL;
6b9ea5a6 4387 goto cleanup;
8c5b83f0 4388 }
6b9ea5a6 4389
5e670d84 4390 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4391
d4ead6b3
DA
4392 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4393 rt, &r_cfg);
51ebd318 4394 if (err) {
93531c67 4395 fib6_info_release(rt);
6b9ea5a6
RP
4396 goto cleanup;
4397 }
4398
4399 rtnh = rtnh_next(rtnh, &remaining);
4400 }
4401
3b1137fe
DA
4402 /* for add and replace send one notification with all nexthops.
4403 * Skip the notification in fib6_add_rt2node and send one with
4404 * the full route when done
4405 */
4406 info->skip_notify = 1;
4407
6b9ea5a6
RP
4408 err_nh = NULL;
4409 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4410 rt_last = nh->fib6_info;
4411 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4412 fib6_info_release(nh->fib6_info);
93531c67 4413
3b1137fe
DA
4414 /* save reference to first route for notification */
4415 if (!rt_notif && !err)
8d1c802b 4416 rt_notif = nh->fib6_info;
3b1137fe 4417
8d1c802b
DA
4418 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4419 nh->fib6_info = NULL;
6b9ea5a6
RP
4420 if (err) {
4421 if (replace && nhn)
4422 ip6_print_replace_route_err(&rt6_nh_list);
4423 err_nh = nh;
4424 goto add_errout;
51ebd318 4425 }
6b9ea5a6 4426
1a72418b 4427 /* Because each route is added like a single route we remove
27596472
MK
4428 * these flags after the first nexthop: if there is a collision,
4429 * we have already failed to add the first nexthop:
4430 * fib6_add_rt2node() has rejected it; when replacing, old
4431 * nexthops have been replaced by first new, the rest should
4432 * be added to it.
1a72418b 4433 */
27596472
MK
4434 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4435 NLM_F_REPLACE);
f34436a4 4436 cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
6b9ea5a6
RP
4437 nhn++;
4438 }
4439
3b1137fe
DA
4440 /* success ... tell user about new route */
4441 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4442 goto cleanup;
4443
4444add_errout:
3b1137fe
DA
4445 /* send notification for routes that were added so that
4446 * the delete notifications sent by ip6_route_del are
4447 * coherent
4448 */
4449 if (rt_notif)
4450 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4451
6b9ea5a6
RP
4452 /* Delete routes that were already added */
4453 list_for_each_entry(nh, &rt6_nh_list, next) {
4454 if (err_nh == nh)
4455 break;
333c4301 4456 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4457 }
4458
4459cleanup:
4460 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4461 if (nh->fib6_info)
4462 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4463 list_del(&nh->next);
4464 kfree(nh);
4465 }
4466
4467 return err;
4468}
4469
333c4301
DA
4470static int ip6_route_multipath_del(struct fib6_config *cfg,
4471 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4472{
4473 struct fib6_config r_cfg;
4474 struct rtnexthop *rtnh;
4475 int remaining;
4476 int attrlen;
4477 int err = 1, last_err = 0;
4478
4479 remaining = cfg->fc_mp_len;
4480 rtnh = (struct rtnexthop *)cfg->fc_mp;
4481
4482 /* Parse a Multipath Entry */
4483 while (rtnh_ok(rtnh, remaining)) {
4484 memcpy(&r_cfg, cfg, sizeof(*cfg));
4485 if (rtnh->rtnh_ifindex)
4486 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4487
4488 attrlen = rtnh_attrlen(rtnh);
4489 if (attrlen > 0) {
4490 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4491
4492 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4493 if (nla) {
4494 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4495 r_cfg.fc_flags |= RTF_GATEWAY;
4496 }
4497 }
333c4301 4498 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4499 if (err)
4500 last_err = err;
4501
51ebd318
ND
4502 rtnh = rtnh_next(rtnh, &remaining);
4503 }
4504
4505 return last_err;
4506}
4507
c21ef3e3
DA
4508static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4509 struct netlink_ext_ack *extack)
1da177e4 4510{
86872cb5
TG
4511 struct fib6_config cfg;
4512 int err;
1da177e4 4513
333c4301 4514 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4515 if (err < 0)
4516 return err;
4517
51ebd318 4518 if (cfg.fc_mp)
333c4301 4519 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4520 else {
4521 cfg.fc_delete_all_nh = 1;
333c4301 4522 return ip6_route_del(&cfg, extack);
0ae81335 4523 }
1da177e4
LT
4524}
4525
c21ef3e3
DA
4526static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4527 struct netlink_ext_ack *extack)
1da177e4 4528{
86872cb5
TG
4529 struct fib6_config cfg;
4530 int err;
1da177e4 4531
333c4301 4532 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4533 if (err < 0)
4534 return err;
4535
51ebd318 4536 if (cfg.fc_mp)
333c4301 4537 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4538 else
acb54e3c 4539 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4540}
4541
8d1c802b 4542static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4543{
beb1afac
DA
4544 int nexthop_len = 0;
4545
93c2fb25 4546 if (rt->fib6_nsiblings) {
beb1afac
DA
4547 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4548 + NLA_ALIGN(sizeof(struct rtnexthop))
4549 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4550 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4551
93c2fb25 4552 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4553 }
4554
339bf98f
TG
4555 return NLMSG_ALIGN(sizeof(struct rtmsg))
4556 + nla_total_size(16) /* RTA_SRC */
4557 + nla_total_size(16) /* RTA_DST */
4558 + nla_total_size(16) /* RTA_GATEWAY */
4559 + nla_total_size(16) /* RTA_PREFSRC */
4560 + nla_total_size(4) /* RTA_TABLE */
4561 + nla_total_size(4) /* RTA_IIF */
4562 + nla_total_size(4) /* RTA_OIF */
4563 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4564 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4565 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4566 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4567 + nla_total_size(1) /* RTA_PREF */
5e670d84 4568 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4569 + nexthop_len;
4570}
4571
8d1c802b 4572static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4573 unsigned int *flags, bool skip_oif)
beb1afac 4574{
5e670d84 4575 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4576 *flags |= RTNH_F_DEAD;
4577
5e670d84 4578 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4579 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4580
4581 rcu_read_lock();
4582 if (fib6_ignore_linkdown(rt))
beb1afac 4583 *flags |= RTNH_F_DEAD;
dcd1f572 4584 rcu_read_unlock();
beb1afac
DA
4585 }
4586
93c2fb25 4587 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4588 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4589 goto nla_put_failure;
4590 }
4591
5e670d84
DA
4592 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4593 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4594 *flags |= RTNH_F_OFFLOAD;
4595
5be083ce 4596 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4597 if (!skip_oif && rt->fib6_nh.nh_dev &&
4598 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4599 goto nla_put_failure;
4600
5e670d84
DA
4601 if (rt->fib6_nh.nh_lwtstate &&
4602 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4603 goto nla_put_failure;
4604
4605 return 0;
4606
4607nla_put_failure:
4608 return -EMSGSIZE;
4609}
4610
5be083ce 4611/* add multipath next hop */
8d1c802b 4612static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4613{
5e670d84 4614 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4615 struct rtnexthop *rtnh;
4616 unsigned int flags = 0;
4617
4618 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4619 if (!rtnh)
4620 goto nla_put_failure;
4621
5e670d84
DA
4622 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4623 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4624
5be083ce 4625 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4626 goto nla_put_failure;
4627
4628 rtnh->rtnh_flags = flags;
4629
4630 /* length of rtnetlink header + attributes */
4631 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4632
4633 return 0;
4634
4635nla_put_failure:
4636 return -EMSGSIZE;
339bf98f
TG
4637}
4638
d4ead6b3 4639static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4640 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4641 struct in6_addr *dest, struct in6_addr *src,
15e47304 4642 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4643 unsigned int flags)
1da177e4
LT
4644{
4645 struct rtmsg *rtm;
2d7202bf 4646 struct nlmsghdr *nlh;
d4ead6b3
DA
4647 long expires = 0;
4648 u32 *pmetrics;
9e762a4a 4649 u32 table;
1da177e4 4650
15e47304 4651 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4652 if (!nlh)
26932566 4653 return -EMSGSIZE;
2d7202bf
TG
4654
4655 rtm = nlmsg_data(nlh);
1da177e4 4656 rtm->rtm_family = AF_INET6;
93c2fb25
DA
4657 rtm->rtm_dst_len = rt->fib6_dst.plen;
4658 rtm->rtm_src_len = rt->fib6_src.plen;
1da177e4 4659 rtm->rtm_tos = 0;
93c2fb25
DA
4660 if (rt->fib6_table)
4661 table = rt->fib6_table->tb6_id;
c71099ac 4662 else
9e762a4a
PM
4663 table = RT6_TABLE_UNSPEC;
4664 rtm->rtm_table = table;
c78679e8
DM
4665 if (nla_put_u32(skb, RTA_TABLE, table))
4666 goto nla_put_failure;
e8478e80
DA
4667
4668 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4669 rtm->rtm_flags = 0;
4670 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4671 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4672
93c2fb25 4673 if (rt->fib6_flags & RTF_CACHE)
1da177e4
LT
4674 rtm->rtm_flags |= RTM_F_CLONED;
4675
d4ead6b3
DA
4676 if (dest) {
4677 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4678 goto nla_put_failure;
1ab1457c 4679 rtm->rtm_dst_len = 128;
1da177e4 4680 } else if (rtm->rtm_dst_len)
93c2fb25 4681 if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
c78679e8 4682 goto nla_put_failure;
1da177e4
LT
4683#ifdef CONFIG_IPV6_SUBTREES
4684 if (src) {
930345ea 4685 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4686 goto nla_put_failure;
1ab1457c 4687 rtm->rtm_src_len = 128;
c78679e8 4688 } else if (rtm->rtm_src_len &&
93c2fb25 4689 nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
c78679e8 4690 goto nla_put_failure;
1da177e4 4691#endif
7bc570c8
YH
4692 if (iif) {
4693#ifdef CONFIG_IPV6_MROUTE
93c2fb25 4694 if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
fd61c6ba
DA
4695 int err = ip6mr_get_route(net, skb, rtm, portid);
4696
4697 if (err == 0)
4698 return 0;
4699 if (err < 0)
4700 goto nla_put_failure;
7bc570c8
YH
4701 } else
4702#endif
c78679e8
DM
4703 if (nla_put_u32(skb, RTA_IIF, iif))
4704 goto nla_put_failure;
d4ead6b3 4705 } else if (dest) {
1da177e4 4706 struct in6_addr saddr_buf;
d4ead6b3 4707 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4708 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4709 goto nla_put_failure;
1da177e4 4710 }
2d7202bf 4711
93c2fb25 4712 if (rt->fib6_prefsrc.plen) {
c3968a85 4713 struct in6_addr saddr_buf;
93c2fb25 4714 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4715 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4716 goto nla_put_failure;
c3968a85
DW
4717 }
4718
d4ead6b3
DA
4719 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4720 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4721 goto nla_put_failure;
4722
93c2fb25 4723 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4724 goto nla_put_failure;
8253947e 4725
beb1afac
DA
4726 /* For multipath routes, walk the siblings list and add
4727 * each as a nexthop within RTA_MULTIPATH.
4728 */
93c2fb25 4729 if (rt->fib6_nsiblings) {
8d1c802b 4730 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4731 struct nlattr *mp;
4732
4733 mp = nla_nest_start(skb, RTA_MULTIPATH);
4734 if (!mp)
4735 goto nla_put_failure;
4736
4737 if (rt6_add_nexthop(skb, rt) < 0)
4738 goto nla_put_failure;
4739
4740 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4741 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4742 if (rt6_add_nexthop(skb, sibling) < 0)
4743 goto nla_put_failure;
4744 }
4745
4746 nla_nest_end(skb, mp);
4747 } else {
5be083ce 4748 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4749 goto nla_put_failure;
4750 }
4751
93c2fb25 4752 if (rt->fib6_flags & RTF_EXPIRES) {
14895687
DA
4753 expires = dst ? dst->expires : rt->expires;
4754 expires -= jiffies;
4755 }
69cdf8f9 4756
d4ead6b3 4757 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4758 goto nla_put_failure;
2d7202bf 4759
93c2fb25 4760 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
c78ba6d6
LR
4761 goto nla_put_failure;
4762
19e42e45 4763
053c095a
JB
4764 nlmsg_end(skb, nlh);
4765 return 0;
2d7202bf
TG
4766
4767nla_put_failure:
26932566
PM
4768 nlmsg_cancel(skb, nlh);
4769 return -EMSGSIZE;
1da177e4
LT
4770}
4771
8d1c802b 4772int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4773{
4774 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4775 struct net *net = arg->net;
4776
421842ed 4777 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4778 return 0;
1da177e4 4779
2d7202bf
TG
4780 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4781 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4782
4783 /* user wants prefix routes only */
4784 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4785 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4786 /* success since this is not a prefix route */
4787 return 1;
4788 }
4789 }
1da177e4 4790
d4ead6b3
DA
4791 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4792 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4793 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4794}
4795
c21ef3e3
DA
4796static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4797 struct netlink_ext_ack *extack)
1da177e4 4798{
3b1e0a65 4799 struct net *net = sock_net(in_skb->sk);
ab364a6f 4800 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4801 int err, iif = 0, oif = 0;
a68886a6 4802 struct fib6_info *from;
18c3a61c 4803 struct dst_entry *dst;
ab364a6f 4804 struct rt6_info *rt;
1da177e4 4805 struct sk_buff *skb;
ab364a6f 4806 struct rtmsg *rtm;
4c9483b2 4807 struct flowi6 fl6;
18c3a61c 4808 bool fibmatch;
1da177e4 4809
fceb6435 4810 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4811 extack);
ab364a6f
TG
4812 if (err < 0)
4813 goto errout;
1da177e4 4814
ab364a6f 4815 err = -EINVAL;
4c9483b2 4816 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4817 rtm = nlmsg_data(nlh);
4818 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4819 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4820
ab364a6f
TG
4821 if (tb[RTA_SRC]) {
4822 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4823 goto errout;
4824
4e3fd7a0 4825 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4826 }
4827
4828 if (tb[RTA_DST]) {
4829 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4830 goto errout;
4831
4e3fd7a0 4832 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4833 }
4834
4835 if (tb[RTA_IIF])
4836 iif = nla_get_u32(tb[RTA_IIF]);
4837
4838 if (tb[RTA_OIF])
72331bc0 4839 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4840
2e47b291
LC
4841 if (tb[RTA_MARK])
4842 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4843
622ec2c9
LC
4844 if (tb[RTA_UID])
4845 fl6.flowi6_uid = make_kuid(current_user_ns(),
4846 nla_get_u32(tb[RTA_UID]));
4847 else
4848 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4849
eacb9384
RP
4850 if (tb[RTA_SPORT])
4851 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4852
4853 if (tb[RTA_DPORT])
4854 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4855
4856 if (tb[RTA_IP_PROTO]) {
4857 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4858 &fl6.flowi6_proto, extack);
4859 if (err)
4860 goto errout;
4861 }
4862
1da177e4
LT
4863 if (iif) {
4864 struct net_device *dev;
72331bc0
SL
4865 int flags = 0;
4866
121622db
FW
4867 rcu_read_lock();
4868
4869 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4870 if (!dev) {
121622db 4871 rcu_read_unlock();
1da177e4 4872 err = -ENODEV;
ab364a6f 4873 goto errout;
1da177e4 4874 }
72331bc0
SL
4875
4876 fl6.flowi6_iif = iif;
4877
4878 if (!ipv6_addr_any(&fl6.saddr))
4879 flags |= RT6_LOOKUP_F_HAS_SADDR;
4880
b75cc8f9 4881 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4882
4883 rcu_read_unlock();
72331bc0
SL
4884 } else {
4885 fl6.flowi6_oif = oif;
4886
58acfd71 4887 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4888 }
4889
18c3a61c
RP
4890
4891 rt = container_of(dst, struct rt6_info, dst);
4892 if (rt->dst.error) {
4893 err = rt->dst.error;
4894 ip6_rt_put(rt);
4895 goto errout;
1da177e4
LT
4896 }
4897
9d6acb3b
WC
4898 if (rt == net->ipv6.ip6_null_entry) {
4899 err = rt->dst.error;
4900 ip6_rt_put(rt);
4901 goto errout;
4902 }
4903
ab364a6f 4904 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4905 if (!skb) {
94e187c0 4906 ip6_rt_put(rt);
ab364a6f
TG
4907 err = -ENOBUFS;
4908 goto errout;
4909 }
1da177e4 4910
d8d1f30b 4911 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4912
4913 rcu_read_lock();
4914 from = rcu_dereference(rt->from);
4915
18c3a61c 4916 if (fibmatch)
a68886a6 4917 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4918 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4919 nlh->nlmsg_seq, 0);
4920 else
a68886a6
DA
4921 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4922 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4923 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4924 0);
a68886a6
DA
4925 rcu_read_unlock();
4926
1da177e4 4927 if (err < 0) {
ab364a6f
TG
4928 kfree_skb(skb);
4929 goto errout;
1da177e4
LT
4930 }
4931
15e47304 4932 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4933errout:
1da177e4 4934 return err;
1da177e4
LT
4935}
4936
8d1c802b 4937void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4938 unsigned int nlm_flags)
1da177e4
LT
4939{
4940 struct sk_buff *skb;
5578689a 4941 struct net *net = info->nl_net;
528c4ceb
DL
4942 u32 seq;
4943 int err;
4944
4945 err = -ENOBUFS;
38308473 4946 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4947
19e42e45 4948 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4949 if (!skb)
21713ebc
TG
4950 goto errout;
4951
d4ead6b3
DA
4952 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4953 event, info->portid, seq, nlm_flags);
26932566
PM
4954 if (err < 0) {
4955 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4956 WARN_ON(err == -EMSGSIZE);
4957 kfree_skb(skb);
4958 goto errout;
4959 }
15e47304 4960 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4961 info->nlh, gfp_any());
4962 return;
21713ebc
TG
4963errout:
4964 if (err < 0)
5578689a 4965 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4966}
4967
8ed67789 4968static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4969 unsigned long event, void *ptr)
8ed67789 4970{
351638e7 4971 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4972 struct net *net = dev_net(dev);
8ed67789 4973
242d3a49
WC
4974 if (!(dev->flags & IFF_LOOPBACK))
4975 return NOTIFY_OK;
4976
4977 if (event == NETDEV_REGISTER) {
421842ed 4978 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 4979 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4980 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4981#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4982 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4983 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4984 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4985 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4986#endif
76da0704
WC
4987 } else if (event == NETDEV_UNREGISTER &&
4988 dev->reg_state != NETREG_UNREGISTERED) {
4989 /* NETDEV_UNREGISTER could be fired for multiple times by
4990 * netdev_wait_allrefs(). Make sure we only call this once.
4991 */
12d94a80 4992 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4993#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4994 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4995 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4996#endif
4997 }
4998
4999 return NOTIFY_OK;
5000}
5001
1da177e4
LT
5002/*
5003 * /proc
5004 */
5005
5006#ifdef CONFIG_PROC_FS
5007
33120b30 5008static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
5009 .open = ipv6_route_open,
5010 .read = seq_read,
5011 .llseek = seq_lseek,
8d2ca1d7 5012 .release = seq_release_net,
33120b30
AD
5013};
5014
1da177e4
LT
5015static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5016{
69ddb805 5017 struct net *net = (struct net *)seq->private;
1da177e4 5018 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5019 net->ipv6.rt6_stats->fib_nodes,
5020 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5021 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5022 net->ipv6.rt6_stats->fib_rt_entries,
5023 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5024 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5025 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5026
5027 return 0;
5028}
5029
5030static int rt6_stats_seq_open(struct inode *inode, struct file *file)
5031{
de05c557 5032 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
5033}
5034
9a32144e 5035static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
5036 .open = rt6_stats_seq_open,
5037 .read = seq_read,
5038 .llseek = seq_lseek,
b6fcbdb4 5039 .release = single_release_net,
1da177e4
LT
5040};
5041#endif /* CONFIG_PROC_FS */
5042
5043#ifdef CONFIG_SYSCTL
5044
1da177e4 5045static
fe2c6338 5046int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5047 void __user *buffer, size_t *lenp, loff_t *ppos)
5048{
c486da34
LAG
5049 struct net *net;
5050 int delay;
5051 if (!write)
1da177e4 5052 return -EINVAL;
c486da34
LAG
5053
5054 net = (struct net *)ctl->extra1;
5055 delay = net->ipv6.sysctl.flush_delay;
5056 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5057 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5058 return 0;
1da177e4
LT
5059}
5060
fe2c6338 5061struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5062 {
1da177e4 5063 .procname = "flush",
4990509f 5064 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5065 .maxlen = sizeof(int),
89c8b3a1 5066 .mode = 0200,
6d9f239a 5067 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5068 },
5069 {
1da177e4 5070 .procname = "gc_thresh",
9a7ec3a9 5071 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5072 .maxlen = sizeof(int),
5073 .mode = 0644,
6d9f239a 5074 .proc_handler = proc_dointvec,
1da177e4
LT
5075 },
5076 {
1da177e4 5077 .procname = "max_size",
4990509f 5078 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5079 .maxlen = sizeof(int),
5080 .mode = 0644,
6d9f239a 5081 .proc_handler = proc_dointvec,
1da177e4
LT
5082 },
5083 {
1da177e4 5084 .procname = "gc_min_interval",
4990509f 5085 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5086 .maxlen = sizeof(int),
5087 .mode = 0644,
6d9f239a 5088 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5089 },
5090 {
1da177e4 5091 .procname = "gc_timeout",
4990509f 5092 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5093 .maxlen = sizeof(int),
5094 .mode = 0644,
6d9f239a 5095 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5096 },
5097 {
1da177e4 5098 .procname = "gc_interval",
4990509f 5099 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5100 .maxlen = sizeof(int),
5101 .mode = 0644,
6d9f239a 5102 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5103 },
5104 {
1da177e4 5105 .procname = "gc_elasticity",
4990509f 5106 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5107 .maxlen = sizeof(int),
5108 .mode = 0644,
f3d3f616 5109 .proc_handler = proc_dointvec,
1da177e4
LT
5110 },
5111 {
1da177e4 5112 .procname = "mtu_expires",
4990509f 5113 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5114 .maxlen = sizeof(int),
5115 .mode = 0644,
6d9f239a 5116 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5117 },
5118 {
1da177e4 5119 .procname = "min_adv_mss",
4990509f 5120 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5121 .maxlen = sizeof(int),
5122 .mode = 0644,
f3d3f616 5123 .proc_handler = proc_dointvec,
1da177e4
LT
5124 },
5125 {
1da177e4 5126 .procname = "gc_min_interval_ms",
4990509f 5127 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5128 .maxlen = sizeof(int),
5129 .mode = 0644,
6d9f239a 5130 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5131 },
f8572d8f 5132 { }
1da177e4
LT
5133};
5134
2c8c1e72 5135struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5136{
5137 struct ctl_table *table;
5138
5139 table = kmemdup(ipv6_route_table_template,
5140 sizeof(ipv6_route_table_template),
5141 GFP_KERNEL);
5ee09105
YH
5142
5143 if (table) {
5144 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5145 table[0].extra1 = net;
86393e52 5146 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5147 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5148 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5149 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5150 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5151 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5152 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5153 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5154 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5155
5156 /* Don't export sysctls to unprivileged users */
5157 if (net->user_ns != &init_user_ns)
5158 table[0].procname = NULL;
5ee09105
YH
5159 }
5160
760f2d01
DL
5161 return table;
5162}
1da177e4
LT
5163#endif
5164
2c8c1e72 5165static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5166{
633d424b 5167 int ret = -ENOMEM;
8ed67789 5168
86393e52
AD
5169 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5170 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5171
fc66f95c
ED
5172 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5173 goto out_ip6_dst_ops;
5174
421842ed
DA
5175 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5176 sizeof(*net->ipv6.fib6_null_entry),
5177 GFP_KERNEL);
5178 if (!net->ipv6.fib6_null_entry)
5179 goto out_ip6_dst_entries;
5180
8ed67789
DL
5181 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5182 sizeof(*net->ipv6.ip6_null_entry),
5183 GFP_KERNEL);
5184 if (!net->ipv6.ip6_null_entry)
421842ed 5185 goto out_fib6_null_entry;
d8d1f30b 5186 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5187 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5188 ip6_template_metrics, true);
8ed67789
DL
5189
5190#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5191 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5192 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5193 sizeof(*net->ipv6.ip6_prohibit_entry),
5194 GFP_KERNEL);
68fffc67
PZ
5195 if (!net->ipv6.ip6_prohibit_entry)
5196 goto out_ip6_null_entry;
d8d1f30b 5197 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5198 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5199 ip6_template_metrics, true);
8ed67789
DL
5200
5201 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5202 sizeof(*net->ipv6.ip6_blk_hole_entry),
5203 GFP_KERNEL);
68fffc67
PZ
5204 if (!net->ipv6.ip6_blk_hole_entry)
5205 goto out_ip6_prohibit_entry;
d8d1f30b 5206 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5207 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5208 ip6_template_metrics, true);
8ed67789
DL
5209#endif
5210
b339a47c
PZ
5211 net->ipv6.sysctl.flush_delay = 0;
5212 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5213 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5214 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5215 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5216 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5217 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5218 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5219
6891a346
BT
5220 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5221
8ed67789
DL
5222 ret = 0;
5223out:
5224 return ret;
f2fc6a54 5225
68fffc67
PZ
5226#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5227out_ip6_prohibit_entry:
5228 kfree(net->ipv6.ip6_prohibit_entry);
5229out_ip6_null_entry:
5230 kfree(net->ipv6.ip6_null_entry);
5231#endif
421842ed
DA
5232out_fib6_null_entry:
5233 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5234out_ip6_dst_entries:
5235 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5236out_ip6_dst_ops:
f2fc6a54 5237 goto out;
cdb18761
DL
5238}
5239
2c8c1e72 5240static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5241{
421842ed 5242 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5243 kfree(net->ipv6.ip6_null_entry);
5244#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5245 kfree(net->ipv6.ip6_prohibit_entry);
5246 kfree(net->ipv6.ip6_blk_hole_entry);
5247#endif
41bb78b4 5248 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5249}
5250
d189634e
TG
5251static int __net_init ip6_route_net_init_late(struct net *net)
5252{
5253#ifdef CONFIG_PROC_FS
d4beaa66 5254 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5255 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5256#endif
5257 return 0;
5258}
5259
5260static void __net_exit ip6_route_net_exit_late(struct net *net)
5261{
5262#ifdef CONFIG_PROC_FS
ece31ffd
G
5263 remove_proc_entry("ipv6_route", net->proc_net);
5264 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5265#endif
5266}
5267
cdb18761
DL
5268static struct pernet_operations ip6_route_net_ops = {
5269 .init = ip6_route_net_init,
5270 .exit = ip6_route_net_exit,
5271};
5272
c3426b47
DM
5273static int __net_init ipv6_inetpeer_init(struct net *net)
5274{
5275 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5276
5277 if (!bp)
5278 return -ENOMEM;
5279 inet_peer_base_init(bp);
5280 net->ipv6.peers = bp;
5281 return 0;
5282}
5283
5284static void __net_exit ipv6_inetpeer_exit(struct net *net)
5285{
5286 struct inet_peer_base *bp = net->ipv6.peers;
5287
5288 net->ipv6.peers = NULL;
56a6b248 5289 inetpeer_invalidate_tree(bp);
c3426b47
DM
5290 kfree(bp);
5291}
5292
2b823f72 5293static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5294 .init = ipv6_inetpeer_init,
5295 .exit = ipv6_inetpeer_exit,
5296};
5297
d189634e
TG
5298static struct pernet_operations ip6_route_net_late_ops = {
5299 .init = ip6_route_net_init_late,
5300 .exit = ip6_route_net_exit_late,
5301};
5302
8ed67789
DL
5303static struct notifier_block ip6_route_dev_notifier = {
5304 .notifier_call = ip6_route_dev_notify,
242d3a49 5305 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5306};
5307
2f460933
WC
5308void __init ip6_route_init_special_entries(void)
5309{
5310 /* Registering of the loopback is done before this portion of code,
5311 * the loopback reference in rt6_info will not be taken, do it
5312 * manually for init_net */
421842ed 5313 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5314 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5315 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5316 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5317 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5318 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5319 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5320 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5321 #endif
5322}
5323
433d49c3 5324int __init ip6_route_init(void)
1da177e4 5325{
433d49c3 5326 int ret;
8d0b94af 5327 int cpu;
433d49c3 5328
9a7ec3a9
DL
5329 ret = -ENOMEM;
5330 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5331 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5332 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5333 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5334 goto out;
14e50e57 5335
fc66f95c 5336 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5337 if (ret)
bdb3289f 5338 goto out_kmem_cache;
bdb3289f 5339
c3426b47
DM
5340 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5341 if (ret)
e8803b6c 5342 goto out_dst_entries;
2a0c451a 5343
7e52b33b
DM
5344 ret = register_pernet_subsys(&ip6_route_net_ops);
5345 if (ret)
5346 goto out_register_inetpeer;
c3426b47 5347
5dc121e9
AE
5348 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5349
e8803b6c 5350 ret = fib6_init();
433d49c3 5351 if (ret)
8ed67789 5352 goto out_register_subsys;
433d49c3 5353
433d49c3
DL
5354 ret = xfrm6_init();
5355 if (ret)
e8803b6c 5356 goto out_fib6_init;
c35b7e72 5357
433d49c3
DL
5358 ret = fib6_rules_init();
5359 if (ret)
5360 goto xfrm6_init;
7e5449c2 5361
d189634e
TG
5362 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5363 if (ret)
5364 goto fib6_rules_init;
5365
16feebcf
FW
5366 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5367 inet6_rtm_newroute, NULL, 0);
5368 if (ret < 0)
5369 goto out_register_late_subsys;
5370
5371 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5372 inet6_rtm_delroute, NULL, 0);
5373 if (ret < 0)
5374 goto out_register_late_subsys;
5375
5376 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5377 inet6_rtm_getroute, NULL,
5378 RTNL_FLAG_DOIT_UNLOCKED);
5379 if (ret < 0)
d189634e 5380 goto out_register_late_subsys;
c127ea2c 5381
8ed67789 5382 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5383 if (ret)
d189634e 5384 goto out_register_late_subsys;
8ed67789 5385
8d0b94af
MKL
5386 for_each_possible_cpu(cpu) {
5387 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5388
5389 INIT_LIST_HEAD(&ul->head);
5390 spin_lock_init(&ul->lock);
5391 }
5392
433d49c3
DL
5393out:
5394 return ret;
5395
d189634e 5396out_register_late_subsys:
16feebcf 5397 rtnl_unregister_all(PF_INET6);
d189634e 5398 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5399fib6_rules_init:
433d49c3
DL
5400 fib6_rules_cleanup();
5401xfrm6_init:
433d49c3 5402 xfrm6_fini();
2a0c451a
TG
5403out_fib6_init:
5404 fib6_gc_cleanup();
8ed67789
DL
5405out_register_subsys:
5406 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5407out_register_inetpeer:
5408 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5409out_dst_entries:
5410 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5411out_kmem_cache:
f2fc6a54 5412 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5413 goto out;
1da177e4
LT
5414}
5415
5416void ip6_route_cleanup(void)
5417{
8ed67789 5418 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5419 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5420 fib6_rules_cleanup();
1da177e4 5421 xfrm6_fini();
1da177e4 5422 fib6_gc_cleanup();
c3426b47 5423 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5424 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5425 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5426 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5427}