Merge tag 'drm-fixes-2018-09-12' of git://anongit.freedesktop.org/drm/drm
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result of a next-hop neighbour check.  Every failure is negative so that
 * callers can test "< 0"; the only success value is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route must not be used */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in a failed state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry: ask for round-robin */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* Blackhole routes ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* deliberately a no-op */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 368 struct fib6_info *from;
8d0b94af 369 struct inet6_dev *idev;
1da177e4 370
4b32b5ad 371 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
38308473 375 if (idev) {
1da177e4
LT
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
1ab1457c 378 }
1716a961 379
a68886a6
DA
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
93531c67 383 fib6_info_release(from);
a68886a6 384 rcu_read_unlock();
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
3b290a31
DA
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
51ebd318 433{
8d1c802b 434 struct fib6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8d1c802b
DA
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4 470{
8d1c802b 471 struct fib6_info *sprt;
1da177e4 472
5e670d84
DA
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 475 return rt;
dd3abc4e 476
8fb11a9a 477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 479
5e670d84 480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
481 continue;
482
dd3abc4e 483 if (oif) {
1da177e4
LT
484 if (dev->ifindex == oif)
485 return sprt;
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
eea68cd3
DA
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
8067bb8c 495
421842ed 496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
497}
498
27097255 499#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
500struct __rt6_probe_work {
501 struct work_struct work;
502 struct in6_addr target;
503 struct net_device *dev;
504};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 514 dev_put(work->dev);
662f5533 515 kfree(work);
c2f17e82
HFS
516}
517
8d1c802b 518static void rt6_probe(struct fib6_info *rt)
27097255 519{
990edb42 520 struct __rt6_probe_work *work;
5e670d84 521 const struct in6_addr *nh_gw;
f2c31e32 522 struct neighbour *neigh;
5e670d84
DA
523 struct net_device *dev;
524
27097255
YH
525 /*
526 * Okay, this does not seem to be appropriate
527 * for now, however, we need to check if it
528 * is really so; aka Router Reachability Probing.
529 *
530 * Router Reachability Probe MUST be rate-limited
531 * to no more than one per minute.
532 */
93c2fb25 533 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 534 return;
5e670d84
DA
535
536 nh_gw = &rt->fib6_nh.nh_gw;
537 dev = rt->fib6_nh.nh_dev;
2152caea 538 rcu_read_lock_bh();
5e670d84 539 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 540 if (neigh) {
dcd1f572
DA
541 struct inet6_dev *idev;
542
8d6c31bf
MKL
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
dcd1f572 546 idev = __in6_dev_get(dev);
990edb42 547 work = NULL;
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
990edb42
MKL
557 } else {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 559 }
990edb42
MKL
560
561 if (work) {
562 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
990edb42
MKL
566 schedule_work(&work->work);
567 }
568
8d6c31bf 569out:
2152caea 570 rcu_read_unlock_bh();
27097255
YH
571}
572#else
/* Without CONFIG_IPV6_ROUTER_PREF no router probing is performed. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* intentionally empty */
}
576#endif
577
1da177e4 578/*
554cfb7e 579 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 580 */
8d1c802b 581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 582{
5e670d84
DA
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
161980f4 585 if (!oif || dev->ifindex == oif)
554cfb7e 586 return 2;
161980f4 587 return 0;
554cfb7e 588}
1da177e4 589
8d1c802b 590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 591{
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 593 struct neighbour *neigh;
f2c31e32 594
93c2fb25
DA
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 597 return RT6_NUD_SUCCEED;
145a3621
YH
598
599 rcu_read_lock_bh();
5e670d84
DA
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
145a3621
YH
602 if (neigh) {
603 read_lock(&neigh->lock);
554cfb7e 604 if (neigh->nud_state & NUD_VALID)
afc154e9 605 ret = RT6_NUD_SUCCEED;
398bcbeb 606#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 607 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 608 ret = RT6_NUD_SUCCEED;
7e980569
JB
609 else
610 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 611#endif
145a3621 612 read_unlock(&neigh->lock);
afc154e9
HFS
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 616 }
145a3621
YH
617 rcu_read_unlock_bh();
618
a5a81f0b 619 return ret;
1da177e4
LT
620}
621
8d1c802b 622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 623{
a5a81f0b 624 int m;
1ab1457c 625
4d0c5911 626 m = rt6_check_dev(rt, oif);
77d16f45 627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 628 return RT6_NUD_FAIL_HARD;
ebacaaa0 629#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 631#endif
afc154e9
HFS
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
554cfb7e
YH
637 return m;
638}
639
dcd1f572
DA
640/* called with rc_read_lock held */
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
8d1c802b
DA
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
afc154e9 657 bool *do_rr)
554cfb7e 658{
f11e6659 659 int m;
afc154e9 660 bool match_do_rr = false;
35103d11 661
5e670d84 662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
663 goto out;
664
dcd1f572 665 if (fib6_ignore_linkdown(rt) &&
5e670d84 666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 668 goto out;
f11e6659 669
14895687 670 if (fib6_check_expired(rt))
f11e6659
DM
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
7e980569 674 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
675 match_do_rr = true;
676 m = 0; /* lowest valid score */
7e980569 677 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 678 goto out;
afc154e9
HFS
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
f11e6659 683
7e980569 684 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 685 if (m > *mpri) {
afc154e9 686 *do_rr = match_do_rr;
f11e6659
DM
687 *mpri = m;
688 match = rt;
f11e6659 689 }
f11e6659
DM
690out:
691 return match;
692}
693
8d1c802b
DA
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
8d1c802b 700 struct fib6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf 704 cont = NULL;
8fb11a9a 705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 706 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
66f5d6ce 714 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 715 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 716 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
717 cont = rt;
718 break;
719 }
720
afc154e9 721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
722 }
723
724 if (match || !cont)
725 return match;
726
8fb11a9a 727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 729
f11e6659
DM
730 return match;
731}
1da177e4 732
8d1c802b 733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 734 int oif, int strict)
f11e6659 735{
8d1c802b
DA
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
afc154e9 738 bool do_rr = false;
17ecf590 739 int key_plen;
1da177e4 740
421842ed
DA
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
8d1040e8 743
66f5d6ce 744 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 745 if (!rt0)
66f5d6ce 746 rt0 = leaf;
1da177e4 747
17ecf590
WW
748 /* Double check to make sure fn is not an intermediate node
749 * and fn->leaf does not points to its child's leaf
750 * (This might happen if all routes under fn are deleted from
751 * the tree and fib6_repair_tree() is called on the node.)
752 */
93c2fb25 753 key_plen = rt0->fib6_dst.plen;
17ecf590 754#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
17ecf590
WW
757#endif
758 if (fn->fn_bit != key_plen)
421842ed 759 return net->ipv6.fib6_null_entry;
17ecf590 760
93c2fb25 761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
8fb11a9a 765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
93c2fb25 768 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 769 next = leaf;
f11e6659 770
66f5d6ce 771 if (next != rt0) {
93c2fb25 772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 773 /* make sure next is not being deleted from the tree */
93c2fb25 774 if (next->fib6_node)
66f5d6ce 775 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 }
1da177e4 778 }
1da177e4 779
421842ed 780 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
781}
782
8d1c802b 783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 784{
93c2fb25 785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
8d1c802b 797 struct fib6_info *rt;
70ceb4f5
YH
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567 834 if (rinfo->prefix_len == 0)
afb1d4b5 835 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
afb1d4b5 841 ip6_del_rt(net, rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5 848 else if (rt)
93c2fb25
DA
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
851
852 if (rt) {
1716a961 853 if (!addrconf_finite_timeout(lifetime))
14895687 854 fib6_clean_expires(rt);
1716a961 855 else
14895687 856 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 857
93531c67 858 fib6_info_release(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
ae90d867
DA
864/*
865 * Misc support functions
866 */
867
868/* called with rcu_lock held */
8d1c802b 869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 870{
5e670d84 871 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 872
93c2fb25 873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
874 /* for copies of local routes, dst->dev needs to be the
875 * device if it is a master device, the master device if
876 * device is enslaved, and the loopback as the default
877 */
878 if (netif_is_l3_slave(dev) &&
93c2fb25 879 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883 /* last case is netif_is_l3_master(dev) is true in which
884 * case we want dev returned to be dev
885 */
886 }
887
888 return dev;
889}
890
6edb3c96
DA
891static const int fib6_prop[RTN_MAX + 1] = {
892 [RTN_UNSPEC] = 0,
893 [RTN_UNICAST] = 0,
894 [RTN_LOCAL] = 0,
895 [RTN_BROADCAST] = 0,
896 [RTN_ANYCAST] = 0,
897 [RTN_MULTICAST] = 0,
898 [RTN_BLACKHOLE] = -EINVAL,
899 [RTN_UNREACHABLE] = -EHOSTUNREACH,
900 [RTN_PROHIBIT] = -EACCES,
901 [RTN_THROW] = -EAGAIN,
902 [RTN_NAT] = -EINVAL,
903 [RTN_XRESOLVE] = -EINVAL,
904};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
8d1c802b 911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
8d1c802b 925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
8d1c802b 947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 948{
3b6761d1
DA
949 rt->dst.flags |= fib6_info_dst_flags(ort);
950
93c2fb25 951 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
952 ip6_rt_init_dst_reject(rt, ort);
953 return;
954 }
955
956 rt->dst.error = 0;
957 rt->dst.output = ip6_output;
958
d23c4b63 959 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 960 rt->dst.input = ip6_input;
93c2fb25 961 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
962 rt->dst.input = ip6_mc_input;
963 } else {
964 rt->dst.input = ip6_forward;
965 }
966
967 if (ort->fib6_nh.nh_lwtstate) {
968 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969 lwtunnel_set_redirect(&rt->dst);
970 }
971
972 rt->dst.lastuse = jiffies;
973}
974
e873e4b9 975/* Caller must already hold reference to @from */
8d1c802b 976static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 977{
ae90d867 978 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 979 rcu_assign_pointer(rt->from, from);
d4ead6b3 980 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
ae90d867
DA
981}
982
e873e4b9 983/* Caller must already hold reference to @ort */
8d1c802b 984static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 985{
dcd1f572
DA
986 struct net_device *dev = fib6_info_nh_dev(ort);
987
6edb3c96
DA
988 ip6_rt_init_dst(rt, ort);
989
93c2fb25 990 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 991 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 992 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 993 rt->rt6i_flags = ort->fib6_flags;
ae90d867 994 rt6_set_from(rt, ort);
ae90d867 995#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 996 rt->rt6i_src = ort->fib6_src;
ae90d867 997#endif
93c2fb25 998 rt->rt6i_prefsrc = ort->fib6_prefsrc;
ae90d867
DA
999}
1000
a3c00e46
MKL
1001static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1002 struct in6_addr *saddr)
1003{
66f5d6ce 1004 struct fib6_node *pn, *sn;
a3c00e46
MKL
1005 while (1) {
1006 if (fn->fn_flags & RTN_TL_ROOT)
1007 return NULL;
66f5d6ce
WW
1008 pn = rcu_dereference(fn->parent);
1009 sn = FIB6_SUBTREE(pn);
1010 if (sn && sn != fn)
6454743b 1011 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1012 else
1013 fn = pn;
1014 if (fn->fn_flags & RTN_RTINFO)
1015 return fn;
1016 }
1017}
c71099ac 1018
d3843fe5
WW
1019static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1020 bool null_fallback)
1021{
1022 struct rt6_info *rt = *prt;
1023
1024 if (dst_hold_safe(&rt->dst))
1025 return true;
1026 if (null_fallback) {
1027 rt = net->ipv6.ip6_null_entry;
1028 dst_hold(&rt->dst);
1029 } else {
1030 rt = NULL;
1031 }
1032 *prt = rt;
1033 return false;
1034}
1035
dec9b0e2 1036/* called with rcu_lock held */
8d1c802b 1037static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1038{
3b6761d1 1039 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1040 struct net_device *dev = rt->fib6_nh.nh_dev;
1041 struct rt6_info *nrt;
1042
e873e4b9
WW
1043 if (!fib6_info_hold_safe(rt))
1044 return NULL;
1045
93531c67 1046 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1047 if (nrt)
1048 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1049 else
1050 fib6_info_release(rt);
dec9b0e2
DA
1051
1052 return nrt;
1053}
1054
8ed67789
DL
1055static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1056 struct fib6_table *table,
b75cc8f9
DA
1057 struct flowi6 *fl6,
1058 const struct sk_buff *skb,
1059 int flags)
1da177e4 1060{
8d1c802b 1061 struct fib6_info *f6i;
1da177e4 1062 struct fib6_node *fn;
23fb93a4 1063 struct rt6_info *rt;
1da177e4 1064
b6cdbc85
DA
1065 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1066 flags &= ~RT6_LOOKUP_F_IFACE;
1067
66f5d6ce 1068 rcu_read_lock();
6454743b 1069 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1070restart:
23fb93a4
DA
1071 f6i = rcu_dereference(fn->leaf);
1072 if (!f6i) {
1073 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1074 } else {
23fb93a4 1075 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1076 fl6->flowi6_oif, flags);
93c2fb25 1077 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1078 f6i = fib6_multipath_select(net, f6i, fl6,
1079 fl6->flowi6_oif, skb,
1080 flags);
66f5d6ce 1081 }
23fb93a4 1082 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1083 fn = fib6_backtrack(fn, &fl6->saddr);
1084 if (fn)
1085 goto restart;
1086 }
2b760fcf 1087
d4bea421 1088 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1089
2b760fcf 1090 /* Search through exception table */
23fb93a4
DA
1091 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1092 if (rt) {
dec9b0e2
DA
1093 if (ip6_hold_safe(net, &rt, true))
1094 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1095 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1096 rt = net->ipv6.ip6_null_entry;
1097 dst_hold(&rt->dst);
23fb93a4
DA
1098 } else {
1099 rt = ip6_create_rt_rcu(f6i);
1100 if (!rt) {
1101 rt = net->ipv6.ip6_null_entry;
1102 dst_hold(&rt->dst);
1103 }
dec9b0e2 1104 }
b811580d 1105
66f5d6ce 1106 rcu_read_unlock();
b811580d 1107
c71099ac 1108 return rt;
c71099ac
TG
1109}
1110
67ba4152 1111struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1112 const struct sk_buff *skb, int flags)
ea6e574e 1113{
b75cc8f9 1114 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1115}
1116EXPORT_SYMBOL_GPL(ip6_route_lookup);
1117
9acd9f3a 1118struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1119 const struct in6_addr *saddr, int oif,
1120 const struct sk_buff *skb, int strict)
c71099ac 1121{
4c9483b2
DM
1122 struct flowi6 fl6 = {
1123 .flowi6_oif = oif,
1124 .daddr = *daddr,
c71099ac
TG
1125 };
1126 struct dst_entry *dst;
77d16f45 1127 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1128
adaa70bb 1129 if (saddr) {
4c9483b2 1130 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1131 flags |= RT6_LOOKUP_F_HAS_SADDR;
1132 }
1133
b75cc8f9 1134 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1135 if (dst->error == 0)
1136 return (struct rt6_info *) dst;
1137
1138 dst_release(dst);
1139
1da177e4
LT
1140 return NULL;
1141}
7159039a
YH
1142EXPORT_SYMBOL(rt6_lookup);
1143
c71099ac 1144/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1145 * It takes new route entry, the addition fails by any reason the
1146 * route is released.
1147 * Caller must hold dst before calling it.
1da177e4
LT
1148 */
1149
8d1c802b 1150static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1151 struct netlink_ext_ack *extack)
1da177e4
LT
1152{
1153 int err;
c71099ac 1154 struct fib6_table *table;
1da177e4 1155
93c2fb25 1156 table = rt->fib6_table;
66f5d6ce 1157 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1158 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1159 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1160
1161 return err;
1162}
1163
8d1c802b 1164int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1165{
afb1d4b5 1166 struct nl_info info = { .nl_net = net, };
e715b6d3 1167
d4ead6b3 1168 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1169}
1170
8d1c802b 1171static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1172 const struct in6_addr *daddr,
1173 const struct in6_addr *saddr)
1da177e4 1174{
4832c30d 1175 struct net_device *dev;
1da177e4
LT
1176 struct rt6_info *rt;
1177
1178 /*
1179 * Clone the route.
1180 */
1181
e873e4b9
WW
1182 if (!fib6_info_hold_safe(ort))
1183 return NULL;
1184
4832c30d 1185 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1186 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1187 if (!rt) {
1188 fib6_info_release(ort);
83a09abd 1189 return NULL;
e873e4b9 1190 }
83a09abd
MKL
1191
1192 ip6_rt_copy_init(rt, ort);
1193 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1194 rt->dst.flags |= DST_HOST;
1195 rt->rt6i_dst.addr = *daddr;
1196 rt->rt6i_dst.plen = 128;
1da177e4 1197
83a09abd 1198 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1199 if (ort->fib6_dst.plen != 128 &&
1200 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1201 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1202#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1203 if (rt->rt6i_src.plen && saddr) {
1204 rt->rt6i_src.addr = *saddr;
1205 rt->rt6i_src.plen = 128;
8b9df265 1206 }
83a09abd 1207#endif
95a9a5ba 1208 }
1da177e4 1209
95a9a5ba
YH
1210 return rt;
1211}
1da177e4 1212
8d1c802b 1213static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1214{
3b6761d1 1215 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1216 struct net_device *dev;
d52d3997
MKL
1217 struct rt6_info *pcpu_rt;
1218
e873e4b9
WW
1219 if (!fib6_info_hold_safe(rt))
1220 return NULL;
1221
4832c30d
DA
1222 rcu_read_lock();
1223 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1224 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1225 rcu_read_unlock();
e873e4b9
WW
1226 if (!pcpu_rt) {
1227 fib6_info_release(rt);
d52d3997 1228 return NULL;
e873e4b9 1229 }
d52d3997 1230 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1231 pcpu_rt->rt6i_flags |= RTF_PCPU;
1232 return pcpu_rt;
1233}
1234
66f5d6ce 1235/* It should be called with rcu_read_lock() acquired */
8d1c802b 1236static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1237{
a73e4195 1238 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1239
1240 p = this_cpu_ptr(rt->rt6i_pcpu);
1241 pcpu_rt = *p;
1242
d4ead6b3
DA
1243 if (pcpu_rt)
1244 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1245
a73e4195
MKL
1246 return pcpu_rt;
1247}
1248
afb1d4b5 1249static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1250 struct fib6_info *rt)
a73e4195
MKL
1251{
1252 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1253
1254 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1255 if (!pcpu_rt) {
9c7370a1
MKL
1256 dst_hold(&net->ipv6.ip6_null_entry->dst);
1257 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1258 }
1259
a94b9367
WW
1260 dst_hold(&pcpu_rt->dst);
1261 p = this_cpu_ptr(rt->rt6i_pcpu);
1262 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1263 BUG_ON(prev);
a94b9367 1264
d52d3997
MKL
1265 return pcpu_rt;
1266}
1267
35732d01
WW
1268/* exception hash table implementation
1269 */
1270static DEFINE_SPINLOCK(rt6_exception_lock);
1271
1272/* Remove rt6_ex from hash table and free the memory
1273 * Caller must hold rt6_exception_lock
1274 */
1275static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1276 struct rt6_exception *rt6_ex)
1277{
b2427e67 1278 struct net *net;
81eb8447 1279
35732d01
WW
1280 if (!bucket || !rt6_ex)
1281 return;
b2427e67
CIK
1282
1283 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1284 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1285 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1286 kfree_rcu(rt6_ex, rcu);
1287 WARN_ON_ONCE(!bucket->depth);
1288 bucket->depth--;
81eb8447 1289 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1290}
1291
1292/* Remove oldest rt6_ex in bucket and free the memory
1293 * Caller must hold rt6_exception_lock
1294 */
1295static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1296{
1297 struct rt6_exception *rt6_ex, *oldest = NULL;
1298
1299 if (!bucket)
1300 return;
1301
1302 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1303 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1304 oldest = rt6_ex;
1305 }
1306 rt6_remove_exception(bucket, oldest);
1307}
1308
1309static u32 rt6_exception_hash(const struct in6_addr *dst,
1310 const struct in6_addr *src)
1311{
1312 static u32 seed __read_mostly;
1313 u32 val;
1314
1315 net_get_random_once(&seed, sizeof(seed));
1316 val = jhash(dst, sizeof(*dst), seed);
1317
1318#ifdef CONFIG_IPV6_SUBTREES
1319 if (src)
1320 val = jhash(src, sizeof(*src), val);
1321#endif
1322 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1323}
1324
1325/* Helper function to find the cached rt in the hash table
1326 * and update bucket pointer to point to the bucket for this
1327 * (daddr, saddr) pair
1328 * Caller must hold rt6_exception_lock
1329 */
1330static struct rt6_exception *
1331__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1332 const struct in6_addr *daddr,
1333 const struct in6_addr *saddr)
1334{
1335 struct rt6_exception *rt6_ex;
1336 u32 hval;
1337
1338 if (!(*bucket) || !daddr)
1339 return NULL;
1340
1341 hval = rt6_exception_hash(daddr, saddr);
1342 *bucket += hval;
1343
1344 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1345 struct rt6_info *rt6 = rt6_ex->rt6i;
1346 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1347
1348#ifdef CONFIG_IPV6_SUBTREES
1349 if (matched && saddr)
1350 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1351#endif
1352 if (matched)
1353 return rt6_ex;
1354 }
1355 return NULL;
1356}
1357
1358/* Helper function to find the cached rt in the hash table
1359 * and update bucket pointer to point to the bucket for this
1360 * (daddr, saddr) pair
1361 * Caller must hold rcu_read_lock()
1362 */
1363static struct rt6_exception *
1364__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1365 const struct in6_addr *daddr,
1366 const struct in6_addr *saddr)
1367{
1368 struct rt6_exception *rt6_ex;
1369 u32 hval;
1370
1371 WARN_ON_ONCE(!rcu_read_lock_held());
1372
1373 if (!(*bucket) || !daddr)
1374 return NULL;
1375
1376 hval = rt6_exception_hash(daddr, saddr);
1377 *bucket += hval;
1378
1379 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1380 struct rt6_info *rt6 = rt6_ex->rt6i;
1381 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1382
1383#ifdef CONFIG_IPV6_SUBTREES
1384 if (matched && saddr)
1385 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1386#endif
1387 if (matched)
1388 return rt6_ex;
1389 }
1390 return NULL;
1391}
1392
8d1c802b 1393static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1394{
1395 unsigned int mtu;
1396
dcd1f572
DA
1397 if (rt->fib6_pmtu) {
1398 mtu = rt->fib6_pmtu;
1399 } else {
1400 struct net_device *dev = fib6_info_nh_dev(rt);
1401 struct inet6_dev *idev;
1402
1403 rcu_read_lock();
1404 idev = __in6_dev_get(dev);
1405 mtu = idev->cnf.mtu6;
1406 rcu_read_unlock();
1407 }
1408
d4ead6b3
DA
1409 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1410
1411 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1412}
1413
35732d01 1414static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1415 struct fib6_info *ort)
35732d01 1416{
5e670d84 1417 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1418 struct rt6_exception_bucket *bucket;
1419 struct in6_addr *src_key = NULL;
1420 struct rt6_exception *rt6_ex;
1421 int err = 0;
1422
35732d01
WW
1423 spin_lock_bh(&rt6_exception_lock);
1424
1425 if (ort->exception_bucket_flushed) {
1426 err = -EINVAL;
1427 goto out;
1428 }
1429
1430 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1431 lockdep_is_held(&rt6_exception_lock));
1432 if (!bucket) {
1433 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1434 GFP_ATOMIC);
1435 if (!bucket) {
1436 err = -ENOMEM;
1437 goto out;
1438 }
1439 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1440 }
1441
1442#ifdef CONFIG_IPV6_SUBTREES
1443 /* rt6i_src.plen != 0 indicates ort is in subtree
1444 * and exception table is indexed by a hash of
1445 * both rt6i_dst and rt6i_src.
1446 * Otherwise, the exception table is indexed by
1447 * a hash of only rt6i_dst.
1448 */
93c2fb25 1449 if (ort->fib6_src.plen)
35732d01
WW
1450 src_key = &nrt->rt6i_src.addr;
1451#endif
60006a48
WW
1452
1453 /* Update rt6i_prefsrc as it could be changed
1454 * in rt6_remove_prefsrc()
1455 */
93c2fb25 1456 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1457 /* rt6_mtu_change() might lower mtu on ort.
1458 * Only insert this exception route if its mtu
1459 * is less than ort's mtu value.
1460 */
d4ead6b3 1461 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1462 err = -EINVAL;
1463 goto out;
1464 }
60006a48 1465
35732d01
WW
1466 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1467 src_key);
1468 if (rt6_ex)
1469 rt6_remove_exception(bucket, rt6_ex);
1470
1471 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1472 if (!rt6_ex) {
1473 err = -ENOMEM;
1474 goto out;
1475 }
1476 rt6_ex->rt6i = nrt;
1477 rt6_ex->stamp = jiffies;
35732d01
WW
1478 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1479 bucket->depth++;
81eb8447 1480 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1481
1482 if (bucket->depth > FIB6_MAX_DEPTH)
1483 rt6_exception_remove_oldest(bucket);
1484
1485out:
1486 spin_unlock_bh(&rt6_exception_lock);
1487
1488 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1489 if (!err) {
93c2fb25 1490 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1491 fib6_update_sernum(net, ort);
93c2fb25 1492 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1493 fib6_force_start_gc(net);
1494 }
35732d01
WW
1495
1496 return err;
1497}
1498
8d1c802b 1499void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1500{
1501 struct rt6_exception_bucket *bucket;
1502 struct rt6_exception *rt6_ex;
1503 struct hlist_node *tmp;
1504 int i;
1505
1506 spin_lock_bh(&rt6_exception_lock);
1507 /* Prevent rt6_insert_exception() to recreate the bucket list */
1508 rt->exception_bucket_flushed = 1;
1509
1510 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1511 lockdep_is_held(&rt6_exception_lock));
1512 if (!bucket)
1513 goto out;
1514
1515 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1516 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1517 rt6_remove_exception(bucket, rt6_ex);
1518 WARN_ON_ONCE(bucket->depth);
1519 bucket++;
1520 }
1521
1522out:
1523 spin_unlock_bh(&rt6_exception_lock);
1524}
1525
1526/* Find cached rt in the hash table inside passed in rt
1527 * Caller has to hold rcu_read_lock()
1528 */
8d1c802b 1529static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1530 struct in6_addr *daddr,
1531 struct in6_addr *saddr)
1532{
1533 struct rt6_exception_bucket *bucket;
1534 struct in6_addr *src_key = NULL;
1535 struct rt6_exception *rt6_ex;
1536 struct rt6_info *res = NULL;
1537
1538 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1539
1540#ifdef CONFIG_IPV6_SUBTREES
1541 /* rt6i_src.plen != 0 indicates rt is in subtree
1542 * and exception table is indexed by a hash of
1543 * both rt6i_dst and rt6i_src.
1544 * Otherwise, the exception table is indexed by
1545 * a hash of only rt6i_dst.
1546 */
93c2fb25 1547 if (rt->fib6_src.plen)
35732d01
WW
1548 src_key = saddr;
1549#endif
1550 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1551
1552 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1553 res = rt6_ex->rt6i;
1554
1555 return res;
1556}
1557
1558/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1559static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1560{
35732d01
WW
1561 struct rt6_exception_bucket *bucket;
1562 struct in6_addr *src_key = NULL;
1563 struct rt6_exception *rt6_ex;
8a14e46f 1564 struct fib6_info *from;
35732d01
WW
1565 int err;
1566
091311de 1567 from = rcu_dereference(rt->from);
35732d01 1568 if (!from ||
442d713b 1569 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1570 return -EINVAL;
1571
1572 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1573 return -ENOENT;
1574
1575 spin_lock_bh(&rt6_exception_lock);
1576 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1577 lockdep_is_held(&rt6_exception_lock));
1578#ifdef CONFIG_IPV6_SUBTREES
1579 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1580 * and exception table is indexed by a hash of
1581 * both rt6i_dst and rt6i_src.
1582 * Otherwise, the exception table is indexed by
1583 * a hash of only rt6i_dst.
1584 */
93c2fb25 1585 if (from->fib6_src.plen)
35732d01
WW
1586 src_key = &rt->rt6i_src.addr;
1587#endif
1588 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1589 &rt->rt6i_dst.addr,
1590 src_key);
1591 if (rt6_ex) {
1592 rt6_remove_exception(bucket, rt6_ex);
1593 err = 0;
1594 } else {
1595 err = -ENOENT;
1596 }
1597
1598 spin_unlock_bh(&rt6_exception_lock);
1599 return err;
1600}
1601
1602/* Find rt6_ex which contains the passed in rt cache and
1603 * refresh its stamp
1604 */
1605static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1606{
35732d01 1607 struct rt6_exception_bucket *bucket;
8d1c802b 1608 struct fib6_info *from = rt->from;
35732d01
WW
1609 struct in6_addr *src_key = NULL;
1610 struct rt6_exception *rt6_ex;
1611
1612 if (!from ||
442d713b 1613 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1614 return;
1615
1616 rcu_read_lock();
1617 bucket = rcu_dereference(from->rt6i_exception_bucket);
1618
1619#ifdef CONFIG_IPV6_SUBTREES
1620 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1621 * and exception table is indexed by a hash of
1622 * both rt6i_dst and rt6i_src.
1623 * Otherwise, the exception table is indexed by
1624 * a hash of only rt6i_dst.
1625 */
93c2fb25 1626 if (from->fib6_src.plen)
35732d01
WW
1627 src_key = &rt->rt6i_src.addr;
1628#endif
1629 rt6_ex = __rt6_find_exception_rcu(&bucket,
1630 &rt->rt6i_dst.addr,
1631 src_key);
1632 if (rt6_ex)
1633 rt6_ex->stamp = jiffies;
1634
1635 rcu_read_unlock();
1636}
1637
8d1c802b 1638static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1639{
1640 struct rt6_exception_bucket *bucket;
1641 struct rt6_exception *rt6_ex;
1642 int i;
1643
1644 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1645 lockdep_is_held(&rt6_exception_lock));
1646
1647 if (bucket) {
1648 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1649 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1650 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1651 }
1652 bucket++;
1653 }
1654 }
1655}
1656
e9fa1495
SB
1657static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1658 struct rt6_info *rt, int mtu)
1659{
1660 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1661 * lowest MTU in the path: always allow updating the route PMTU to
1662 * reflect PMTU decreases.
1663 *
1664 * If the new MTU is higher, and the route PMTU is equal to the local
1665 * MTU, this means the old MTU is the lowest in the path, so allow
1666 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1667 * handle this.
1668 */
1669
1670 if (dst_mtu(&rt->dst) >= mtu)
1671 return true;
1672
1673 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1674 return true;
1675
1676 return false;
1677}
1678
1679static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1680 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1681{
1682 struct rt6_exception_bucket *bucket;
1683 struct rt6_exception *rt6_ex;
1684 int i;
1685
1686 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1687 lockdep_is_held(&rt6_exception_lock));
1688
e9fa1495
SB
1689 if (!bucket)
1690 return;
1691
1692 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1693 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1694 struct rt6_info *entry = rt6_ex->rt6i;
1695
1696 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1697 * route), the metrics of its rt->from have already
e9fa1495
SB
1698 * been updated.
1699 */
d4ead6b3 1700 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1701 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1702 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1703 }
e9fa1495 1704 bucket++;
f5bbe7ee
WW
1705 }
1706}
1707
b16cb459
WW
1708#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1709
8d1c802b 1710static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1711 struct in6_addr *gateway)
1712{
1713 struct rt6_exception_bucket *bucket;
1714 struct rt6_exception *rt6_ex;
1715 struct hlist_node *tmp;
1716 int i;
1717
1718 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1719 return;
1720
1721 spin_lock_bh(&rt6_exception_lock);
1722 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1723 lockdep_is_held(&rt6_exception_lock));
1724
1725 if (bucket) {
1726 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1727 hlist_for_each_entry_safe(rt6_ex, tmp,
1728 &bucket->chain, hlist) {
1729 struct rt6_info *entry = rt6_ex->rt6i;
1730
1731 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1732 RTF_CACHE_GATEWAY &&
1733 ipv6_addr_equal(gateway,
1734 &entry->rt6i_gateway)) {
1735 rt6_remove_exception(bucket, rt6_ex);
1736 }
1737 }
1738 bucket++;
1739 }
1740 }
1741
1742 spin_unlock_bh(&rt6_exception_lock);
1743}
1744
c757faa8
WW
1745static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1746 struct rt6_exception *rt6_ex,
1747 struct fib6_gc_args *gc_args,
1748 unsigned long now)
1749{
1750 struct rt6_info *rt = rt6_ex->rt6i;
1751
1859bac0
PA
1752 /* we are pruning and obsoleting aged-out and non gateway exceptions
1753 * even if others have still references to them, so that on next
1754 * dst_check() such references can be dropped.
1755 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1756 * expired, independently from their aging, as per RFC 8201 section 4
1757 */
31afeb42
WW
1758 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1759 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1760 RT6_TRACE("aging clone %p\n", rt);
1761 rt6_remove_exception(bucket, rt6_ex);
1762 return;
1763 }
1764 } else if (time_after(jiffies, rt->dst.expires)) {
1765 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1766 rt6_remove_exception(bucket, rt6_ex);
1767 return;
31afeb42
WW
1768 }
1769
1770 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1771 struct neighbour *neigh;
1772 __u8 neigh_flags = 0;
1773
1bfa26ff
ED
1774 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1775 if (neigh)
c757faa8 1776 neigh_flags = neigh->flags;
1bfa26ff 1777
c757faa8
WW
1778 if (!(neigh_flags & NTF_ROUTER)) {
1779 RT6_TRACE("purging route %p via non-router but gateway\n",
1780 rt);
1781 rt6_remove_exception(bucket, rt6_ex);
1782 return;
1783 }
1784 }
31afeb42 1785
c757faa8
WW
1786 gc_args->more++;
1787}
1788
8d1c802b 1789void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1790 struct fib6_gc_args *gc_args,
1791 unsigned long now)
1792{
1793 struct rt6_exception_bucket *bucket;
1794 struct rt6_exception *rt6_ex;
1795 struct hlist_node *tmp;
1796 int i;
1797
1798 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1799 return;
1800
1bfa26ff
ED
1801 rcu_read_lock_bh();
1802 spin_lock(&rt6_exception_lock);
c757faa8
WW
1803 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1804 lockdep_is_held(&rt6_exception_lock));
1805
1806 if (bucket) {
1807 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1808 hlist_for_each_entry_safe(rt6_ex, tmp,
1809 &bucket->chain, hlist) {
1810 rt6_age_examine_exception(bucket, rt6_ex,
1811 gc_args, now);
1812 }
1813 bucket++;
1814 }
1815 }
1bfa26ff
ED
1816 spin_unlock(&rt6_exception_lock);
1817 rcu_read_unlock_bh();
c757faa8
WW
1818}
1819
1d053da9
DA
1820/* must be called with rcu lock held */
1821struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1822 int oif, struct flowi6 *fl6, int strict)
1da177e4 1823{
367efcb9 1824 struct fib6_node *fn, *saved_fn;
8d1c802b 1825 struct fib6_info *f6i;
1da177e4 1826
6454743b 1827 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1828 saved_fn = fn;
1da177e4 1829
ca254490
DA
1830 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1831 oif = 0;
1832
a3c00e46 1833redo_rt6_select:
23fb93a4 1834 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1835 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1836 fn = fib6_backtrack(fn, &fl6->saddr);
1837 if (fn)
1838 goto redo_rt6_select;
367efcb9
MKL
1839 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1840 /* also consider unreachable route */
1841 strict &= ~RT6_LOOKUP_F_REACHABLE;
1842 fn = saved_fn;
1843 goto redo_rt6_select;
367efcb9 1844 }
a3c00e46
MKL
1845 }
1846
d4bea421 1847 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1848
1d053da9
DA
1849 return f6i;
1850}
1851
1852struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1853 int oif, struct flowi6 *fl6,
1854 const struct sk_buff *skb, int flags)
1855{
1856 struct fib6_info *f6i;
1857 struct rt6_info *rt;
1858 int strict = 0;
1859
1860 strict |= flags & RT6_LOOKUP_F_IFACE;
1861 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1862 if (net->ipv6.devconf_all->forwarding == 0)
1863 strict |= RT6_LOOKUP_F_REACHABLE;
1864
1865 rcu_read_lock();
1866
1867 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1868 if (f6i->fib6_nsiblings)
1869 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1870
23fb93a4 1871 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1872 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1873 rcu_read_unlock();
d3843fe5 1874 dst_hold(&rt->dst);
d3843fe5 1875 return rt;
23fb93a4
DA
1876 }
1877
1878 /*Search through exception table */
1879 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1880 if (rt) {
d4ead6b3 1881 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1882 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1883
66f5d6ce 1884 rcu_read_unlock();
d52d3997 1885 return rt;
3da59bd9 1886 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1887 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1888 /* Create a RTF_CACHE clone which will not be
1889 * owned by the fib6 tree. It is for the special case where
1890 * the daddr in the skb during the neighbor look-up is different
1891 * from the fl6->daddr used to look-up route here.
1892 */
3da59bd9
MKL
1893 struct rt6_info *uncached_rt;
1894
23fb93a4 1895 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1896
4d85cd0c 1897 rcu_read_unlock();
c71099ac 1898
1cfb71ee
WW
1899 if (uncached_rt) {
1900 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1901 * No need for another dst_hold()
1902 */
8d0b94af 1903 rt6_uncached_list_add(uncached_rt);
81eb8447 1904 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1905 } else {
3da59bd9 1906 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1907 dst_hold(&uncached_rt->dst);
1908 }
b811580d 1909
3da59bd9 1910 return uncached_rt;
d52d3997
MKL
1911 } else {
1912 /* Get a percpu copy */
1913
1914 struct rt6_info *pcpu_rt;
1915
951f788a 1916 local_bh_disable();
23fb93a4 1917 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1918
93531c67
DA
1919 if (!pcpu_rt)
1920 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1921
951f788a
ED
1922 local_bh_enable();
1923 rcu_read_unlock();
d4bea421 1924
d52d3997
MKL
1925 return pcpu_rt;
1926 }
1da177e4 1927}
9ff74384 1928EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1929
b75cc8f9
DA
1930static struct rt6_info *ip6_pol_route_input(struct net *net,
1931 struct fib6_table *table,
1932 struct flowi6 *fl6,
1933 const struct sk_buff *skb,
1934 int flags)
4acad72d 1935{
b75cc8f9 1936 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1937}
1938
d409b847
MB
1939struct dst_entry *ip6_route_input_lookup(struct net *net,
1940 struct net_device *dev,
b75cc8f9
DA
1941 struct flowi6 *fl6,
1942 const struct sk_buff *skb,
1943 int flags)
72331bc0
SL
1944{
1945 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1946 flags |= RT6_LOOKUP_F_IFACE;
1947
b75cc8f9 1948 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1949}
d409b847 1950EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1951
23aebdac 1952static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1953 struct flow_keys *keys,
1954 struct flow_keys *flkeys)
23aebdac
JS
1955{
1956 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1957 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1958 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1959 const struct ipv6hdr *inner_iph;
1960 const struct icmp6hdr *icmph;
1961 struct ipv6hdr _inner_iph;
cea67a2d 1962 struct icmp6hdr _icmph;
23aebdac
JS
1963
1964 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1965 goto out;
1966
cea67a2d
ED
1967 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1968 sizeof(_icmph), &_icmph);
1969 if (!icmph)
1970 goto out;
1971
23aebdac
JS
1972 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1973 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1974 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1975 icmph->icmp6_type != ICMPV6_PARAMPROB)
1976 goto out;
1977
1978 inner_iph = skb_header_pointer(skb,
1979 skb_transport_offset(skb) + sizeof(*icmph),
1980 sizeof(_inner_iph), &_inner_iph);
1981 if (!inner_iph)
1982 goto out;
1983
1984 key_iph = inner_iph;
5e5d6fed 1985 _flkeys = NULL;
23aebdac 1986out:
5e5d6fed
RP
1987 if (_flkeys) {
1988 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1989 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1990 keys->tags.flow_label = _flkeys->tags.flow_label;
1991 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1992 } else {
1993 keys->addrs.v6addrs.src = key_iph->saddr;
1994 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1995 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1996 keys->basic.ip_proto = key_iph->nexthdr;
1997 }
23aebdac
JS
1998}
1999
2000/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2001u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2002 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2003{
2004 struct flow_keys hash_keys;
9a2a537a 2005 u32 mhash;
23aebdac 2006
bbfa047a 2007 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2008 case 0:
2009 memset(&hash_keys, 0, sizeof(hash_keys));
2010 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2011 if (skb) {
2012 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2013 } else {
2014 hash_keys.addrs.v6addrs.src = fl6->saddr;
2015 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2016 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2017 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2018 }
2019 break;
2020 case 1:
2021 if (skb) {
2022 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2023 struct flow_keys keys;
2024
2025 /* short-circuit if we already have L4 hash present */
2026 if (skb->l4_hash)
2027 return skb_get_hash_raw(skb) >> 1;
2028
2029 memset(&hash_keys, 0, sizeof(hash_keys));
2030
2031 if (!flkeys) {
2032 skb_flow_dissect_flow_keys(skb, &keys, flag);
2033 flkeys = &keys;
2034 }
2035 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2036 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2037 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2038 hash_keys.ports.src = flkeys->ports.src;
2039 hash_keys.ports.dst = flkeys->ports.dst;
2040 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2041 } else {
2042 memset(&hash_keys, 0, sizeof(hash_keys));
2043 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2044 hash_keys.addrs.v6addrs.src = fl6->saddr;
2045 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2046 hash_keys.ports.src = fl6->fl6_sport;
2047 hash_keys.ports.dst = fl6->fl6_dport;
2048 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2049 }
2050 break;
23aebdac 2051 }
9a2a537a 2052 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2053
9a2a537a 2054 return mhash >> 1;
23aebdac
JS
2055}
2056
c71099ac
TG
2057void ip6_route_input(struct sk_buff *skb)
2058{
b71d1d42 2059 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2060 struct net *net = dev_net(skb->dev);
adaa70bb 2061 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2062 struct ip_tunnel_info *tun_info;
4c9483b2 2063 struct flowi6 fl6 = {
e0d56fdd 2064 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2065 .daddr = iph->daddr,
2066 .saddr = iph->saddr,
6502ca52 2067 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2068 .flowi6_mark = skb->mark,
2069 .flowi6_proto = iph->nexthdr,
c71099ac 2070 };
5e5d6fed 2071 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2072
904af04d 2073 tun_info = skb_tunnel_info(skb);
46fa062a 2074 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2075 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2076
2077 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2078 flkeys = &_flkeys;
2079
23aebdac 2080 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2081 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2082 skb_dst_drop(skb);
b75cc8f9
DA
2083 skb_dst_set(skb,
2084 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2085}
2086
b75cc8f9
DA
2087static struct rt6_info *ip6_pol_route_output(struct net *net,
2088 struct fib6_table *table,
2089 struct flowi6 *fl6,
2090 const struct sk_buff *skb,
2091 int flags)
1da177e4 2092{
b75cc8f9 2093 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2094}
2095
6f21c96a
PA
2096struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2097 struct flowi6 *fl6, int flags)
c71099ac 2098{
d46a9d67 2099 bool any_src;
c71099ac 2100
4c1feac5
DA
2101 if (rt6_need_strict(&fl6->daddr)) {
2102 struct dst_entry *dst;
2103
2104 dst = l3mdev_link_scope_lookup(net, fl6);
2105 if (dst)
2106 return dst;
2107 }
ca254490 2108
1fb9489b 2109 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2110
d46a9d67 2111 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2112 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2113 (fl6->flowi6_oif && any_src))
77d16f45 2114 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2115
d46a9d67 2116 if (!any_src)
adaa70bb 2117 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2118 else if (sk)
2119 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2120
b75cc8f9 2121 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2122}
6f21c96a 2123EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2124
2774c131 2125struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2126{
5c1e6aa3 2127 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2128 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2129 struct dst_entry *new = NULL;
2130
1dbe3252 2131 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2132 DST_OBSOLETE_DEAD, 0);
14e50e57 2133 if (rt) {
0a1f5962 2134 rt6_info_init(rt);
81eb8447 2135 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2136
0a1f5962 2137 new = &rt->dst;
14e50e57 2138 new->__use = 1;
352e512c 2139 new->input = dst_discard;
ede2059d 2140 new->output = dst_discard_out;
14e50e57 2141
0a1f5962 2142 dst_copy_metrics(new, &ort->dst);
14e50e57 2143
1dbe3252 2144 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2145 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2146 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2147
2148 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2149#ifdef CONFIG_IPV6_SUBTREES
2150 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2151#endif
14e50e57
DM
2152 }
2153
69ead7af
DM
2154 dst_release(dst_orig);
2155 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2156}
14e50e57 2157
1da177e4
LT
2158/*
2159 * Destination cache support functions
2160 */
2161
8d1c802b 2162static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2163{
93531c67
DA
2164 u32 rt_cookie = 0;
2165
8ae86971 2166 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2167 return false;
2168
2169 if (fib6_check_expired(f6i))
2170 return false;
2171
2172 return true;
4b32b5ad
MKL
2173}
2174
a68886a6
DA
2175static struct dst_entry *rt6_check(struct rt6_info *rt,
2176 struct fib6_info *from,
2177 u32 cookie)
3da59bd9 2178{
36143645 2179 u32 rt_cookie = 0;
c5cff856 2180
a68886a6 2181 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2182 rt_cookie != cookie)
3da59bd9
MKL
2183 return NULL;
2184
2185 if (rt6_check_expired(rt))
2186 return NULL;
2187
2188 return &rt->dst;
2189}
2190
a68886a6
DA
2191static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2192 struct fib6_info *from,
2193 u32 cookie)
3da59bd9 2194{
5973fb1e
MKL
2195 if (!__rt6_check_expired(rt) &&
2196 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2197 fib6_check(from, cookie))
3da59bd9
MKL
2198 return &rt->dst;
2199 else
2200 return NULL;
2201}
2202
1da177e4
LT
2203static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2204{
a87b7dc9 2205 struct dst_entry *dst_ret;
a68886a6 2206 struct fib6_info *from;
1da177e4
LT
2207 struct rt6_info *rt;
2208
a87b7dc9
DA
2209 rt = container_of(dst, struct rt6_info, dst);
2210
2211 rcu_read_lock();
1da177e4 2212
6f3118b5
ND
2213 /* All IPV6 dsts are created with ->obsolete set to the value
2214 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2215 * into this function always.
2216 */
e3bc10bd 2217
a68886a6 2218 from = rcu_dereference(rt->from);
4b32b5ad 2219
a68886a6
DA
2220 if (from && (rt->rt6i_flags & RTF_PCPU ||
2221 unlikely(!list_empty(&rt->rt6i_uncached))))
2222 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2223 else
a68886a6 2224 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2225
2226 rcu_read_unlock();
2227
2228 return dst_ret;
1da177e4
LT
2229}
2230
2231static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2232{
2233 struct rt6_info *rt = (struct rt6_info *) dst;
2234
2235 if (rt) {
54c1a859 2236 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2237 rcu_read_lock();
54c1a859 2238 if (rt6_check_expired(rt)) {
93531c67 2239 rt6_remove_exception_rt(rt);
54c1a859
YH
2240 dst = NULL;
2241 }
c3c14da0 2242 rcu_read_unlock();
54c1a859 2243 } else {
1da177e4 2244 dst_release(dst);
54c1a859
YH
2245 dst = NULL;
2246 }
1da177e4 2247 }
54c1a859 2248 return dst;
1da177e4
LT
2249}
2250
2251static void ip6_link_failure(struct sk_buff *skb)
2252{
2253 struct rt6_info *rt;
2254
3ffe533c 2255 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2256
adf30907 2257 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2258 if (rt) {
8a14e46f 2259 rcu_read_lock();
1eb4f758 2260 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2261 if (dst_hold_safe(&rt->dst))
93531c67 2262 rt6_remove_exception_rt(rt);
c5cff856 2263 } else {
a68886a6 2264 struct fib6_info *from;
c5cff856
WW
2265 struct fib6_node *fn;
2266
a68886a6
DA
2267 from = rcu_dereference(rt->from);
2268 if (from) {
2269 fn = rcu_dereference(from->fib6_node);
2270 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2271 fn->fn_sernum = -1;
2272 }
1eb4f758 2273 }
8a14e46f 2274 rcu_read_unlock();
1da177e4
LT
2275 }
2276}
2277
6a3e030f
DA
2278static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2279{
a68886a6
DA
2280 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2281 struct fib6_info *from;
2282
2283 rcu_read_lock();
2284 from = rcu_dereference(rt0->from);
2285 if (from)
2286 rt0->dst.expires = from->expires;
2287 rcu_read_unlock();
2288 }
6a3e030f
DA
2289
2290 dst_set_expires(&rt0->dst, timeout);
2291 rt0->rt6i_flags |= RTF_EXPIRES;
2292}
2293
45e4fd26
MKL
2294static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2295{
2296 struct net *net = dev_net(rt->dst.dev);
2297
d4ead6b3 2298 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2299 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2300 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2301}
2302
0d3f6d29
MKL
2303static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2304{
a68886a6
DA
2305 bool from_set;
2306
2307 rcu_read_lock();
2308 from_set = !!rcu_dereference(rt->from);
2309 rcu_read_unlock();
2310
0d3f6d29 2311 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2312 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2313}
2314
45e4fd26
MKL
2315static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2316 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2317{
0dec879f 2318 const struct in6_addr *daddr, *saddr;
67ba4152 2319 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2320
19bda36c
XL
2321 if (dst_metric_locked(dst, RTAX_MTU))
2322 return;
2323
0dec879f
JA
2324 if (iph) {
2325 daddr = &iph->daddr;
2326 saddr = &iph->saddr;
2327 } else if (sk) {
2328 daddr = &sk->sk_v6_daddr;
2329 saddr = &inet6_sk(sk)->saddr;
2330 } else {
2331 daddr = NULL;
2332 saddr = NULL;
2333 }
2334 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2335 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2336 if (mtu >= dst_mtu(dst))
2337 return;
9d289715 2338
0d3f6d29 2339 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2340 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2341 /* update rt6_ex->stamp for cache */
2342 if (rt6->rt6i_flags & RTF_CACHE)
2343 rt6_update_exception_stamp_rt(rt6);
0dec879f 2344 } else if (daddr) {
a68886a6 2345 struct fib6_info *from;
45e4fd26
MKL
2346 struct rt6_info *nrt6;
2347
4d85cd0c 2348 rcu_read_lock();
a68886a6
DA
2349 from = rcu_dereference(rt6->from);
2350 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2351 if (nrt6) {
2352 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2353 if (rt6_insert_exception(nrt6, from))
2b760fcf 2354 dst_release_immediate(&nrt6->dst);
45e4fd26 2355 }
a68886a6 2356 rcu_read_unlock();
1da177e4
LT
2357 }
2358}
2359
45e4fd26
MKL
2360static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2361 struct sk_buff *skb, u32 mtu)
2362{
2363 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2364}
2365
42ae66c8 2366void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2367 int oif, u32 mark, kuid_t uid)
81aded24
DM
2368{
2369 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2370 struct dst_entry *dst;
2371 struct flowi6 fl6;
2372
2373 memset(&fl6, 0, sizeof(fl6));
2374 fl6.flowi6_oif = oif;
1b3c61dc 2375 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2376 fl6.daddr = iph->daddr;
2377 fl6.saddr = iph->saddr;
6502ca52 2378 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2379 fl6.flowi6_uid = uid;
81aded24
DM
2380
2381 dst = ip6_route_output(net, NULL, &fl6);
2382 if (!dst->error)
45e4fd26 2383 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2384 dst_release(dst);
2385}
2386EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2387
2388void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2389{
33c162a9
MKL
2390 struct dst_entry *dst;
2391
81aded24 2392 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2393 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2394
2395 dst = __sk_dst_get(sk);
2396 if (!dst || !dst->obsolete ||
2397 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2398 return;
2399
2400 bh_lock_sock(sk);
2401 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2402 ip6_datagram_dst_update(sk, false);
2403 bh_unlock_sock(sk);
81aded24
DM
2404}
2405EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2406
7d6850f7
AK
2407void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2408 const struct flowi6 *fl6)
2409{
2410#ifdef CONFIG_IPV6_SUBTREES
2411 struct ipv6_pinfo *np = inet6_sk(sk);
2412#endif
2413
2414 ip6_dst_store(sk, dst,
2415 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2416 &sk->sk_v6_daddr : NULL,
2417#ifdef CONFIG_IPV6_SUBTREES
2418 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2419 &np->saddr :
2420#endif
2421 NULL);
2422}
2423
b55b76b2
DJ
2424/* Handle redirects */
2425struct ip6rd_flowi {
2426 struct flowi6 fl6;
2427 struct in6_addr gateway;
2428};
2429
2430static struct rt6_info *__ip6_route_redirect(struct net *net,
2431 struct fib6_table *table,
2432 struct flowi6 *fl6,
b75cc8f9 2433 const struct sk_buff *skb,
b55b76b2
DJ
2434 int flags)
2435{
2436 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2437 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2438 struct fib6_info *rt;
b55b76b2
DJ
2439 struct fib6_node *fn;
2440
2441 /* Get the "current" route for this destination and
67c408cf 2442 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2443 *
2444 * RFC 4861 specifies that redirects should only be
2445 * accepted if they come from the nexthop to the target.
2446 * Due to the way the routes are chosen, this notion
2447 * is a bit fuzzy and one might need to check all possible
2448 * routes.
2449 */
2450
66f5d6ce 2451 rcu_read_lock();
6454743b 2452 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2453restart:
66f5d6ce 2454 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2455 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2456 continue;
14895687 2457 if (fib6_check_expired(rt))
b55b76b2 2458 continue;
93c2fb25 2459 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2460 break;
93c2fb25 2461 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2462 continue;
5e670d84 2463 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2464 continue;
2b760fcf
WW
2465 /* rt_cache's gateway might be different from its 'parent'
2466 * in the case of an ip redirect.
2467 * So we keep searching in the exception table if the gateway
2468 * is different.
2469 */
5e670d84 2470 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2471 rt_cache = rt6_find_cached_rt(rt,
2472 &fl6->daddr,
2473 &fl6->saddr);
2474 if (rt_cache &&
2475 ipv6_addr_equal(&rdfl->gateway,
2476 &rt_cache->rt6i_gateway)) {
23fb93a4 2477 ret = rt_cache;
2b760fcf
WW
2478 break;
2479 }
b55b76b2 2480 continue;
2b760fcf 2481 }
b55b76b2
DJ
2482 break;
2483 }
2484
2485 if (!rt)
421842ed 2486 rt = net->ipv6.fib6_null_entry;
93c2fb25 2487 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2488 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2489 goto out;
2490 }
2491
421842ed 2492 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2493 fn = fib6_backtrack(fn, &fl6->saddr);
2494 if (fn)
2495 goto restart;
b55b76b2 2496 }
a3c00e46 2497
b0a1ba59 2498out:
23fb93a4 2499 if (ret)
e873e4b9 2500 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2501 else
2502 ret = ip6_create_rt_rcu(rt);
b55b76b2 2503
66f5d6ce 2504 rcu_read_unlock();
b55b76b2 2505
b65f164d 2506 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2507 return ret;
b55b76b2
DJ
2508};
2509
2510static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2511 const struct flowi6 *fl6,
2512 const struct sk_buff *skb,
2513 const struct in6_addr *gateway)
b55b76b2
DJ
2514{
2515 int flags = RT6_LOOKUP_F_HAS_SADDR;
2516 struct ip6rd_flowi rdfl;
2517
2518 rdfl.fl6 = *fl6;
2519 rdfl.gateway = *gateway;
2520
b75cc8f9 2521 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2522 flags, __ip6_route_redirect);
2523}
2524
e2d118a1
LC
2525void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2526 kuid_t uid)
3a5ad2ee
DM
2527{
2528 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2529 struct dst_entry *dst;
2530 struct flowi6 fl6;
2531
2532 memset(&fl6, 0, sizeof(fl6));
e374c618 2533 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2534 fl6.flowi6_oif = oif;
2535 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2536 fl6.daddr = iph->daddr;
2537 fl6.saddr = iph->saddr;
6502ca52 2538 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2539 fl6.flowi6_uid = uid;
3a5ad2ee 2540
b75cc8f9 2541 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2542 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2543 dst_release(dst);
2544}
2545EXPORT_SYMBOL_GPL(ip6_redirect);
2546
c92a59ec
DJ
2547void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2548 u32 mark)
2549{
2550 const struct ipv6hdr *iph = ipv6_hdr(skb);
2551 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2552 struct dst_entry *dst;
2553 struct flowi6 fl6;
2554
2555 memset(&fl6, 0, sizeof(fl6));
e374c618 2556 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2557 fl6.flowi6_oif = oif;
2558 fl6.flowi6_mark = mark;
c92a59ec
DJ
2559 fl6.daddr = msg->dest;
2560 fl6.saddr = iph->daddr;
e2d118a1 2561 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2562
b75cc8f9 2563 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2564 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2565 dst_release(dst);
2566}
2567
3a5ad2ee
DM
2568void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2569{
e2d118a1
LC
2570 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2571 sk->sk_uid);
3a5ad2ee
DM
2572}
2573EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2574
0dbaee3b 2575static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2576{
0dbaee3b
DM
2577 struct net_device *dev = dst->dev;
2578 unsigned int mtu = dst_mtu(dst);
2579 struct net *net = dev_net(dev);
2580
1da177e4
LT
2581 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2582
5578689a
DL
2583 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2584 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2585
2586 /*
1ab1457c
YH
2587 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2588 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2589 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2590 * rely only on pmtu discovery"
2591 */
2592 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2593 mtu = IPV6_MAXPLEN;
2594 return mtu;
2595}
2596
ebb762f2 2597static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2598{
d33e4553 2599 struct inet6_dev *idev;
d4ead6b3 2600 unsigned int mtu;
4b32b5ad
MKL
2601
2602 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2603 if (mtu)
30f78d8e 2604 goto out;
618f9bc7
SK
2605
2606 mtu = IPV6_MIN_MTU;
d33e4553
DM
2607
2608 rcu_read_lock();
2609 idev = __in6_dev_get(dst->dev);
2610 if (idev)
2611 mtu = idev->cnf.mtu6;
2612 rcu_read_unlock();
2613
30f78d8e 2614out:
14972cbd
RP
2615 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2616
2617 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2618}
2619
901731b8
DA
2620/* MTU selection:
2621 * 1. mtu on route is locked - use it
2622 * 2. mtu from nexthop exception
2623 * 3. mtu from egress device
2624 *
2625 * based on ip6_dst_mtu_forward and exception logic of
2626 * rt6_find_cached_rt; called with rcu_read_lock
2627 */
2628u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2629 struct in6_addr *saddr)
2630{
2631 struct rt6_exception_bucket *bucket;
2632 struct rt6_exception *rt6_ex;
2633 struct in6_addr *src_key;
2634 struct inet6_dev *idev;
2635 u32 mtu = 0;
2636
2637 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2638 mtu = f6i->fib6_pmtu;
2639 if (mtu)
2640 goto out;
2641 }
2642
2643 src_key = NULL;
2644#ifdef CONFIG_IPV6_SUBTREES
2645 if (f6i->fib6_src.plen)
2646 src_key = saddr;
2647#endif
2648
2649 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2650 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2651 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2652 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2653
2654 if (likely(!mtu)) {
2655 struct net_device *dev = fib6_info_nh_dev(f6i);
2656
2657 mtu = IPV6_MIN_MTU;
2658 idev = __in6_dev_get(dev);
2659 if (idev && idev->cnf.mtu6 > mtu)
2660 mtu = idev->cnf.mtu6;
2661 }
2662
2663 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2664out:
2665 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2666}
2667
3b00944c 2668struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2669 struct flowi6 *fl6)
1da177e4 2670{
87a11578 2671 struct dst_entry *dst;
1da177e4
LT
2672 struct rt6_info *rt;
2673 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2674 struct net *net = dev_net(dev);
1da177e4 2675
38308473 2676 if (unlikely(!idev))
122bdf67 2677 return ERR_PTR(-ENODEV);
1da177e4 2678
ad706862 2679 rt = ip6_dst_alloc(net, dev, 0);
38308473 2680 if (unlikely(!rt)) {
1da177e4 2681 in6_dev_put(idev);
87a11578 2682 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2683 goto out;
2684 }
2685
8e2ec639 2686 rt->dst.flags |= DST_HOST;
588753f1 2687 rt->dst.input = ip6_input;
8e2ec639 2688 rt->dst.output = ip6_output;
550bab42 2689 rt->rt6i_gateway = fl6->daddr;
87a11578 2690 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2691 rt->rt6i_dst.plen = 128;
2692 rt->rt6i_idev = idev;
14edd87d 2693 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2694
4c981e28 2695 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2696 * do proper release of the net_device
2697 */
2698 rt6_uncached_list_add(rt);
81eb8447 2699 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2700
87a11578
DM
2701 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2702
1da177e4 2703out:
87a11578 2704 return dst;
1da177e4
LT
2705}
2706
569d3645 2707static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2708{
86393e52 2709 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2710 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2711 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2712 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2713 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2714 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2715 int entries;
7019b78e 2716
fc66f95c 2717 entries = dst_entries_get_fast(ops);
49a18d86 2718 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2719 entries <= rt_max_size)
1da177e4
LT
2720 goto out;
2721
6891a346 2722 net->ipv6.ip6_rt_gc_expire++;
14956643 2723 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2724 entries = dst_entries_get_slow(ops);
2725 if (entries < ops->gc_thresh)
7019b78e 2726 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2727out:
7019b78e 2728 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2729 return entries > rt_max_size;
1da177e4
LT
2730}
2731
8d1c802b 2732static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2733 struct fib6_config *cfg)
e715b6d3 2734{
263243d6 2735 struct dst_metrics *p;
e715b6d3 2736
63159f29 2737 if (!cfg->fc_mx)
e715b6d3
FW
2738 return 0;
2739
263243d6
ED
2740 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2741 if (unlikely(!p))
e715b6d3
FW
2742 return -ENOMEM;
2743
263243d6
ED
2744 refcount_set(&p->refcnt, 1);
2745 rt->fib6_metrics = p;
e715b6d3 2746
263243d6 2747 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2748}
1da177e4 2749
8c14586f
DA
2750static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2751 struct fib6_config *cfg,
f4797b33
DA
2752 const struct in6_addr *gw_addr,
2753 u32 tbid, int flags)
8c14586f
DA
2754{
2755 struct flowi6 fl6 = {
2756 .flowi6_oif = cfg->fc_ifindex,
2757 .daddr = *gw_addr,
2758 .saddr = cfg->fc_prefsrc,
2759 };
2760 struct fib6_table *table;
2761 struct rt6_info *rt;
8c14586f 2762
f4797b33 2763 table = fib6_get_table(net, tbid);
8c14586f
DA
2764 if (!table)
2765 return NULL;
2766
2767 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2768 flags |= RT6_LOOKUP_F_HAS_SADDR;
2769
f4797b33 2770 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2771 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2772
2773 /* if table lookup failed, fall back to full lookup */
2774 if (rt == net->ipv6.ip6_null_entry) {
2775 ip6_rt_put(rt);
2776 rt = NULL;
2777 }
2778
2779 return rt;
2780}
2781
fc1e64e1
DA
2782static int ip6_route_check_nh_onlink(struct net *net,
2783 struct fib6_config *cfg,
9fbb704c 2784 const struct net_device *dev,
fc1e64e1
DA
2785 struct netlink_ext_ack *extack)
2786{
44750f84 2787 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2788 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2789 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2790 struct rt6_info *grt;
2791 int err;
2792
2793 err = 0;
2794 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2795 if (grt) {
58e354c0
DA
2796 if (!grt->dst.error &&
2797 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2798 NL_SET_ERR_MSG(extack,
2799 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2800 err = -EINVAL;
2801 }
2802
2803 ip6_rt_put(grt);
2804 }
2805
2806 return err;
2807}
2808
1edce99f
DA
2809static int ip6_route_check_nh(struct net *net,
2810 struct fib6_config *cfg,
2811 struct net_device **_dev,
2812 struct inet6_dev **idev)
2813{
2814 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2815 struct net_device *dev = _dev ? *_dev : NULL;
2816 struct rt6_info *grt = NULL;
2817 int err = -EHOSTUNREACH;
2818
2819 if (cfg->fc_table) {
f4797b33
DA
2820 int flags = RT6_LOOKUP_F_IFACE;
2821
2822 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2823 cfg->fc_table, flags);
1edce99f
DA
2824 if (grt) {
2825 if (grt->rt6i_flags & RTF_GATEWAY ||
2826 (dev && dev != grt->dst.dev)) {
2827 ip6_rt_put(grt);
2828 grt = NULL;
2829 }
2830 }
2831 }
2832
2833 if (!grt)
b75cc8f9 2834 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2835
2836 if (!grt)
2837 goto out;
2838
2839 if (dev) {
2840 if (dev != grt->dst.dev) {
2841 ip6_rt_put(grt);
2842 goto out;
2843 }
2844 } else {
2845 *_dev = dev = grt->dst.dev;
2846 *idev = grt->rt6i_idev;
2847 dev_hold(dev);
2848 in6_dev_hold(grt->rt6i_idev);
2849 }
2850
2851 if (!(grt->rt6i_flags & RTF_GATEWAY))
2852 err = 0;
2853
2854 ip6_rt_put(grt);
2855
2856out:
2857 return err;
2858}
2859
9fbb704c
DA
2860static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2861 struct net_device **_dev, struct inet6_dev **idev,
2862 struct netlink_ext_ack *extack)
2863{
2864 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2865 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2866 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2867 const struct net_device *dev = *_dev;
232378e8 2868 bool need_addr_check = !dev;
9fbb704c
DA
2869 int err = -EINVAL;
2870
2871 /* if gw_addr is local we will fail to detect this in case
2872 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2873 * will return already-added prefix route via interface that
2874 * prefix route was assigned to, which might be non-loopback.
2875 */
232378e8
DA
2876 if (dev &&
2877 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2878 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2879 goto out;
2880 }
2881
2882 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2883 /* IPv6 strictly inhibits using not link-local
2884 * addresses as nexthop address.
2885 * Otherwise, router will not able to send redirects.
2886 * It is very good, but in some (rare!) circumstances
2887 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2888 * some exceptions. --ANK
2889 * We allow IPv4-mapped nexthops to support RFC4798-type
2890 * addressing
2891 */
2892 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2893 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2894 goto out;
2895 }
2896
2897 if (cfg->fc_flags & RTNH_F_ONLINK)
2898 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2899 else
2900 err = ip6_route_check_nh(net, cfg, _dev, idev);
2901
2902 if (err)
2903 goto out;
2904 }
2905
2906 /* reload in case device was changed */
2907 dev = *_dev;
2908
2909 err = -EINVAL;
2910 if (!dev) {
2911 NL_SET_ERR_MSG(extack, "Egress device not specified");
2912 goto out;
2913 } else if (dev->flags & IFF_LOOPBACK) {
2914 NL_SET_ERR_MSG(extack,
2915 "Egress device can not be loopback device for this route");
2916 goto out;
2917 }
232378e8
DA
2918
2919 /* if we did not check gw_addr above, do so now that the
2920 * egress device has been resolved.
2921 */
2922 if (need_addr_check &&
2923 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2924 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2925 goto out;
2926 }
2927
9fbb704c
DA
2928 err = 0;
2929out:
2930 return err;
2931}
2932
8d1c802b 2933static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2934 gfp_t gfp_flags,
333c4301 2935 struct netlink_ext_ack *extack)
1da177e4 2936{
5578689a 2937 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2938 struct fib6_info *rt = NULL;
1da177e4
LT
2939 struct net_device *dev = NULL;
2940 struct inet6_dev *idev = NULL;
c71099ac 2941 struct fib6_table *table;
1da177e4 2942 int addr_type;
8c5b83f0 2943 int err = -EINVAL;
1da177e4 2944
557c44be 2945 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2946 if (cfg->fc_flags & RTF_PCPU) {
2947 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2948 goto out;
d5d531cb 2949 }
557c44be 2950
2ea2352e
WW
2951 /* RTF_CACHE is an internal flag; can not be set by userspace */
2952 if (cfg->fc_flags & RTF_CACHE) {
2953 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2954 goto out;
2955 }
2956
e8478e80
DA
2957 if (cfg->fc_type > RTN_MAX) {
2958 NL_SET_ERR_MSG(extack, "Invalid route type");
2959 goto out;
2960 }
2961
d5d531cb
DA
2962 if (cfg->fc_dst_len > 128) {
2963 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2964 goto out;
2965 }
2966 if (cfg->fc_src_len > 128) {
2967 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2968 goto out;
d5d531cb 2969 }
1da177e4 2970#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2971 if (cfg->fc_src_len) {
2972 NL_SET_ERR_MSG(extack,
2973 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2974 goto out;
d5d531cb 2975 }
1da177e4 2976#endif
86872cb5 2977 if (cfg->fc_ifindex) {
1da177e4 2978 err = -ENODEV;
5578689a 2979 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2980 if (!dev)
2981 goto out;
2982 idev = in6_dev_get(dev);
2983 if (!idev)
2984 goto out;
2985 }
2986
86872cb5
TG
2987 if (cfg->fc_metric == 0)
2988 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2989
fc1e64e1
DA
2990 if (cfg->fc_flags & RTNH_F_ONLINK) {
2991 if (!dev) {
2992 NL_SET_ERR_MSG(extack,
2993 "Nexthop device required for onlink");
2994 err = -ENODEV;
2995 goto out;
2996 }
2997
2998 if (!(dev->flags & IFF_UP)) {
2999 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3000 err = -ENETDOWN;
3001 goto out;
3002 }
3003 }
3004
d71314b4 3005 err = -ENOBUFS;
38308473
DM
3006 if (cfg->fc_nlinfo.nlh &&
3007 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3008 table = fib6_get_table(net, cfg->fc_table);
38308473 3009 if (!table) {
f3213831 3010 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3011 table = fib6_new_table(net, cfg->fc_table);
3012 }
3013 } else {
3014 table = fib6_new_table(net, cfg->fc_table);
3015 }
38308473
DM
3016
3017 if (!table)
c71099ac 3018 goto out;
c71099ac 3019
93531c67
DA
3020 err = -ENOMEM;
3021 rt = fib6_info_alloc(gfp_flags);
3022 if (!rt)
1da177e4 3023 goto out;
93531c67
DA
3024
3025 if (cfg->fc_flags & RTF_ADDRCONF)
3026 rt->dst_nocount = true;
1da177e4 3027
d4ead6b3
DA
3028 err = ip6_convert_metrics(net, rt, cfg);
3029 if (err < 0)
1da177e4 3030 goto out;
1da177e4 3031
1716a961 3032 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3033 fib6_set_expires(rt, jiffies +
1716a961
G
3034 clock_t_to_jiffies(cfg->fc_expires));
3035 else
14895687 3036 fib6_clean_expires(rt);
1da177e4 3037
86872cb5
TG
3038 if (cfg->fc_protocol == RTPROT_UNSPEC)
3039 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3040 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3041
3042 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3043
19e42e45
RP
3044 if (cfg->fc_encap) {
3045 struct lwtunnel_state *lwtstate;
3046
30357d7d 3047 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3048 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3049 &lwtstate, extack);
19e42e45
RP
3050 if (err)
3051 goto out;
5e670d84 3052 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3053 }
3054
93c2fb25
DA
3055 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3056 rt->fib6_dst.plen = cfg->fc_dst_len;
3057 if (rt->fib6_dst.plen == 128)
3b6761d1 3058 rt->dst_host = true;
e5fd387a 3059
1da177e4 3060#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3061 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3062 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3063#endif
3064
93c2fb25 3065 rt->fib6_metric = cfg->fc_metric;
5e670d84 3066 rt->fib6_nh.nh_weight = 1;
1da177e4 3067
e8478e80 3068 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3069
3070 /* We cannot add true routes via loopback here,
3071 they would result in kernel looping; promote them to reject routes
3072 */
86872cb5 3073 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3074 (dev && (dev->flags & IFF_LOOPBACK) &&
3075 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3076 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3077 /* hold loopback dev/idev if we haven't done so. */
5578689a 3078 if (dev != net->loopback_dev) {
1da177e4
LT
3079 if (dev) {
3080 dev_put(dev);
3081 in6_dev_put(idev);
3082 }
5578689a 3083 dev = net->loopback_dev;
1da177e4
LT
3084 dev_hold(dev);
3085 idev = in6_dev_get(dev);
3086 if (!idev) {
3087 err = -ENODEV;
3088 goto out;
3089 }
3090 }
93c2fb25 3091 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3092 goto install_route;
3093 }
3094
86872cb5 3095 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3096 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3097 if (err)
48ed7b26 3098 goto out;
1da177e4 3099
93531c67 3100 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3101 }
3102
3103 err = -ENODEV;
38308473 3104 if (!dev)
1da177e4
LT
3105 goto out;
3106
428604fb
LB
3107 if (idev->cnf.disable_ipv6) {
3108 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3109 err = -EACCES;
3110 goto out;
3111 }
3112
955ec4cb
DA
3113 if (!(dev->flags & IFF_UP)) {
3114 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3115 err = -ENETDOWN;
3116 goto out;
3117 }
3118
c3968a85
DW
3119 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3120 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3121 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3122 err = -EINVAL;
3123 goto out;
3124 }
93c2fb25
DA
3125 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3126 rt->fib6_prefsrc.plen = 128;
c3968a85 3127 } else
93c2fb25 3128 rt->fib6_prefsrc.plen = 0;
c3968a85 3129
93c2fb25 3130 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3131
3132install_route:
93c2fb25 3133 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3134 !netif_carrier_ok(dev))
5e670d84
DA
3135 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3136 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3137 rt->fib6_nh.nh_dev = dev;
93c2fb25 3138 rt->fib6_table = table;
63152fc0 3139
c346dca1 3140 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3141
dcd1f572
DA
3142 if (idev)
3143 in6_dev_put(idev);
3144
8c5b83f0 3145 return rt;
6b9ea5a6
RP
3146out:
3147 if (dev)
3148 dev_put(dev);
3149 if (idev)
3150 in6_dev_put(idev);
6b9ea5a6 3151
93531c67 3152 fib6_info_release(rt);
8c5b83f0 3153 return ERR_PTR(err);
6b9ea5a6
RP
3154}
3155
acb54e3c 3156int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3157 struct netlink_ext_ack *extack)
6b9ea5a6 3158{
8d1c802b 3159 struct fib6_info *rt;
6b9ea5a6
RP
3160 int err;
3161
acb54e3c 3162 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3163 if (IS_ERR(rt))
3164 return PTR_ERR(rt);
6b9ea5a6 3165
d4ead6b3 3166 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3167 fib6_info_release(rt);
6b9ea5a6 3168
1da177e4
LT
3169 return err;
3170}
3171
8d1c802b 3172static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3173{
afb1d4b5 3174 struct net *net = info->nl_net;
c71099ac 3175 struct fib6_table *table;
afb1d4b5 3176 int err;
1da177e4 3177
421842ed 3178 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3179 err = -ENOENT;
3180 goto out;
3181 }
6c813a72 3182
93c2fb25 3183 table = rt->fib6_table;
66f5d6ce 3184 spin_lock_bh(&table->tb6_lock);
86872cb5 3185 err = fib6_del(rt, info);
66f5d6ce 3186 spin_unlock_bh(&table->tb6_lock);
1da177e4 3187
6825a26c 3188out:
93531c67 3189 fib6_info_release(rt);
1da177e4
LT
3190 return err;
3191}
3192
8d1c802b 3193int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3194{
afb1d4b5
DA
3195 struct nl_info info = { .nl_net = net };
3196
528c4ceb 3197 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3198}
3199
8d1c802b 3200static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3201{
3202 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3203 struct net *net = info->nl_net;
16a16cd3 3204 struct sk_buff *skb = NULL;
0ae81335 3205 struct fib6_table *table;
e3330039 3206 int err = -ENOENT;
0ae81335 3207
421842ed 3208 if (rt == net->ipv6.fib6_null_entry)
e3330039 3209 goto out_put;
93c2fb25 3210 table = rt->fib6_table;
66f5d6ce 3211 spin_lock_bh(&table->tb6_lock);
0ae81335 3212
93c2fb25 3213 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3214 struct fib6_info *sibling, *next_sibling;
0ae81335 3215
16a16cd3
DA
3216 /* prefer to send a single notification with all hops */
3217 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3218 if (skb) {
3219 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3220
d4ead6b3 3221 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3222 NULL, NULL, 0, RTM_DELROUTE,
3223 info->portid, seq, 0) < 0) {
3224 kfree_skb(skb);
3225 skb = NULL;
3226 } else
3227 info->skip_notify = 1;
3228 }
3229
0ae81335 3230 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3231 &rt->fib6_siblings,
3232 fib6_siblings) {
0ae81335
DA
3233 err = fib6_del(sibling, info);
3234 if (err)
e3330039 3235 goto out_unlock;
0ae81335
DA
3236 }
3237 }
3238
3239 err = fib6_del(rt, info);
e3330039 3240out_unlock:
66f5d6ce 3241 spin_unlock_bh(&table->tb6_lock);
e3330039 3242out_put:
93531c67 3243 fib6_info_release(rt);
16a16cd3
DA
3244
3245 if (skb) {
e3330039 3246 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3247 info->nlh, gfp_any());
3248 }
0ae81335
DA
3249 return err;
3250}
3251
23fb93a4
DA
3252static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3253{
3254 int rc = -ESRCH;
3255
3256 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3257 goto out;
3258
3259 if (cfg->fc_flags & RTF_GATEWAY &&
3260 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3261 goto out;
3262 if (dst_hold_safe(&rt->dst))
3263 rc = rt6_remove_exception_rt(rt);
3264out:
3265 return rc;
3266}
3267
333c4301
DA
3268static int ip6_route_del(struct fib6_config *cfg,
3269 struct netlink_ext_ack *extack)
1da177e4 3270{
8d1c802b 3271 struct rt6_info *rt_cache;
c71099ac 3272 struct fib6_table *table;
8d1c802b 3273 struct fib6_info *rt;
1da177e4 3274 struct fib6_node *fn;
1da177e4
LT
3275 int err = -ESRCH;
3276
5578689a 3277 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3278 if (!table) {
3279 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3280 return err;
d5d531cb 3281 }
c71099ac 3282
66f5d6ce 3283 rcu_read_lock();
1da177e4 3284
c71099ac 3285 fn = fib6_locate(&table->tb6_root,
86872cb5 3286 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3287 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3288 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3289
1da177e4 3290 if (fn) {
66f5d6ce 3291 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3292 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3293 int rc;
3294
2b760fcf
WW
3295 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3296 &cfg->fc_src);
23fb93a4
DA
3297 if (rt_cache) {
3298 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3299 if (rc != -ESRCH) {
3300 rcu_read_unlock();
23fb93a4 3301 return rc;
9e575010 3302 }
23fb93a4
DA
3303 }
3304 continue;
2b760fcf 3305 }
86872cb5 3306 if (cfg->fc_ifindex &&
5e670d84
DA
3307 (!rt->fib6_nh.nh_dev ||
3308 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3309 continue;
86872cb5 3310 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3311 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3312 continue;
93c2fb25 3313 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3314 continue;
93c2fb25 3315 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3316 continue;
e873e4b9
WW
3317 if (!fib6_info_hold_safe(rt))
3318 continue;
66f5d6ce 3319 rcu_read_unlock();
1da177e4 3320
0ae81335
DA
3321 /* if gateway was specified only delete the one hop */
3322 if (cfg->fc_flags & RTF_GATEWAY)
3323 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3324
3325 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3326 }
3327 }
66f5d6ce 3328 rcu_read_unlock();
1da177e4
LT
3329
3330 return err;
3331}
3332
6700c270 3333static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3334{
a6279458 3335 struct netevent_redirect netevent;
e8599ff4 3336 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3337 struct ndisc_options ndopts;
3338 struct inet6_dev *in6_dev;
3339 struct neighbour *neigh;
a68886a6 3340 struct fib6_info *from;
71bcdba0 3341 struct rd_msg *msg;
6e157b6a
DM
3342 int optlen, on_link;
3343 u8 *lladdr;
e8599ff4 3344
29a3cad5 3345 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3346 optlen -= sizeof(*msg);
e8599ff4
DM
3347
3348 if (optlen < 0) {
6e157b6a 3349 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3350 return;
3351 }
3352
71bcdba0 3353 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3354
71bcdba0 3355 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3356 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3357 return;
3358 }
3359
6e157b6a 3360 on_link = 0;
71bcdba0 3361 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3362 on_link = 1;
71bcdba0 3363 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3364 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3365 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3366 return;
3367 }
3368
3369 in6_dev = __in6_dev_get(skb->dev);
3370 if (!in6_dev)
3371 return;
3372 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3373 return;
3374
3375 /* RFC2461 8.1:
3376 * The IP source address of the Redirect MUST be the same as the current
3377 * first-hop router for the specified ICMP Destination Address.
3378 */
3379
f997c55c 3380 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3381 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3382 return;
3383 }
6e157b6a
DM
3384
3385 lladdr = NULL;
e8599ff4
DM
3386 if (ndopts.nd_opts_tgt_lladdr) {
3387 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3388 skb->dev);
3389 if (!lladdr) {
3390 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3391 return;
3392 }
3393 }
3394
6e157b6a 3395 rt = (struct rt6_info *) dst;
ec13ad1d 3396 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3397 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3398 return;
6e157b6a 3399 }
e8599ff4 3400
6e157b6a
DM
3401 /* Redirect received -> path was valid.
3402 * Look, redirects are sent only in response to data packets,
3403 * so that this nexthop apparently is reachable. --ANK
3404 */
0dec879f 3405 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3406
71bcdba0 3407 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3408 if (!neigh)
3409 return;
a6279458 3410
1da177e4
LT
3411 /*
3412 * We have finally decided to accept it.
3413 */
3414
f997c55c 3415 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3416 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3417 NEIGH_UPDATE_F_OVERRIDE|
3418 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3419 NEIGH_UPDATE_F_ISROUTER)),
3420 NDISC_REDIRECT, &ndopts);
1da177e4 3421
4d85cd0c 3422 rcu_read_lock();
a68886a6 3423 from = rcu_dereference(rt->from);
e873e4b9
WW
3424 /* This fib6_info_hold() is safe here because we hold reference to rt
3425 * and rt already holds reference to fib6_info.
3426 */
8a14e46f 3427 fib6_info_hold(from);
4d85cd0c 3428 rcu_read_unlock();
8a14e46f
DA
3429
3430 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3431 if (!nrt)
1da177e4
LT
3432 goto out;
3433
3434 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3435 if (on_link)
3436 nrt->rt6i_flags &= ~RTF_GATEWAY;
3437
4e3fd7a0 3438 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3439
2b760fcf
WW
3440 /* No need to remove rt from the exception table if rt is
3441 * a cached route because rt6_insert_exception() will
3442 * takes care of it
3443 */
8a14e46f 3444 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3445 dst_release_immediate(&nrt->dst);
3446 goto out;
3447 }
1da177e4 3448
d8d1f30b
CG
3449 netevent.old = &rt->dst;
3450 netevent.new = &nrt->dst;
71bcdba0 3451 netevent.daddr = &msg->dest;
60592833 3452 netevent.neigh = neigh;
8d71740c
TT
3453 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3454
1da177e4 3455out:
8a14e46f 3456 fib6_info_release(from);
e8599ff4 3457 neigh_release(neigh);
6e157b6a
DM
3458}
3459
70ceb4f5 3460#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3461static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3462 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3463 const struct in6_addr *gwaddr,
3464 struct net_device *dev)
70ceb4f5 3465{
830218c1
DA
3466 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3467 int ifindex = dev->ifindex;
70ceb4f5 3468 struct fib6_node *fn;
8d1c802b 3469 struct fib6_info *rt = NULL;
c71099ac
TG
3470 struct fib6_table *table;
3471
830218c1 3472 table = fib6_get_table(net, tb_id);
38308473 3473 if (!table)
c71099ac 3474 return NULL;
70ceb4f5 3475
66f5d6ce 3476 rcu_read_lock();
38fbeeee 3477 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3478 if (!fn)
3479 goto out;
3480
66f5d6ce 3481 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3482 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3483 continue;
93c2fb25 3484 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3485 continue;
5e670d84 3486 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3487 continue;
e873e4b9
WW
3488 if (!fib6_info_hold_safe(rt))
3489 continue;
70ceb4f5
YH
3490 break;
3491 }
3492out:
66f5d6ce 3493 rcu_read_unlock();
70ceb4f5
YH
3494 return rt;
3495}
3496
8d1c802b 3497static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3498 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3499 const struct in6_addr *gwaddr,
3500 struct net_device *dev,
95c96174 3501 unsigned int pref)
70ceb4f5 3502{
86872cb5 3503 struct fib6_config cfg = {
238fc7ea 3504 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3505 .fc_ifindex = dev->ifindex,
86872cb5
TG
3506 .fc_dst_len = prefixlen,
3507 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3508 RTF_UP | RTF_PREF(pref),
b91d5329 3509 .fc_protocol = RTPROT_RA,
e8478e80 3510 .fc_type = RTN_UNICAST,
15e47304 3511 .fc_nlinfo.portid = 0,
efa2cea0
DL
3512 .fc_nlinfo.nlh = NULL,
3513 .fc_nlinfo.nl_net = net,
86872cb5
TG
3514 };
3515
830218c1 3516 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3517 cfg.fc_dst = *prefix;
3518 cfg.fc_gateway = *gwaddr;
70ceb4f5 3519
e317da96
YH
3520 /* We should treat it as a default route if prefix length is 0. */
3521 if (!prefixlen)
86872cb5 3522 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3523
acb54e3c 3524 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3525
830218c1 3526 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3527}
3528#endif
3529
8d1c802b 3530struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3531 const struct in6_addr *addr,
3532 struct net_device *dev)
1ab1457c 3533{
830218c1 3534 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3535 struct fib6_info *rt;
c71099ac 3536 struct fib6_table *table;
1da177e4 3537
afb1d4b5 3538 table = fib6_get_table(net, tb_id);
38308473 3539 if (!table)
c71099ac 3540 return NULL;
1da177e4 3541
66f5d6ce
WW
3542 rcu_read_lock();
3543 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3544 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3545 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3546 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3547 break;
3548 }
e873e4b9
WW
3549 if (rt && !fib6_info_hold_safe(rt))
3550 rt = NULL;
66f5d6ce 3551 rcu_read_unlock();
1da177e4
LT
3552 return rt;
3553}
3554
8d1c802b 3555struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3556 const struct in6_addr *gwaddr,
ebacaaa0
YH
3557 struct net_device *dev,
3558 unsigned int pref)
1da177e4 3559{
86872cb5 3560 struct fib6_config cfg = {
ca254490 3561 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3562 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3563 .fc_ifindex = dev->ifindex,
3564 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3565 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3566 .fc_protocol = RTPROT_RA,
e8478e80 3567 .fc_type = RTN_UNICAST,
15e47304 3568 .fc_nlinfo.portid = 0,
5578689a 3569 .fc_nlinfo.nlh = NULL,
afb1d4b5 3570 .fc_nlinfo.nl_net = net,
86872cb5 3571 };
1da177e4 3572
4e3fd7a0 3573 cfg.fc_gateway = *gwaddr;
1da177e4 3574
acb54e3c 3575 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3576 struct fib6_table *table;
3577
3578 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3579 if (table)
3580 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3581 }
1da177e4 3582
afb1d4b5 3583 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3584}
3585
afb1d4b5
DA
3586static void __rt6_purge_dflt_routers(struct net *net,
3587 struct fib6_table *table)
1da177e4 3588{
8d1c802b 3589 struct fib6_info *rt;
1da177e4
LT
3590
3591restart:
66f5d6ce
WW
3592 rcu_read_lock();
3593 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3594 struct net_device *dev = fib6_info_nh_dev(rt);
3595 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3596
93c2fb25 3597 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3598 (!idev || idev->cnf.accept_ra != 2) &&
3599 fib6_info_hold_safe(rt)) {
93531c67
DA
3600 rcu_read_unlock();
3601 ip6_del_rt(net, rt);
1da177e4
LT
3602 goto restart;
3603 }
3604 }
66f5d6ce 3605 rcu_read_unlock();
830218c1
DA
3606
3607 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3608}
3609
3610void rt6_purge_dflt_routers(struct net *net)
3611{
3612 struct fib6_table *table;
3613 struct hlist_head *head;
3614 unsigned int h;
3615
3616 rcu_read_lock();
3617
3618 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3619 head = &net->ipv6.fib_table_hash[h];
3620 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3621 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3622 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3623 }
3624 }
3625
3626 rcu_read_unlock();
1da177e4
LT
3627}
3628
5578689a
DL
3629static void rtmsg_to_fib6_config(struct net *net,
3630 struct in6_rtmsg *rtmsg,
86872cb5
TG
3631 struct fib6_config *cfg)
3632{
3633 memset(cfg, 0, sizeof(*cfg));
3634
ca254490
DA
3635 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3636 : RT6_TABLE_MAIN;
86872cb5
TG
3637 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3638 cfg->fc_metric = rtmsg->rtmsg_metric;
3639 cfg->fc_expires = rtmsg->rtmsg_info;
3640 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3641 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3642 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3643 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3644
5578689a 3645 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3646
4e3fd7a0
AD
3647 cfg->fc_dst = rtmsg->rtmsg_dst;
3648 cfg->fc_src = rtmsg->rtmsg_src;
3649 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3650}
3651
5578689a 3652int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3653{
86872cb5 3654 struct fib6_config cfg;
1da177e4
LT
3655 struct in6_rtmsg rtmsg;
3656 int err;
3657
67ba4152 3658 switch (cmd) {
1da177e4
LT
3659 case SIOCADDRT: /* Add a route */
3660 case SIOCDELRT: /* Delete a route */
af31f412 3661 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3662 return -EPERM;
3663 err = copy_from_user(&rtmsg, arg,
3664 sizeof(struct in6_rtmsg));
3665 if (err)
3666 return -EFAULT;
86872cb5 3667
5578689a 3668 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3669
1da177e4
LT
3670 rtnl_lock();
3671 switch (cmd) {
3672 case SIOCADDRT:
acb54e3c 3673 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3674 break;
3675 case SIOCDELRT:
333c4301 3676 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3677 break;
3678 default:
3679 err = -EINVAL;
3680 }
3681 rtnl_unlock();
3682
3683 return err;
3ff50b79 3684 }
1da177e4
LT
3685
3686 return -EINVAL;
3687}
3688
3689/*
3690 * Drop the packet on the floor
3691 */
3692
d5fdd6ba 3693static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3694{
612f09e8 3695 int type;
adf30907 3696 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3697 switch (ipstats_mib_noroutes) {
3698 case IPSTATS_MIB_INNOROUTES:
0660e03f 3699 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3700 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3701 IP6_INC_STATS(dev_net(dst->dev),
3702 __in6_dev_get_safely(skb->dev),
3bd653c8 3703 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3704 break;
3705 }
3706 /* FALLTHROUGH */
3707 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3708 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3709 ipstats_mib_noroutes);
612f09e8
YH
3710 break;
3711 }
3ffe533c 3712 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3713 kfree_skb(skb);
3714 return 0;
3715}
3716
9ce8ade0
TG
3717static int ip6_pkt_discard(struct sk_buff *skb)
3718{
612f09e8 3719 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3720}
3721
ede2059d 3722static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3723{
adf30907 3724 skb->dev = skb_dst(skb)->dev;
612f09e8 3725 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3726}
3727
9ce8ade0
TG
3728static int ip6_pkt_prohibit(struct sk_buff *skb)
3729{
612f09e8 3730 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3731}
3732
ede2059d 3733static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3734{
adf30907 3735 skb->dev = skb_dst(skb)->dev;
612f09e8 3736 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3737}
3738
1da177e4
LT
3739/*
3740 * Allocate a dst for local (unicast / anycast) address.
3741 */
3742
360a9887
DA
3743struct fib6_info *addrconf_f6i_alloc(struct net *net,
3744 struct inet6_dev *idev,
3745 const struct in6_addr *addr,
3746 bool anycast, gfp_t gfp_flags)
1da177e4 3747{
ca254490 3748 u32 tb_id;
4832c30d 3749 struct net_device *dev = idev->dev;
360a9887 3750 struct fib6_info *f6i;
5f02ce24 3751
360a9887
DA
3752 f6i = fib6_info_alloc(gfp_flags);
3753 if (!f6i)
1da177e4
LT
3754 return ERR_PTR(-ENOMEM);
3755
360a9887 3756 f6i->dst_nocount = true;
360a9887
DA
3757 f6i->dst_host = true;
3758 f6i->fib6_protocol = RTPROT_KERNEL;
3759 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3760 if (anycast) {
360a9887
DA
3761 f6i->fib6_type = RTN_ANYCAST;
3762 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3763 } else {
360a9887
DA
3764 f6i->fib6_type = RTN_LOCAL;
3765 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3766 }
1da177e4 3767
360a9887 3768 f6i->fib6_nh.nh_gw = *addr;
93531c67 3769 dev_hold(dev);
360a9887
DA
3770 f6i->fib6_nh.nh_dev = dev;
3771 f6i->fib6_dst.addr = *addr;
3772 f6i->fib6_dst.plen = 128;
ca254490 3773 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3774 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3775
360a9887 3776 return f6i;
1da177e4
LT
3777}
3778
c3968a85
DW
/*
 * Argument bundle for fib6_remove_prefsrc(): identifies the address being
 * removed so that it can be cleared from routes' preferred-source fields.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* only routes via this device; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
	struct in6_addr *addr;		/* address compared against fib6_prefsrc.addr */
};
3785
8d1c802b 3786static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3787{
3788 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3789 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3790 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3791
5e670d84 3792 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3793 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3794 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3795 spin_lock_bh(&rt6_exception_lock);
c3968a85 3796 /* remove prefsrc entry */
93c2fb25 3797 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3798 /* need to update cache as well */
3799 rt6_exceptions_remove_prefsrc(rt);
3800 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3801 }
3802 return 0;
3803}
3804
3805void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3806{
3807 struct net *net = dev_net(ifp->idev->dev);
3808 struct arg_dev_net_ip adni = {
3809 .dev = ifp->idev->dev,
3810 .net = net,
3811 .addr = &ifp->addr,
3812 };
0c3584d5 3813 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3814}
3815
be7a010d 3816#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3817
3818/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3819static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3820{
3821 struct in6_addr *gateway = (struct in6_addr *)arg;
3822
93c2fb25 3823 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3824 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3825 return -1;
3826 }
b16cb459
WW
3827
3828 /* Further clean up cached routes in exception table.
3829 * This is needed because cached route may have a different
3830 * gateway than its 'parent' in the case of an ip redirect.
3831 */
3832 rt6_exceptions_clean_tohost(rt, gateway);
3833
be7a010d
DJ
3834 return 0;
3835}
3836
3837void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3838{
3839 fib6_clean_all(net, fib6_clean_tohost, gateway);
3840}
3841
2127d95a
IS
/*
 * Argument bundle for the device up/down route walkers. Only one member
 * of the anonymous union is meaningful for a given walker.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event code */
	};
};
3849
8d1c802b 3850static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3851{
8d1c802b 3852 struct fib6_info *iter;
d7dedee1
IS
3853 struct fib6_node *fn;
3854
93c2fb25
DA
3855 fn = rcu_dereference_protected(rt->fib6_node,
3856 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3857 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3858 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3859 while (iter) {
93c2fb25 3860 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3861 rt6_qualify_for_ecmp(iter))
d7dedee1 3862 return iter;
8fb11a9a 3863 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3864 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3865 }
3866
3867 return NULL;
3868}
3869
8d1c802b 3870static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3871{
5e670d84
DA
3872 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3873 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3874 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3875 return true;
3876
3877 return false;
3878}
3879
8d1c802b 3880static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3881{
8d1c802b 3882 struct fib6_info *iter;
d7dedee1
IS
3883 int total = 0;
3884
3885 if (!rt6_is_dead(rt))
5e670d84 3886 total += rt->fib6_nh.nh_weight;
d7dedee1 3887
93c2fb25 3888 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3889 if (!rt6_is_dead(iter))
5e670d84 3890 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3891 }
3892
3893 return total;
3894}
3895
8d1c802b 3896static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3897{
3898 int upper_bound = -1;
3899
3900 if (!rt6_is_dead(rt)) {
5e670d84 3901 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3902 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3903 total) - 1;
3904 }
5e670d84 3905 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3906}
3907
8d1c802b 3908static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3909{
8d1c802b 3910 struct fib6_info *iter;
d7dedee1
IS
3911 int weight = 0;
3912
3913 rt6_upper_bound_set(rt, &weight, total);
3914
93c2fb25 3915 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3916 rt6_upper_bound_set(iter, &weight, total);
3917}
3918
8d1c802b 3919void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3920{
8d1c802b 3921 struct fib6_info *first;
d7dedee1
IS
3922 int total;
3923
3924 /* In case the entire multipath route was marked for flushing,
3925 * then there is no need to rebalance upon the removal of every
3926 * sibling route.
3927 */
93c2fb25 3928 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3929 return;
3930
3931 /* During lookup routes are evaluated in order, so we need to
3932 * make sure upper bounds are assigned from the first sibling
3933 * onwards.
3934 */
3935 first = rt6_multipath_first_sibling(rt);
3936 if (WARN_ON_ONCE(!first))
3937 return;
3938
3939 total = rt6_multipath_total_weight(first);
3940 rt6_multipath_upper_bound_set(first, total);
3941}
3942
8d1c802b 3943static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3944{
3945 const struct arg_netdev_event *arg = p_arg;
7aef6859 3946 struct net *net = dev_net(arg->dev);
2127d95a 3947
421842ed 3948 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3949 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3950 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3951 rt6_multipath_rebalance(rt);
1de178ed 3952 }
2127d95a
IS
3953
3954 return 0;
3955}
3956
3957void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3958{
3959 struct arg_netdev_event arg = {
3960 .dev = dev,
6802f3ad
IS
3961 {
3962 .nh_flags = nh_flags,
3963 },
2127d95a
IS
3964 };
3965
3966 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3967 arg.nh_flags |= RTNH_F_LINKDOWN;
3968
3969 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3970}
3971
8d1c802b 3972static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3973 const struct net_device *dev)
3974{
8d1c802b 3975 struct fib6_info *iter;
1de178ed 3976
5e670d84 3977 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3978 return true;
93c2fb25 3979 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3980 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3981 return true;
3982
3983 return false;
3984}
3985
8d1c802b 3986static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3987{
8d1c802b 3988 struct fib6_info *iter;
1de178ed
IS
3989
3990 rt->should_flush = 1;
93c2fb25 3991 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3992 iter->should_flush = 1;
3993}
3994
8d1c802b 3995static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3996 const struct net_device *down_dev)
3997{
8d1c802b 3998 struct fib6_info *iter;
1de178ed
IS
3999 unsigned int dead = 0;
4000
5e670d84
DA
4001 if (rt->fib6_nh.nh_dev == down_dev ||
4002 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 4003 dead++;
93c2fb25 4004 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4005 if (iter->fib6_nh.nh_dev == down_dev ||
4006 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
4007 dead++;
4008
4009 return dead;
4010}
4011
8d1c802b 4012static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4013 const struct net_device *dev,
4014 unsigned int nh_flags)
4015{
8d1c802b 4016 struct fib6_info *iter;
1de178ed 4017
5e670d84
DA
4018 if (rt->fib6_nh.nh_dev == dev)
4019 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 4020 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4021 if (iter->fib6_nh.nh_dev == dev)
4022 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4023}
4024
a1a22c12 4025/* called with write lock held for table with rt */
8d1c802b 4026static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4027{
4c981e28
IS
4028 const struct arg_netdev_event *arg = p_arg;
4029 const struct net_device *dev = arg->dev;
7aef6859 4030 struct net *net = dev_net(dev);
8ed67789 4031
421842ed 4032 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4033 return 0;
4034
4035 switch (arg->event) {
4036 case NETDEV_UNREGISTER:
5e670d84 4037 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4038 case NETDEV_DOWN:
1de178ed 4039 if (rt->should_flush)
27c6fa73 4040 return -1;
93c2fb25 4041 if (!rt->fib6_nsiblings)
5e670d84 4042 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4043 if (rt6_multipath_uses_dev(rt, dev)) {
4044 unsigned int count;
4045
4046 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4047 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4048 rt6_multipath_flush(rt);
4049 return -1;
4050 }
4051 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4052 RTNH_F_LINKDOWN);
7aef6859 4053 fib6_update_sernum(net, rt);
d7dedee1 4054 rt6_multipath_rebalance(rt);
1de178ed
IS
4055 }
4056 return -2;
27c6fa73 4057 case NETDEV_CHANGE:
5e670d84 4058 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4059 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4060 break;
5e670d84 4061 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4062 rt6_multipath_rebalance(rt);
27c6fa73 4063 break;
2b241361 4064 }
c159d30c 4065
1da177e4
LT
4066 return 0;
4067}
4068
27c6fa73 4069void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4070{
4c981e28 4071 struct arg_netdev_event arg = {
8ed67789 4072 .dev = dev,
6802f3ad
IS
4073 {
4074 .event = event,
4075 },
8ed67789
DL
4076 };
4077
4c981e28
IS
4078 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4079}
4080
4081void rt6_disable_ip(struct net_device *dev, unsigned long event)
4082{
4083 rt6_sync_down_dev(dev, event);
4084 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4085 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4086}
4087
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
4092
8d1c802b 4093static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4094{
4095 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4096 struct inet6_dev *idev;
4097
4098 /* In IPv6 pmtu discovery is not optional,
4099 so that RTAX_MTU lock cannot disable it.
4100 We still use this lock to block changes
4101 caused by addrconf/ndisc.
4102 */
4103
4104 idev = __in6_dev_get(arg->dev);
38308473 4105 if (!idev)
1da177e4
LT
4106 return 0;
4107
4108 /* For administrative MTU increase, there is no way to discover
4109 IPv6 PMTU increase, so PMTU increase should be updated here.
4110 Since RFC 1981 doesn't include administrative MTU increase
4111 update PMTU increase is a MUST. (i.e. jumbo frame)
4112 */
5e670d84 4113 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4114 !fib6_metric_locked(rt, RTAX_MTU)) {
4115 u32 mtu = rt->fib6_pmtu;
4116
4117 if (mtu >= arg->mtu ||
4118 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4119 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4120
f5bbe7ee 4121 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4122 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4123 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4124 }
1da177e4
LT
4125 return 0;
4126}
4127
95c96174 4128void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4129{
c71099ac
TG
4130 struct rt6_mtu_change_arg arg = {
4131 .dev = dev,
4132 .mtu = mtu,
4133 };
1da177e4 4134
0c3584d5 4135 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4136}
4137
ef7c79ed 4138static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4139 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4140 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4141 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4142 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4143 [RTA_PRIORITY] = { .type = NLA_U32 },
4144 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4145 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4146 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4147 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4148 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4149 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4150 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4151 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4152 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4153 [RTA_IP_PROTO] = { .type = NLA_U8 },
4154 [RTA_SPORT] = { .type = NLA_U16 },
4155 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4156};
4157
4158static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4159 struct fib6_config *cfg,
4160 struct netlink_ext_ack *extack)
1da177e4 4161{
86872cb5
TG
4162 struct rtmsg *rtm;
4163 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4164 unsigned int pref;
86872cb5 4165 int err;
1da177e4 4166
fceb6435
JB
4167 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4168 NULL);
86872cb5
TG
4169 if (err < 0)
4170 goto errout;
1da177e4 4171
86872cb5
TG
4172 err = -EINVAL;
4173 rtm = nlmsg_data(nlh);
4174 memset(cfg, 0, sizeof(*cfg));
4175
4176 cfg->fc_table = rtm->rtm_table;
4177 cfg->fc_dst_len = rtm->rtm_dst_len;
4178 cfg->fc_src_len = rtm->rtm_src_len;
4179 cfg->fc_flags = RTF_UP;
4180 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4181 cfg->fc_type = rtm->rtm_type;
86872cb5 4182
ef2c7d7b
ND
4183 if (rtm->rtm_type == RTN_UNREACHABLE ||
4184 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4185 rtm->rtm_type == RTN_PROHIBIT ||
4186 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4187 cfg->fc_flags |= RTF_REJECT;
4188
ab79ad14
4189 if (rtm->rtm_type == RTN_LOCAL)
4190 cfg->fc_flags |= RTF_LOCAL;
4191
1f56a01f
MKL
4192 if (rtm->rtm_flags & RTM_F_CLONED)
4193 cfg->fc_flags |= RTF_CACHE;
4194
fc1e64e1
DA
4195 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4196
15e47304 4197 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4198 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4199 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4200
4201 if (tb[RTA_GATEWAY]) {
67b61f6c 4202 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4203 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4204 }
86872cb5
TG
4205
4206 if (tb[RTA_DST]) {
4207 int plen = (rtm->rtm_dst_len + 7) >> 3;
4208
4209 if (nla_len(tb[RTA_DST]) < plen)
4210 goto errout;
4211
4212 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4213 }
86872cb5
TG
4214
4215 if (tb[RTA_SRC]) {
4216 int plen = (rtm->rtm_src_len + 7) >> 3;
4217
4218 if (nla_len(tb[RTA_SRC]) < plen)
4219 goto errout;
4220
4221 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4222 }
86872cb5 4223
c3968a85 4224 if (tb[RTA_PREFSRC])
67b61f6c 4225 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4226
86872cb5
TG
4227 if (tb[RTA_OIF])
4228 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4229
4230 if (tb[RTA_PRIORITY])
4231 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4232
4233 if (tb[RTA_METRICS]) {
4234 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4235 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4236 }
86872cb5
TG
4237
4238 if (tb[RTA_TABLE])
4239 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4240
51ebd318
ND
4241 if (tb[RTA_MULTIPATH]) {
4242 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4243 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4244
4245 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4246 cfg->fc_mp_len, extack);
9ed59592
DA
4247 if (err < 0)
4248 goto errout;
51ebd318
ND
4249 }
4250
c78ba6d6
LR
4251 if (tb[RTA_PREF]) {
4252 pref = nla_get_u8(tb[RTA_PREF]);
4253 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4254 pref != ICMPV6_ROUTER_PREF_HIGH)
4255 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4256 cfg->fc_flags |= RTF_PREF(pref);
4257 }
4258
19e42e45
RP
4259 if (tb[RTA_ENCAP])
4260 cfg->fc_encap = tb[RTA_ENCAP];
4261
9ed59592 4262 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4263 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4264
c255bd68 4265 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4266 if (err < 0)
4267 goto errout;
4268 }
4269
32bc201e
XL
4270 if (tb[RTA_EXPIRES]) {
4271 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4272
4273 if (addrconf_finite_timeout(timeout)) {
4274 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4275 cfg->fc_flags |= RTF_EXPIRES;
4276 }
4277 }
4278
86872cb5
TG
4279 err = 0;
4280errout:
4281 return err;
1da177e4
LT
4282}
4283
6b9ea5a6 4284struct rt6_nh {
8d1c802b 4285 struct fib6_info *fib6_info;
6b9ea5a6 4286 struct fib6_config r_cfg;
6b9ea5a6
RP
4287 struct list_head next;
4288};
4289
4290static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4291{
4292 struct rt6_nh *nh;
4293
4294 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4295 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4296 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4297 nh->r_cfg.fc_ifindex);
4298 }
4299}
4300
d4ead6b3
DA
4301static int ip6_route_info_append(struct net *net,
4302 struct list_head *rt6_nh_list,
8d1c802b
DA
4303 struct fib6_info *rt,
4304 struct fib6_config *r_cfg)
6b9ea5a6
RP
4305{
4306 struct rt6_nh *nh;
6b9ea5a6
RP
4307 int err = -EEXIST;
4308
4309 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4310 /* check if fib6_info already exists */
4311 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4312 return err;
4313 }
4314
4315 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4316 if (!nh)
4317 return -ENOMEM;
8d1c802b 4318 nh->fib6_info = rt;
d4ead6b3 4319 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4320 if (err) {
4321 kfree(nh);
4322 return err;
4323 }
4324 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4325 list_add_tail(&nh->next, rt6_nh_list);
4326
4327 return 0;
4328}
4329
8d1c802b
DA
4330static void ip6_route_mpath_notify(struct fib6_info *rt,
4331 struct fib6_info *rt_last,
3b1137fe
DA
4332 struct nl_info *info,
4333 __u16 nlflags)
4334{
4335 /* if this is an APPEND route, then rt points to the first route
4336 * inserted and rt_last points to last route inserted. Userspace
4337 * wants a consistent dump of the route which starts at the first
4338 * nexthop. Since sibling routes are always added at the end of
4339 * the list, find the first sibling of the last route appended
4340 */
93c2fb25
DA
4341 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4342 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4343 struct fib6_info,
93c2fb25 4344 fib6_siblings);
3b1137fe
DA
4345 }
4346
4347 if (rt)
4348 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4349}
4350
333c4301
DA
4351static int ip6_route_multipath_add(struct fib6_config *cfg,
4352 struct netlink_ext_ack *extack)
51ebd318 4353{
8d1c802b 4354 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4355 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4356 struct fib6_config r_cfg;
4357 struct rtnexthop *rtnh;
8d1c802b 4358 struct fib6_info *rt;
6b9ea5a6
RP
4359 struct rt6_nh *err_nh;
4360 struct rt6_nh *nh, *nh_safe;
3b1137fe 4361 __u16 nlflags;
51ebd318
ND
4362 int remaining;
4363 int attrlen;
6b9ea5a6
RP
4364 int err = 1;
4365 int nhn = 0;
4366 int replace = (cfg->fc_nlinfo.nlh &&
4367 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4368 LIST_HEAD(rt6_nh_list);
51ebd318 4369
3b1137fe
DA
4370 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4371 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4372 nlflags |= NLM_F_APPEND;
4373
35f1b4e9 4374 remaining = cfg->fc_mp_len;
51ebd318 4375 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4376
6b9ea5a6 4377 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4378 * fib6_info structs per nexthop
6b9ea5a6 4379 */
51ebd318
ND
4380 while (rtnh_ok(rtnh, remaining)) {
4381 memcpy(&r_cfg, cfg, sizeof(*cfg));
4382 if (rtnh->rtnh_ifindex)
4383 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4384
4385 attrlen = rtnh_attrlen(rtnh);
4386 if (attrlen > 0) {
4387 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4388
4389 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4390 if (nla) {
67b61f6c 4391 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4392 r_cfg.fc_flags |= RTF_GATEWAY;
4393 }
19e42e45
RP
4394 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4395 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4396 if (nla)
4397 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4398 }
6b9ea5a6 4399
68e2ffde 4400 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4401 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4402 if (IS_ERR(rt)) {
4403 err = PTR_ERR(rt);
4404 rt = NULL;
6b9ea5a6 4405 goto cleanup;
8c5b83f0 4406 }
b5d2d75e
DA
4407 if (!rt6_qualify_for_ecmp(rt)) {
4408 err = -EINVAL;
4409 NL_SET_ERR_MSG(extack,
4410 "Device only routes can not be added for IPv6 using the multipath API.");
4411 fib6_info_release(rt);
4412 goto cleanup;
4413 }
6b9ea5a6 4414
5e670d84 4415 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4416
d4ead6b3
DA
4417 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4418 rt, &r_cfg);
51ebd318 4419 if (err) {
93531c67 4420 fib6_info_release(rt);
6b9ea5a6
RP
4421 goto cleanup;
4422 }
4423
4424 rtnh = rtnh_next(rtnh, &remaining);
4425 }
4426
3b1137fe
DA
4427 /* for add and replace send one notification with all nexthops.
4428 * Skip the notification in fib6_add_rt2node and send one with
4429 * the full route when done
4430 */
4431 info->skip_notify = 1;
4432
6b9ea5a6
RP
4433 err_nh = NULL;
4434 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4435 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4436 fib6_info_release(nh->fib6_info);
93531c67 4437
f7225172
DA
4438 if (!err) {
4439 /* save reference to last route successfully inserted */
4440 rt_last = nh->fib6_info;
4441
4442 /* save reference to first route for notification */
4443 if (!rt_notif)
4444 rt_notif = nh->fib6_info;
4445 }
3b1137fe 4446
8d1c802b
DA
4447 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4448 nh->fib6_info = NULL;
6b9ea5a6
RP
4449 if (err) {
4450 if (replace && nhn)
4451 ip6_print_replace_route_err(&rt6_nh_list);
4452 err_nh = nh;
4453 goto add_errout;
51ebd318 4454 }
6b9ea5a6 4455
1a72418b 4456 /* Because each route is added like a single route we remove
27596472
MK
4457 * these flags after the first nexthop: if there is a collision,
4458 * we have already failed to add the first nexthop:
4459 * fib6_add_rt2node() has rejected it; when replacing, old
4460 * nexthops have been replaced by first new, the rest should
4461 * be added to it.
1a72418b 4462 */
27596472
MK
4463 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4464 NLM_F_REPLACE);
6b9ea5a6
RP
4465 nhn++;
4466 }
4467
3b1137fe
DA
4468 /* success ... tell user about new route */
4469 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4470 goto cleanup;
4471
4472add_errout:
3b1137fe
DA
4473 /* send notification for routes that were added so that
4474 * the delete notifications sent by ip6_route_del are
4475 * coherent
4476 */
4477 if (rt_notif)
4478 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4479
6b9ea5a6
RP
4480 /* Delete routes that were already added */
4481 list_for_each_entry(nh, &rt6_nh_list, next) {
4482 if (err_nh == nh)
4483 break;
333c4301 4484 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4485 }
4486
4487cleanup:
4488 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4489 if (nh->fib6_info)
4490 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4491 list_del(&nh->next);
4492 kfree(nh);
4493 }
4494
4495 return err;
4496}
4497
333c4301
DA
4498static int ip6_route_multipath_del(struct fib6_config *cfg,
4499 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4500{
4501 struct fib6_config r_cfg;
4502 struct rtnexthop *rtnh;
4503 int remaining;
4504 int attrlen;
4505 int err = 1, last_err = 0;
4506
4507 remaining = cfg->fc_mp_len;
4508 rtnh = (struct rtnexthop *)cfg->fc_mp;
4509
4510 /* Parse a Multipath Entry */
4511 while (rtnh_ok(rtnh, remaining)) {
4512 memcpy(&r_cfg, cfg, sizeof(*cfg));
4513 if (rtnh->rtnh_ifindex)
4514 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4515
4516 attrlen = rtnh_attrlen(rtnh);
4517 if (attrlen > 0) {
4518 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4519
4520 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4521 if (nla) {
4522 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4523 r_cfg.fc_flags |= RTF_GATEWAY;
4524 }
4525 }
333c4301 4526 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4527 if (err)
4528 last_err = err;
4529
51ebd318
ND
4530 rtnh = rtnh_next(rtnh, &remaining);
4531 }
4532
4533 return last_err;
4534}
4535
c21ef3e3
DA
4536static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4537 struct netlink_ext_ack *extack)
1da177e4 4538{
86872cb5
TG
4539 struct fib6_config cfg;
4540 int err;
1da177e4 4541
333c4301 4542 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4543 if (err < 0)
4544 return err;
4545
51ebd318 4546 if (cfg.fc_mp)
333c4301 4547 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4548 else {
4549 cfg.fc_delete_all_nh = 1;
333c4301 4550 return ip6_route_del(&cfg, extack);
0ae81335 4551 }
1da177e4
LT
4552}
4553
c21ef3e3
DA
4554static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4555 struct netlink_ext_ack *extack)
1da177e4 4556{
86872cb5
TG
4557 struct fib6_config cfg;
4558 int err;
1da177e4 4559
333c4301 4560 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4561 if (err < 0)
4562 return err;
4563
51ebd318 4564 if (cfg.fc_mp)
333c4301 4565 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4566 else
acb54e3c 4567 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4568}
4569
8d1c802b 4570static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4571{
beb1afac
DA
4572 int nexthop_len = 0;
4573
93c2fb25 4574 if (rt->fib6_nsiblings) {
beb1afac
DA
4575 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4576 + NLA_ALIGN(sizeof(struct rtnexthop))
4577 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4578 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4579
93c2fb25 4580 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4581 }
4582
339bf98f
TG
4583 return NLMSG_ALIGN(sizeof(struct rtmsg))
4584 + nla_total_size(16) /* RTA_SRC */
4585 + nla_total_size(16) /* RTA_DST */
4586 + nla_total_size(16) /* RTA_GATEWAY */
4587 + nla_total_size(16) /* RTA_PREFSRC */
4588 + nla_total_size(4) /* RTA_TABLE */
4589 + nla_total_size(4) /* RTA_IIF */
4590 + nla_total_size(4) /* RTA_OIF */
4591 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4592 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4593 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4594 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4595 + nla_total_size(1) /* RTA_PREF */
5e670d84 4596 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4597 + nexthop_len;
4598}
4599
8d1c802b 4600static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4601 unsigned int *flags, bool skip_oif)
beb1afac 4602{
5e670d84 4603 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4604 *flags |= RTNH_F_DEAD;
4605
5e670d84 4606 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4607 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4608
4609 rcu_read_lock();
4610 if (fib6_ignore_linkdown(rt))
beb1afac 4611 *flags |= RTNH_F_DEAD;
dcd1f572 4612 rcu_read_unlock();
beb1afac
DA
4613 }
4614
93c2fb25 4615 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4616 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4617 goto nla_put_failure;
4618 }
4619
5e670d84
DA
4620 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4621 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4622 *flags |= RTNH_F_OFFLOAD;
4623
5be083ce 4624 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4625 if (!skip_oif && rt->fib6_nh.nh_dev &&
4626 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4627 goto nla_put_failure;
4628
5e670d84
DA
4629 if (rt->fib6_nh.nh_lwtstate &&
4630 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4631 goto nla_put_failure;
4632
4633 return 0;
4634
4635nla_put_failure:
4636 return -EMSGSIZE;
4637}
4638
5be083ce 4639/* add multipath next hop */
8d1c802b 4640static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4641{
5e670d84 4642 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4643 struct rtnexthop *rtnh;
4644 unsigned int flags = 0;
4645
4646 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4647 if (!rtnh)
4648 goto nla_put_failure;
4649
5e670d84
DA
4650 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4651 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4652
5be083ce 4653 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4654 goto nla_put_failure;
4655
4656 rtnh->rtnh_flags = flags;
4657
4658 /* length of rtnetlink header + attributes */
4659 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4660
4661 return 0;
4662
4663nla_put_failure:
4664 return -EMSGSIZE;
339bf98f
TG
4665}
4666
d4ead6b3 4667static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4668 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4669 struct in6_addr *dest, struct in6_addr *src,
15e47304 4670 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4671 unsigned int flags)
1da177e4
LT
4672{
4673 struct rtmsg *rtm;
2d7202bf 4674 struct nlmsghdr *nlh;
d4ead6b3
DA
4675 long expires = 0;
4676 u32 *pmetrics;
9e762a4a 4677 u32 table;
1da177e4 4678
15e47304 4679 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4680 if (!nlh)
26932566 4681 return -EMSGSIZE;
2d7202bf
TG
4682
4683 rtm = nlmsg_data(nlh);
1da177e4 4684 rtm->rtm_family = AF_INET6;
93c2fb25
DA
4685 rtm->rtm_dst_len = rt->fib6_dst.plen;
4686 rtm->rtm_src_len = rt->fib6_src.plen;
1da177e4 4687 rtm->rtm_tos = 0;
93c2fb25
DA
4688 if (rt->fib6_table)
4689 table = rt->fib6_table->tb6_id;
c71099ac 4690 else
9e762a4a
PM
4691 table = RT6_TABLE_UNSPEC;
4692 rtm->rtm_table = table;
c78679e8
DM
4693 if (nla_put_u32(skb, RTA_TABLE, table))
4694 goto nla_put_failure;
e8478e80
DA
4695
4696 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4697 rtm->rtm_flags = 0;
4698 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4699 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4700
93c2fb25 4701 if (rt->fib6_flags & RTF_CACHE)
1da177e4
LT
4702 rtm->rtm_flags |= RTM_F_CLONED;
4703
d4ead6b3
DA
4704 if (dest) {
4705 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4706 goto nla_put_failure;
1ab1457c 4707 rtm->rtm_dst_len = 128;
1da177e4 4708 } else if (rtm->rtm_dst_len)
93c2fb25 4709 if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
c78679e8 4710 goto nla_put_failure;
1da177e4
LT
4711#ifdef CONFIG_IPV6_SUBTREES
4712 if (src) {
930345ea 4713 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4714 goto nla_put_failure;
1ab1457c 4715 rtm->rtm_src_len = 128;
c78679e8 4716 } else if (rtm->rtm_src_len &&
93c2fb25 4717 nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
c78679e8 4718 goto nla_put_failure;
1da177e4 4719#endif
7bc570c8
YH
4720 if (iif) {
4721#ifdef CONFIG_IPV6_MROUTE
93c2fb25 4722 if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
fd61c6ba
DA
4723 int err = ip6mr_get_route(net, skb, rtm, portid);
4724
4725 if (err == 0)
4726 return 0;
4727 if (err < 0)
4728 goto nla_put_failure;
7bc570c8
YH
4729 } else
4730#endif
c78679e8
DM
4731 if (nla_put_u32(skb, RTA_IIF, iif))
4732 goto nla_put_failure;
d4ead6b3 4733 } else if (dest) {
1da177e4 4734 struct in6_addr saddr_buf;
d4ead6b3 4735 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4736 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4737 goto nla_put_failure;
1da177e4 4738 }
2d7202bf 4739
93c2fb25 4740 if (rt->fib6_prefsrc.plen) {
c3968a85 4741 struct in6_addr saddr_buf;
93c2fb25 4742 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4743 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4744 goto nla_put_failure;
c3968a85
DW
4745 }
4746
d4ead6b3
DA
4747 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4748 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4749 goto nla_put_failure;
4750
93c2fb25 4751 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4752 goto nla_put_failure;
8253947e 4753
beb1afac
DA
4754 /* For multipath routes, walk the siblings list and add
4755 * each as a nexthop within RTA_MULTIPATH.
4756 */
93c2fb25 4757 if (rt->fib6_nsiblings) {
8d1c802b 4758 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4759 struct nlattr *mp;
4760
4761 mp = nla_nest_start(skb, RTA_MULTIPATH);
4762 if (!mp)
4763 goto nla_put_failure;
4764
4765 if (rt6_add_nexthop(skb, rt) < 0)
4766 goto nla_put_failure;
4767
4768 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4769 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4770 if (rt6_add_nexthop(skb, sibling) < 0)
4771 goto nla_put_failure;
4772 }
4773
4774 nla_nest_end(skb, mp);
4775 } else {
5be083ce 4776 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4777 goto nla_put_failure;
4778 }
4779
93c2fb25 4780 if (rt->fib6_flags & RTF_EXPIRES) {
14895687
DA
4781 expires = dst ? dst->expires : rt->expires;
4782 expires -= jiffies;
4783 }
69cdf8f9 4784
d4ead6b3 4785 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4786 goto nla_put_failure;
2d7202bf 4787
93c2fb25 4788 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
c78ba6d6
LR
4789 goto nla_put_failure;
4790
19e42e45 4791
053c095a
JB
4792 nlmsg_end(skb, nlh);
4793 return 0;
2d7202bf
TG
4794
4795nla_put_failure:
26932566
PM
4796 nlmsg_cancel(skb, nlh);
4797 return -EMSGSIZE;
1da177e4
LT
4798}
4799
8d1c802b 4800int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4801{
4802 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4803 struct net *net = arg->net;
4804
421842ed 4805 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4806 return 0;
1da177e4 4807
2d7202bf
TG
4808 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4809 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4810
4811 /* user wants prefix routes only */
4812 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4813 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4814 /* success since this is not a prefix route */
4815 return 1;
4816 }
4817 }
1da177e4 4818
d4ead6b3
DA
4819 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4820 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4821 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4822}
4823
c21ef3e3
DA
4824static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4825 struct netlink_ext_ack *extack)
1da177e4 4826{
3b1e0a65 4827 struct net *net = sock_net(in_skb->sk);
ab364a6f 4828 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4829 int err, iif = 0, oif = 0;
a68886a6 4830 struct fib6_info *from;
18c3a61c 4831 struct dst_entry *dst;
ab364a6f 4832 struct rt6_info *rt;
1da177e4 4833 struct sk_buff *skb;
ab364a6f 4834 struct rtmsg *rtm;
4c9483b2 4835 struct flowi6 fl6;
18c3a61c 4836 bool fibmatch;
1da177e4 4837
fceb6435 4838 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4839 extack);
ab364a6f
TG
4840 if (err < 0)
4841 goto errout;
1da177e4 4842
ab364a6f 4843 err = -EINVAL;
4c9483b2 4844 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4845 rtm = nlmsg_data(nlh);
4846 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4847 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4848
ab364a6f
TG
4849 if (tb[RTA_SRC]) {
4850 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4851 goto errout;
4852
4e3fd7a0 4853 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4854 }
4855
4856 if (tb[RTA_DST]) {
4857 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4858 goto errout;
4859
4e3fd7a0 4860 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4861 }
4862
4863 if (tb[RTA_IIF])
4864 iif = nla_get_u32(tb[RTA_IIF]);
4865
4866 if (tb[RTA_OIF])
72331bc0 4867 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4868
2e47b291
LC
4869 if (tb[RTA_MARK])
4870 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4871
622ec2c9
LC
4872 if (tb[RTA_UID])
4873 fl6.flowi6_uid = make_kuid(current_user_ns(),
4874 nla_get_u32(tb[RTA_UID]));
4875 else
4876 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4877
eacb9384
RP
4878 if (tb[RTA_SPORT])
4879 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4880
4881 if (tb[RTA_DPORT])
4882 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4883
4884 if (tb[RTA_IP_PROTO]) {
4885 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4886 &fl6.flowi6_proto, extack);
4887 if (err)
4888 goto errout;
4889 }
4890
1da177e4
LT
4891 if (iif) {
4892 struct net_device *dev;
72331bc0
SL
4893 int flags = 0;
4894
121622db
FW
4895 rcu_read_lock();
4896
4897 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4898 if (!dev) {
121622db 4899 rcu_read_unlock();
1da177e4 4900 err = -ENODEV;
ab364a6f 4901 goto errout;
1da177e4 4902 }
72331bc0
SL
4903
4904 fl6.flowi6_iif = iif;
4905
4906 if (!ipv6_addr_any(&fl6.saddr))
4907 flags |= RT6_LOOKUP_F_HAS_SADDR;
4908
b75cc8f9 4909 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4910
4911 rcu_read_unlock();
72331bc0
SL
4912 } else {
4913 fl6.flowi6_oif = oif;
4914
58acfd71 4915 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4916 }
4917
18c3a61c
RP
4918
4919 rt = container_of(dst, struct rt6_info, dst);
4920 if (rt->dst.error) {
4921 err = rt->dst.error;
4922 ip6_rt_put(rt);
4923 goto errout;
1da177e4
LT
4924 }
4925
9d6acb3b
WC
4926 if (rt == net->ipv6.ip6_null_entry) {
4927 err = rt->dst.error;
4928 ip6_rt_put(rt);
4929 goto errout;
4930 }
4931
ab364a6f 4932 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4933 if (!skb) {
94e187c0 4934 ip6_rt_put(rt);
ab364a6f
TG
4935 err = -ENOBUFS;
4936 goto errout;
4937 }
1da177e4 4938
d8d1f30b 4939 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4940
4941 rcu_read_lock();
4942 from = rcu_dereference(rt->from);
4943
18c3a61c 4944 if (fibmatch)
a68886a6 4945 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4946 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4947 nlh->nlmsg_seq, 0);
4948 else
a68886a6
DA
4949 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4950 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4951 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4952 0);
a68886a6
DA
4953 rcu_read_unlock();
4954
1da177e4 4955 if (err < 0) {
ab364a6f
TG
4956 kfree_skb(skb);
4957 goto errout;
1da177e4
LT
4958 }
4959
15e47304 4960 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4961errout:
1da177e4 4962 return err;
1da177e4
LT
4963}
4964
8d1c802b 4965void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4966 unsigned int nlm_flags)
1da177e4
LT
4967{
4968 struct sk_buff *skb;
5578689a 4969 struct net *net = info->nl_net;
528c4ceb
DL
4970 u32 seq;
4971 int err;
4972
4973 err = -ENOBUFS;
38308473 4974 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4975
19e42e45 4976 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4977 if (!skb)
21713ebc
TG
4978 goto errout;
4979
d4ead6b3
DA
4980 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4981 event, info->portid, seq, nlm_flags);
26932566
PM
4982 if (err < 0) {
4983 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4984 WARN_ON(err == -EMSGSIZE);
4985 kfree_skb(skb);
4986 goto errout;
4987 }
15e47304 4988 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4989 info->nlh, gfp_any());
4990 return;
21713ebc
TG
4991errout:
4992 if (err < 0)
5578689a 4993 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4994}
4995
8ed67789 4996static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4997 unsigned long event, void *ptr)
8ed67789 4998{
351638e7 4999 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5000 struct net *net = dev_net(dev);
8ed67789 5001
242d3a49
WC
5002 if (!(dev->flags & IFF_LOOPBACK))
5003 return NOTIFY_OK;
5004
5005 if (event == NETDEV_REGISTER) {
421842ed 5006 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5007 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5008 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5009#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5010 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5011 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5012 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5013 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5014#endif
76da0704
WC
5015 } else if (event == NETDEV_UNREGISTER &&
5016 dev->reg_state != NETREG_UNREGISTERED) {
5017 /* NETDEV_UNREGISTER could be fired for multiple times by
5018 * netdev_wait_allrefs(). Make sure we only call this once.
5019 */
12d94a80 5020 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5021#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5022 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5023 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5024#endif
5025 }
5026
5027 return NOTIFY_OK;
5028}
5029
1da177e4
LT
5030/*
5031 * /proc
5032 */
5033
5034#ifdef CONFIG_PROC_FS
1da177e4
LT
5035static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5036{
69ddb805 5037 struct net *net = (struct net *)seq->private;
1da177e4 5038 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5039 net->ipv6.rt6_stats->fib_nodes,
5040 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5041 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5042 net->ipv6.rt6_stats->fib_rt_entries,
5043 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5044 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5045 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5046
5047 return 0;
5048}
1da177e4
LT
5049#endif /* CONFIG_PROC_FS */
5050
5051#ifdef CONFIG_SYSCTL
5052
1da177e4 5053static
fe2c6338 5054int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5055 void __user *buffer, size_t *lenp, loff_t *ppos)
5056{
c486da34
LAG
5057 struct net *net;
5058 int delay;
5059 if (!write)
1da177e4 5060 return -EINVAL;
c486da34
LAG
5061
5062 net = (struct net *)ctl->extra1;
5063 delay = net->ipv6.sysctl.flush_delay;
5064 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5065 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5066 return 0;
1da177e4
LT
5067}
5068
fe2c6338 5069struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5070 {
1da177e4 5071 .procname = "flush",
4990509f 5072 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5073 .maxlen = sizeof(int),
89c8b3a1 5074 .mode = 0200,
6d9f239a 5075 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5076 },
5077 {
1da177e4 5078 .procname = "gc_thresh",
9a7ec3a9 5079 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5080 .maxlen = sizeof(int),
5081 .mode = 0644,
6d9f239a 5082 .proc_handler = proc_dointvec,
1da177e4
LT
5083 },
5084 {
1da177e4 5085 .procname = "max_size",
4990509f 5086 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5087 .maxlen = sizeof(int),
5088 .mode = 0644,
6d9f239a 5089 .proc_handler = proc_dointvec,
1da177e4
LT
5090 },
5091 {
1da177e4 5092 .procname = "gc_min_interval",
4990509f 5093 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5094 .maxlen = sizeof(int),
5095 .mode = 0644,
6d9f239a 5096 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5097 },
5098 {
1da177e4 5099 .procname = "gc_timeout",
4990509f 5100 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5101 .maxlen = sizeof(int),
5102 .mode = 0644,
6d9f239a 5103 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5104 },
5105 {
1da177e4 5106 .procname = "gc_interval",
4990509f 5107 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5108 .maxlen = sizeof(int),
5109 .mode = 0644,
6d9f239a 5110 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5111 },
5112 {
1da177e4 5113 .procname = "gc_elasticity",
4990509f 5114 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5115 .maxlen = sizeof(int),
5116 .mode = 0644,
f3d3f616 5117 .proc_handler = proc_dointvec,
1da177e4
LT
5118 },
5119 {
1da177e4 5120 .procname = "mtu_expires",
4990509f 5121 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5122 .maxlen = sizeof(int),
5123 .mode = 0644,
6d9f239a 5124 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5125 },
5126 {
1da177e4 5127 .procname = "min_adv_mss",
4990509f 5128 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5129 .maxlen = sizeof(int),
5130 .mode = 0644,
f3d3f616 5131 .proc_handler = proc_dointvec,
1da177e4
LT
5132 },
5133 {
1da177e4 5134 .procname = "gc_min_interval_ms",
4990509f 5135 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5136 .maxlen = sizeof(int),
5137 .mode = 0644,
6d9f239a 5138 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5139 },
f8572d8f 5140 { }
1da177e4
LT
5141};
5142
2c8c1e72 5143struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5144{
5145 struct ctl_table *table;
5146
5147 table = kmemdup(ipv6_route_table_template,
5148 sizeof(ipv6_route_table_template),
5149 GFP_KERNEL);
5ee09105
YH
5150
5151 if (table) {
5152 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5153 table[0].extra1 = net;
86393e52 5154 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5155 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5156 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5157 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5158 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5159 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5160 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5161 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5162 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5163
5164 /* Don't export sysctls to unprivileged users */
5165 if (net->user_ns != &init_user_ns)
5166 table[0].procname = NULL;
5ee09105
YH
5167 }
5168
760f2d01
DL
5169 return table;
5170}
1da177e4
LT
5171#endif
5172
2c8c1e72 5173static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5174{
633d424b 5175 int ret = -ENOMEM;
8ed67789 5176
86393e52
AD
5177 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5178 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5179
fc66f95c
ED
5180 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5181 goto out_ip6_dst_ops;
5182
421842ed
DA
5183 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5184 sizeof(*net->ipv6.fib6_null_entry),
5185 GFP_KERNEL);
5186 if (!net->ipv6.fib6_null_entry)
5187 goto out_ip6_dst_entries;
5188
8ed67789
DL
5189 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5190 sizeof(*net->ipv6.ip6_null_entry),
5191 GFP_KERNEL);
5192 if (!net->ipv6.ip6_null_entry)
421842ed 5193 goto out_fib6_null_entry;
d8d1f30b 5194 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5195 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5196 ip6_template_metrics, true);
8ed67789
DL
5197
5198#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5199 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5200 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5201 sizeof(*net->ipv6.ip6_prohibit_entry),
5202 GFP_KERNEL);
68fffc67
PZ
5203 if (!net->ipv6.ip6_prohibit_entry)
5204 goto out_ip6_null_entry;
d8d1f30b 5205 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5206 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5207 ip6_template_metrics, true);
8ed67789
DL
5208
5209 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5210 sizeof(*net->ipv6.ip6_blk_hole_entry),
5211 GFP_KERNEL);
68fffc67
PZ
5212 if (!net->ipv6.ip6_blk_hole_entry)
5213 goto out_ip6_prohibit_entry;
d8d1f30b 5214 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5215 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5216 ip6_template_metrics, true);
8ed67789
DL
5217#endif
5218
b339a47c
PZ
5219 net->ipv6.sysctl.flush_delay = 0;
5220 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5221 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5222 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5223 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5224 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5225 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5226 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5227
6891a346
BT
5228 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5229
8ed67789
DL
5230 ret = 0;
5231out:
5232 return ret;
f2fc6a54 5233
68fffc67
PZ
5234#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5235out_ip6_prohibit_entry:
5236 kfree(net->ipv6.ip6_prohibit_entry);
5237out_ip6_null_entry:
5238 kfree(net->ipv6.ip6_null_entry);
5239#endif
421842ed
DA
5240out_fib6_null_entry:
5241 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5242out_ip6_dst_entries:
5243 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5244out_ip6_dst_ops:
f2fc6a54 5245 goto out;
cdb18761
DL
5246}
5247
2c8c1e72 5248static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5249{
421842ed 5250 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5251 kfree(net->ipv6.ip6_null_entry);
5252#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5253 kfree(net->ipv6.ip6_prohibit_entry);
5254 kfree(net->ipv6.ip6_blk_hole_entry);
5255#endif
41bb78b4 5256 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5257}
5258
d189634e
TG
5259static int __net_init ip6_route_net_init_late(struct net *net)
5260{
5261#ifdef CONFIG_PROC_FS
c3506372
CH
5262 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5263 sizeof(struct ipv6_route_iter));
3617d949
CH
5264 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5265 rt6_stats_seq_show, NULL);
d189634e
TG
5266#endif
5267 return 0;
5268}
5269
5270static void __net_exit ip6_route_net_exit_late(struct net *net)
5271{
5272#ifdef CONFIG_PROC_FS
ece31ffd
G
5273 remove_proc_entry("ipv6_route", net->proc_net);
5274 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5275#endif
5276}
5277
cdb18761
DL
5278static struct pernet_operations ip6_route_net_ops = {
5279 .init = ip6_route_net_init,
5280 .exit = ip6_route_net_exit,
5281};
5282
c3426b47
DM
5283static int __net_init ipv6_inetpeer_init(struct net *net)
5284{
5285 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5286
5287 if (!bp)
5288 return -ENOMEM;
5289 inet_peer_base_init(bp);
5290 net->ipv6.peers = bp;
5291 return 0;
5292}
5293
5294static void __net_exit ipv6_inetpeer_exit(struct net *net)
5295{
5296 struct inet_peer_base *bp = net->ipv6.peers;
5297
5298 net->ipv6.peers = NULL;
56a6b248 5299 inetpeer_invalidate_tree(bp);
c3426b47
DM
5300 kfree(bp);
5301}
5302
2b823f72 5303static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5304 .init = ipv6_inetpeer_init,
5305 .exit = ipv6_inetpeer_exit,
5306};
5307
d189634e
TG
5308static struct pernet_operations ip6_route_net_late_ops = {
5309 .init = ip6_route_net_init_late,
5310 .exit = ip6_route_net_exit_late,
5311};
5312
8ed67789
DL
5313static struct notifier_block ip6_route_dev_notifier = {
5314 .notifier_call = ip6_route_dev_notify,
242d3a49 5315 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5316};
5317
2f460933
WC
5318void __init ip6_route_init_special_entries(void)
5319{
5320 /* Registering of the loopback is done before this portion of code,
5321 * the loopback reference in rt6_info will not be taken, do it
5322 * manually for init_net */
421842ed 5323 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5324 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5325 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5326 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5327 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5328 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5329 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5330 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5331 #endif
5332}
5333
433d49c3 5334int __init ip6_route_init(void)
1da177e4 5335{
433d49c3 5336 int ret;
8d0b94af 5337 int cpu;
433d49c3 5338
9a7ec3a9
DL
5339 ret = -ENOMEM;
5340 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5341 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5342 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5343 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5344 goto out;
14e50e57 5345
fc66f95c 5346 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5347 if (ret)
bdb3289f 5348 goto out_kmem_cache;
bdb3289f 5349
c3426b47
DM
5350 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5351 if (ret)
e8803b6c 5352 goto out_dst_entries;
2a0c451a 5353
7e52b33b
DM
5354 ret = register_pernet_subsys(&ip6_route_net_ops);
5355 if (ret)
5356 goto out_register_inetpeer;
c3426b47 5357
5dc121e9
AE
5358 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5359
e8803b6c 5360 ret = fib6_init();
433d49c3 5361 if (ret)
8ed67789 5362 goto out_register_subsys;
433d49c3 5363
433d49c3
DL
5364 ret = xfrm6_init();
5365 if (ret)
e8803b6c 5366 goto out_fib6_init;
c35b7e72 5367
433d49c3
DL
5368 ret = fib6_rules_init();
5369 if (ret)
5370 goto xfrm6_init;
7e5449c2 5371
d189634e
TG
5372 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5373 if (ret)
5374 goto fib6_rules_init;
5375
16feebcf
FW
5376 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5377 inet6_rtm_newroute, NULL, 0);
5378 if (ret < 0)
5379 goto out_register_late_subsys;
5380
5381 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5382 inet6_rtm_delroute, NULL, 0);
5383 if (ret < 0)
5384 goto out_register_late_subsys;
5385
5386 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5387 inet6_rtm_getroute, NULL,
5388 RTNL_FLAG_DOIT_UNLOCKED);
5389 if (ret < 0)
d189634e 5390 goto out_register_late_subsys;
c127ea2c 5391
8ed67789 5392 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5393 if (ret)
d189634e 5394 goto out_register_late_subsys;
8ed67789 5395
8d0b94af
MKL
5396 for_each_possible_cpu(cpu) {
5397 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5398
5399 INIT_LIST_HEAD(&ul->head);
5400 spin_lock_init(&ul->lock);
5401 }
5402
433d49c3
DL
5403out:
5404 return ret;
5405
d189634e 5406out_register_late_subsys:
16feebcf 5407 rtnl_unregister_all(PF_INET6);
d189634e 5408 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5409fib6_rules_init:
433d49c3
DL
5410 fib6_rules_cleanup();
5411xfrm6_init:
433d49c3 5412 xfrm6_fini();
2a0c451a
TG
5413out_fib6_init:
5414 fib6_gc_cleanup();
8ed67789
DL
5415out_register_subsys:
5416 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5417out_register_inetpeer:
5418 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5419out_dst_entries:
5420 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5421out_kmem_cache:
f2fc6a54 5422 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5423 goto out;
1da177e4
LT
5424}
5425
5426void ip6_route_cleanup(void)
5427{
8ed67789 5428 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5429 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5430 fib6_rules_cleanup();
1da177e4 5431 xfrm6_fini();
1da177e4 5432 fib6_gc_cleanup();
c3426b47 5433 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5434 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5435 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5436 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5437}