Merge remote-tracking branches 'asoc/topic/wm8753', 'asoc/topic/wm8770', 'asoc/topic...
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check used while scoring routes.
 * All failure codes are negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route must not be used */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in a failed state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry: round-robin */
	RT6_NUD_SUCCEED		= 1,
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
510c321b 131void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
510c321b 142void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
3a2232e9 189 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* dst_ops.redirect handler for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
3a2232e9 394 struct rt6_info *from = rt->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
3a2232e9
DM
412 rt->from = NULL;
413 dst_release(&from->dst);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
3a2232e9 446 } else if (rt->from) {
1e2ea8ad 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 448 rt6_check_expired(rt->from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
51ebd318 453static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
454 struct flowi6 *fl6, int oif,
455 int strict)
51ebd318
ND
456{
457 struct rt6_info *sibling, *next_sibling;
51ebd318 458
b673d6cc
JS
459 /* We might have already computed the hash for ICMPv6 errors. In such
460 * case it will always be non-zero. Otherwise now is the time to do it.
461 */
462 if (!fl6->mp_hash)
463 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
464
3d709f69
IS
465 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
466 return match;
467
468 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
469 rt6i_siblings) {
470 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
471 continue;
472 if (rt6_score_route(sibling, oif, strict) < 0)
473 break;
474 match = sibling;
475 break;
476 }
477
51ebd318
ND
478 return match;
479}
480
1da177e4 481/*
66f5d6ce 482 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
483 */
484
8ed67789
DL
485static inline struct rt6_info *rt6_device_match(struct net *net,
486 struct rt6_info *rt,
b71d1d42 487 const struct in6_addr *saddr,
1da177e4 488 int oif,
d420895e 489 int flags)
1da177e4
LT
490{
491 struct rt6_info *local = NULL;
492 struct rt6_info *sprt;
493
8067bb8c
IS
494 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
495 return rt;
dd3abc4e 496
071fb37e 497 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
d1918542 498 struct net_device *dev = sprt->dst.dev;
dd3abc4e 499
8067bb8c
IS
500 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
501 continue;
502
dd3abc4e 503 if (oif) {
1da177e4
LT
504 if (dev->ifindex == oif)
505 return sprt;
506 if (dev->flags & IFF_LOOPBACK) {
38308473 507 if (!sprt->rt6i_idev ||
1da177e4 508 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 509 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 510 continue;
17fb0b2b
DA
511 if (local &&
512 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
513 continue;
514 }
515 local = sprt;
516 }
dd3abc4e
YH
517 } else {
518 if (ipv6_chk_addr(net, saddr, dev,
519 flags & RT6_LOOKUP_F_IFACE))
520 return sprt;
1da177e4 521 }
dd3abc4e 522 }
1da177e4 523
dd3abc4e 524 if (oif) {
1da177e4
LT
525 if (local)
526 return local;
527
d420895e 528 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 529 return net->ipv6.ip6_null_entry;
1da177e4 530 }
8067bb8c
IS
531
532 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
533}
534
27097255 535#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
536struct __rt6_probe_work {
537 struct work_struct work;
538 struct in6_addr target;
539 struct net_device *dev;
540};
541
542static void rt6_probe_deferred(struct work_struct *w)
543{
544 struct in6_addr mcaddr;
545 struct __rt6_probe_work *work =
546 container_of(w, struct __rt6_probe_work, work);
547
548 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 549 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 550 dev_put(work->dev);
662f5533 551 kfree(work);
c2f17e82
HFS
552}
553
27097255
YH
554static void rt6_probe(struct rt6_info *rt)
555{
990edb42 556 struct __rt6_probe_work *work;
f2c31e32 557 struct neighbour *neigh;
27097255
YH
558 /*
559 * Okay, this does not seem to be appropriate
560 * for now, however, we need to check if it
561 * is really so; aka Router Reachability Probing.
562 *
563 * Router Reachability Probe MUST be rate-limited
564 * to no more than one per minute.
565 */
2152caea 566 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 567 return;
2152caea
YH
568 rcu_read_lock_bh();
569 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570 if (neigh) {
8d6c31bf
MKL
571 if (neigh->nud_state & NUD_VALID)
572 goto out;
573
990edb42 574 work = NULL;
2152caea 575 write_lock(&neigh->lock);
990edb42
MKL
576 if (!(neigh->nud_state & NUD_VALID) &&
577 time_after(jiffies,
578 neigh->updated +
579 rt->rt6i_idev->cnf.rtr_probe_interval)) {
580 work = kmalloc(sizeof(*work), GFP_ATOMIC);
581 if (work)
582 __neigh_set_probe_once(neigh);
c2f17e82 583 }
2152caea 584 write_unlock(&neigh->lock);
990edb42
MKL
585 } else {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 587 }
990edb42
MKL
588
589 if (work) {
590 INIT_WORK(&work->work, rt6_probe_deferred);
591 work->target = rt->rt6i_gateway;
592 dev_hold(rt->dst.dev);
593 work->dev = rt->dst.dev;
594 schedule_work(&work->work);
595 }
596
8d6c31bf 597out:
2152caea 598 rcu_read_unlock_bh();
27097255
YH
599}
600#else
/* Router probing is compiled out in this configuration: nothing to do. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
604#endif
605
1da177e4 606/*
554cfb7e 607 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 608 */
b6f99a21 609static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 610{
d1918542 611 struct net_device *dev = rt->dst.dev;
161980f4 612 if (!oif || dev->ifindex == oif)
554cfb7e 613 return 2;
161980f4
DM
614 if ((dev->flags & IFF_LOOPBACK) &&
615 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
616 return 1;
617 return 0;
554cfb7e 618}
1da177e4 619
afc154e9 620static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 621{
f2c31e32 622 struct neighbour *neigh;
afc154e9 623 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 624
4d0c5911
YH
625 if (rt->rt6i_flags & RTF_NONEXTHOP ||
626 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 627 return RT6_NUD_SUCCEED;
145a3621
YH
628
629 rcu_read_lock_bh();
630 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
631 if (neigh) {
632 read_lock(&neigh->lock);
554cfb7e 633 if (neigh->nud_state & NUD_VALID)
afc154e9 634 ret = RT6_NUD_SUCCEED;
398bcbeb 635#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 636 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 637 ret = RT6_NUD_SUCCEED;
7e980569
JB
638 else
639 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 640#endif
145a3621 641 read_unlock(&neigh->lock);
afc154e9
HFS
642 } else {
643 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 644 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 645 }
145a3621
YH
646 rcu_read_unlock_bh();
647
a5a81f0b 648 return ret;
1da177e4
LT
649}
650
554cfb7e
YH
651static int rt6_score_route(struct rt6_info *rt, int oif,
652 int strict)
1da177e4 653{
a5a81f0b 654 int m;
1ab1457c 655
4d0c5911 656 m = rt6_check_dev(rt, oif);
77d16f45 657 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 658 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
659#ifdef CONFIG_IPV6_ROUTER_PREF
660 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
661#endif
afc154e9
HFS
662 if (strict & RT6_LOOKUP_F_REACHABLE) {
663 int n = rt6_check_neigh(rt);
664 if (n < 0)
665 return n;
666 }
554cfb7e
YH
667 return m;
668}
669
f11e6659 670static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
671 int *mpri, struct rt6_info *match,
672 bool *do_rr)
554cfb7e 673{
f11e6659 674 int m;
afc154e9 675 bool match_do_rr = false;
35103d11 676 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 677
8067bb8c
IS
678 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
679 goto out;
680
14c5206c
IS
681 if (idev->cnf.ignore_routes_with_linkdown &&
682 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 683 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 684 goto out;
f11e6659
DM
685
686 if (rt6_check_expired(rt))
687 goto out;
688
689 m = rt6_score_route(rt, oif, strict);
7e980569 690 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
691 match_do_rr = true;
692 m = 0; /* lowest valid score */
7e980569 693 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 694 goto out;
afc154e9
HFS
695 }
696
697 if (strict & RT6_LOOKUP_F_REACHABLE)
698 rt6_probe(rt);
f11e6659 699
7e980569 700 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 701 if (m > *mpri) {
afc154e9 702 *do_rr = match_do_rr;
f11e6659
DM
703 *mpri = m;
704 match = rt;
f11e6659 705 }
f11e6659
DM
706out:
707 return match;
708}
709
710static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 711 struct rt6_info *leaf,
f11e6659 712 struct rt6_info *rr_head,
afc154e9
HFS
713 u32 metric, int oif, int strict,
714 bool *do_rr)
f11e6659 715{
9fbdcfaf 716 struct rt6_info *rt, *match, *cont;
554cfb7e 717 int mpri = -1;
1da177e4 718
f11e6659 719 match = NULL;
9fbdcfaf 720 cont = NULL;
071fb37e 721 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
722 if (rt->rt6i_metric != metric) {
723 cont = rt;
724 break;
725 }
726
727 match = find_match(rt, oif, strict, &mpri, match, do_rr);
728 }
729
66f5d6ce 730 for (rt = leaf; rt && rt != rr_head;
071fb37e 731 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
732 if (rt->rt6i_metric != metric) {
733 cont = rt;
734 break;
735 }
736
afc154e9 737 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
738 }
739
740 if (match || !cont)
741 return match;
742
071fb37e 743 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 744 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 745
f11e6659
DM
746 return match;
747}
1da177e4 748
8d1040e8
WW
749static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
750 int oif, int strict)
f11e6659 751{
66f5d6ce 752 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 753 struct rt6_info *match, *rt0;
afc154e9 754 bool do_rr = false;
17ecf590 755 int key_plen;
1da177e4 756
87b1af8d 757 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
758 return net->ipv6.ip6_null_entry;
759
66f5d6ce 760 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 761 if (!rt0)
66f5d6ce 762 rt0 = leaf;
1da177e4 763
17ecf590
WW
764 /* Double check to make sure fn is not an intermediate node
765 * and fn->leaf does not points to its child's leaf
766 * (This might happen if all routes under fn are deleted from
767 * the tree and fib6_repair_tree() is called on the node.)
768 */
769 key_plen = rt0->rt6i_dst.plen;
770#ifdef CONFIG_IPV6_SUBTREES
771 if (rt0->rt6i_src.plen)
772 key_plen = rt0->rt6i_src.plen;
773#endif
774 if (fn->fn_bit != key_plen)
775 return net->ipv6.ip6_null_entry;
776
8d1040e8 777 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 778 &do_rr);
1da177e4 779
afc154e9 780 if (do_rr) {
071fb37e 781 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 782
554cfb7e 783 /* no entries matched; do round-robin */
f11e6659 784 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 785 next = leaf;
f11e6659 786
66f5d6ce
WW
787 if (next != rt0) {
788 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
789 /* make sure next is not being deleted from the tree */
790 if (next->rt6i_node)
791 rcu_assign_pointer(fn->rr_ptr, next);
792 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
793 }
1da177e4 794 }
1da177e4 795
a02cec21 796 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
797}
798
8b9df265
MKL
799static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
800{
801 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
802}
803
70ceb4f5
YH
804#ifdef CONFIG_IPV6_ROUTE_INFO
805int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 806 const struct in6_addr *gwaddr)
70ceb4f5 807{
c346dca1 808 struct net *net = dev_net(dev);
70ceb4f5
YH
809 struct route_info *rinfo = (struct route_info *) opt;
810 struct in6_addr prefix_buf, *prefix;
811 unsigned int pref;
4bed72e4 812 unsigned long lifetime;
70ceb4f5
YH
813 struct rt6_info *rt;
814
815 if (len < sizeof(struct route_info)) {
816 return -EINVAL;
817 }
818
819 /* Sanity check for prefix_len and length */
820 if (rinfo->length > 3) {
821 return -EINVAL;
822 } else if (rinfo->prefix_len > 128) {
823 return -EINVAL;
824 } else if (rinfo->prefix_len > 64) {
825 if (rinfo->length < 2) {
826 return -EINVAL;
827 }
828 } else if (rinfo->prefix_len > 0) {
829 if (rinfo->length < 1) {
830 return -EINVAL;
831 }
832 }
833
834 pref = rinfo->route_pref;
835 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 836 return -EINVAL;
70ceb4f5 837
4bed72e4 838 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
839
840 if (rinfo->length == 3)
841 prefix = (struct in6_addr *)rinfo->prefix;
842 else {
843 /* this function is safe */
844 ipv6_addr_prefix(&prefix_buf,
845 (struct in6_addr *)rinfo->prefix,
846 rinfo->prefix_len);
847 prefix = &prefix_buf;
848 }
849
f104a567
DJ
850 if (rinfo->prefix_len == 0)
851 rt = rt6_get_dflt_router(gwaddr, dev);
852 else
853 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 854 gwaddr, dev);
70ceb4f5
YH
855
856 if (rt && !lifetime) {
e0a1ad73 857 ip6_del_rt(rt);
70ceb4f5
YH
858 rt = NULL;
859 }
860
861 if (!rt && lifetime)
830218c1
DA
862 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
863 dev, pref);
70ceb4f5
YH
864 else if (rt)
865 rt->rt6i_flags = RTF_ROUTEINFO |
866 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
867
868 if (rt) {
1716a961
G
869 if (!addrconf_finite_timeout(lifetime))
870 rt6_clean_expires(rt);
871 else
872 rt6_set_expires(rt, jiffies + HZ * lifetime);
873
94e187c0 874 ip6_rt_put(rt);
70ceb4f5
YH
875 }
876 return 0;
877}
878#endif
879
a3c00e46
MKL
880static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
881 struct in6_addr *saddr)
882{
66f5d6ce 883 struct fib6_node *pn, *sn;
a3c00e46
MKL
884 while (1) {
885 if (fn->fn_flags & RTN_TL_ROOT)
886 return NULL;
66f5d6ce
WW
887 pn = rcu_dereference(fn->parent);
888 sn = FIB6_SUBTREE(pn);
889 if (sn && sn != fn)
890 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
891 else
892 fn = pn;
893 if (fn->fn_flags & RTN_RTINFO)
894 return fn;
895 }
896}
c71099ac 897
d3843fe5
WW
898static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
899 bool null_fallback)
900{
901 struct rt6_info *rt = *prt;
902
903 if (dst_hold_safe(&rt->dst))
904 return true;
905 if (null_fallback) {
906 rt = net->ipv6.ip6_null_entry;
907 dst_hold(&rt->dst);
908 } else {
909 rt = NULL;
910 }
911 *prt = rt;
912 return false;
913}
914
8ed67789
DL
915static struct rt6_info *ip6_pol_route_lookup(struct net *net,
916 struct fib6_table *table,
4c9483b2 917 struct flowi6 *fl6, int flags)
1da177e4 918{
2b760fcf 919 struct rt6_info *rt, *rt_cache;
1da177e4 920 struct fib6_node *fn;
1da177e4 921
66f5d6ce 922 rcu_read_lock();
4c9483b2 923 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 924restart:
66f5d6ce
WW
925 rt = rcu_dereference(fn->leaf);
926 if (!rt) {
927 rt = net->ipv6.ip6_null_entry;
928 } else {
929 rt = rt6_device_match(net, rt, &fl6->saddr,
930 fl6->flowi6_oif, flags);
931 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
932 rt = rt6_multipath_select(rt, fl6,
933 fl6->flowi6_oif, flags);
934 }
a3c00e46
MKL
935 if (rt == net->ipv6.ip6_null_entry) {
936 fn = fib6_backtrack(fn, &fl6->saddr);
937 if (fn)
938 goto restart;
939 }
2b760fcf
WW
940 /* Search through exception table */
941 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
942 if (rt_cache)
943 rt = rt_cache;
944
d3843fe5
WW
945 if (ip6_hold_safe(net, &rt, true))
946 dst_use_noref(&rt->dst, jiffies);
947
66f5d6ce 948 rcu_read_unlock();
b811580d 949
b65f164d 950 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 951
c71099ac
TG
952 return rt;
953
954}
955
67ba4152 956struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
957 int flags)
958{
959 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
960}
961EXPORT_SYMBOL_GPL(ip6_route_lookup);
962
9acd9f3a
YH
963struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
964 const struct in6_addr *saddr, int oif, int strict)
c71099ac 965{
4c9483b2
DM
966 struct flowi6 fl6 = {
967 .flowi6_oif = oif,
968 .daddr = *daddr,
c71099ac
TG
969 };
970 struct dst_entry *dst;
77d16f45 971 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 972
adaa70bb 973 if (saddr) {
4c9483b2 974 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
975 flags |= RT6_LOOKUP_F_HAS_SADDR;
976 }
977
4c9483b2 978 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
979 if (dst->error == 0)
980 return (struct rt6_info *) dst;
981
982 dst_release(dst);
983
1da177e4
LT
984 return NULL;
985}
7159039a
YH
986EXPORT_SYMBOL(rt6_lookup);
987
c71099ac 988/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
989 * It takes new route entry, the addition fails by any reason the
990 * route is released.
991 * Caller must hold dst before calling it.
1da177e4
LT
992 */
993
e5fd387a 994static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
995 struct mx6_config *mxc,
996 struct netlink_ext_ack *extack)
1da177e4
LT
997{
998 int err;
c71099ac 999 struct fib6_table *table;
1da177e4 1000
c71099ac 1001 table = rt->rt6i_table;
66f5d6ce 1002 spin_lock_bh(&table->tb6_lock);
333c4301 1003 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1004 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1005
1006 return err;
1007}
1008
40e22e8f
TG
1009int ip6_ins_rt(struct rt6_info *rt)
1010{
e715b6d3
FW
1011 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1012 struct mx6_config mxc = { .mx = NULL, };
1013
1cfb71ee
WW
1014 /* Hold dst to account for the reference from the fib6 tree */
1015 dst_hold(&rt->dst);
333c4301 1016 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1017}
1018
4832c30d
DA
1019/* called with rcu_lock held */
1020static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1021{
1022 struct net_device *dev = rt->dst.dev;
1023
98d11291 1024 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
1025 /* for copies of local routes, dst->dev needs to be the
1026 * device if it is a master device, the master device if
1027 * device is enslaved, and the loopback as the default
1028 */
1029 if (netif_is_l3_slave(dev) &&
1030 !rt6_need_strict(&rt->rt6i_dst.addr))
1031 dev = l3mdev_master_dev_rcu(dev);
1032 else if (!netif_is_l3_master(dev))
1033 dev = dev_net(dev)->loopback_dev;
1034 /* last case is netif_is_l3_master(dev) is true in which
1035 * case we want dev returned to be dev
1036 */
1037 }
1038
1039 return dev;
1040}
1041
8b9df265
MKL
1042static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1043 const struct in6_addr *daddr,
1044 const struct in6_addr *saddr)
1da177e4 1045{
4832c30d 1046 struct net_device *dev;
1da177e4
LT
1047 struct rt6_info *rt;
1048
1049 /*
1050 * Clone the route.
1051 */
1052
d52d3997 1053 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1054 ort = ort->from;
1da177e4 1055
4832c30d
DA
1056 rcu_read_lock();
1057 dev = ip6_rt_get_dev_rcu(ort);
1058 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1059 rcu_read_unlock();
83a09abd
MKL
1060 if (!rt)
1061 return NULL;
1062
1063 ip6_rt_copy_init(rt, ort);
1064 rt->rt6i_flags |= RTF_CACHE;
1065 rt->rt6i_metric = 0;
1066 rt->dst.flags |= DST_HOST;
1067 rt->rt6i_dst.addr = *daddr;
1068 rt->rt6i_dst.plen = 128;
1da177e4 1069
83a09abd
MKL
1070 if (!rt6_is_gw_or_nonexthop(ort)) {
1071 if (ort->rt6i_dst.plen != 128 &&
1072 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1073 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1074#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1075 if (rt->rt6i_src.plen && saddr) {
1076 rt->rt6i_src.addr = *saddr;
1077 rt->rt6i_src.plen = 128;
8b9df265 1078 }
83a09abd 1079#endif
95a9a5ba 1080 }
1da177e4 1081
95a9a5ba
YH
1082 return rt;
1083}
1da177e4 1084
d52d3997
MKL
1085static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1086{
4832c30d 1087 struct net_device *dev;
d52d3997
MKL
1088 struct rt6_info *pcpu_rt;
1089
4832c30d
DA
1090 rcu_read_lock();
1091 dev = ip6_rt_get_dev_rcu(rt);
1092 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1093 rcu_read_unlock();
d52d3997
MKL
1094 if (!pcpu_rt)
1095 return NULL;
1096 ip6_rt_copy_init(pcpu_rt, rt);
1097 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1098 pcpu_rt->rt6i_flags |= RTF_PCPU;
1099 return pcpu_rt;
1100}
1101
66f5d6ce 1102/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1103static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1104{
a73e4195 1105 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1106
1107 p = this_cpu_ptr(rt->rt6i_pcpu);
1108 pcpu_rt = *p;
1109
d3843fe5 1110 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1111 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1112
a73e4195
MKL
1113 return pcpu_rt;
1114}
1115
1116static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1117{
1118 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1119
1120 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1121 if (!pcpu_rt) {
1122 struct net *net = dev_net(rt->dst.dev);
1123
9c7370a1
MKL
1124 dst_hold(&net->ipv6.ip6_null_entry->dst);
1125 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1126 }
1127
a94b9367
WW
1128 dst_hold(&pcpu_rt->dst);
1129 p = this_cpu_ptr(rt->rt6i_pcpu);
1130 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1131 BUG_ON(prev);
a94b9367 1132
d52d3997
MKL
1133 rt6_dst_from_metrics_check(pcpu_rt);
1134 return pcpu_rt;
1135}
1136
35732d01
WW
1137/* exception hash table implementation
1138 */
1139static DEFINE_SPINLOCK(rt6_exception_lock);
1140
1141/* Remove rt6_ex from hash table and free the memory
1142 * Caller must hold rt6_exception_lock
1143 */
1144static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1145 struct rt6_exception *rt6_ex)
1146{
b2427e67 1147 struct net *net;
81eb8447 1148
35732d01
WW
1149 if (!bucket || !rt6_ex)
1150 return;
b2427e67
CIK
1151
1152 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1153 rt6_ex->rt6i->rt6i_node = NULL;
1154 hlist_del_rcu(&rt6_ex->hlist);
1155 rt6_release(rt6_ex->rt6i);
1156 kfree_rcu(rt6_ex, rcu);
1157 WARN_ON_ONCE(!bucket->depth);
1158 bucket->depth--;
81eb8447 1159 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1160}
1161
1162/* Remove oldest rt6_ex in bucket and free the memory
1163 * Caller must hold rt6_exception_lock
1164 */
1165static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1166{
1167 struct rt6_exception *rt6_ex, *oldest = NULL;
1168
1169 if (!bucket)
1170 return;
1171
1172 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1173 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1174 oldest = rt6_ex;
1175 }
1176 rt6_remove_exception(bucket, oldest);
1177}
1178
1179static u32 rt6_exception_hash(const struct in6_addr *dst,
1180 const struct in6_addr *src)
1181{
1182 static u32 seed __read_mostly;
1183 u32 val;
1184
1185 net_get_random_once(&seed, sizeof(seed));
1186 val = jhash(dst, sizeof(*dst), seed);
1187
1188#ifdef CONFIG_IPV6_SUBTREES
1189 if (src)
1190 val = jhash(src, sizeof(*src), val);
1191#endif
1192 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1193}
1194
1195/* Helper function to find the cached rt in the hash table
1196 * and update bucket pointer to point to the bucket for this
1197 * (daddr, saddr) pair
1198 * Caller must hold rt6_exception_lock
1199 */
1200static struct rt6_exception *
1201__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1202 const struct in6_addr *daddr,
1203 const struct in6_addr *saddr)
1204{
1205 struct rt6_exception *rt6_ex;
1206 u32 hval;
1207
1208 if (!(*bucket) || !daddr)
1209 return NULL;
1210
1211 hval = rt6_exception_hash(daddr, saddr);
1212 *bucket += hval;
1213
1214 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1215 struct rt6_info *rt6 = rt6_ex->rt6i;
1216 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1217
1218#ifdef CONFIG_IPV6_SUBTREES
1219 if (matched && saddr)
1220 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1221#endif
1222 if (matched)
1223 return rt6_ex;
1224 }
1225 return NULL;
1226}
1227
1228/* Helper function to find the cached rt in the hash table
1229 * and update bucket pointer to point to the bucket for this
1230 * (daddr, saddr) pair
1231 * Caller must hold rcu_read_lock()
1232 */
1233static struct rt6_exception *
1234__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1235 const struct in6_addr *daddr,
1236 const struct in6_addr *saddr)
1237{
1238 struct rt6_exception *rt6_ex;
1239 u32 hval;
1240
1241 WARN_ON_ONCE(!rcu_read_lock_held());
1242
1243 if (!(*bucket) || !daddr)
1244 return NULL;
1245
1246 hval = rt6_exception_hash(daddr, saddr);
1247 *bucket += hval;
1248
1249 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1250 struct rt6_info *rt6 = rt6_ex->rt6i;
1251 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1252
1253#ifdef CONFIG_IPV6_SUBTREES
1254 if (matched && saddr)
1255 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1256#endif
1257 if (matched)
1258 return rt6_ex;
1259 }
1260 return NULL;
1261}
1262
1263static int rt6_insert_exception(struct rt6_info *nrt,
1264 struct rt6_info *ort)
1265{
81eb8447 1266 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1267 struct rt6_exception_bucket *bucket;
1268 struct in6_addr *src_key = NULL;
1269 struct rt6_exception *rt6_ex;
1270 int err = 0;
1271
1272 /* ort can't be a cache or pcpu route */
1273 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1274 ort = ort->from;
35732d01
WW
1275 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1276
1277 spin_lock_bh(&rt6_exception_lock);
1278
1279 if (ort->exception_bucket_flushed) {
1280 err = -EINVAL;
1281 goto out;
1282 }
1283
1284 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1285 lockdep_is_held(&rt6_exception_lock));
1286 if (!bucket) {
1287 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1288 GFP_ATOMIC);
1289 if (!bucket) {
1290 err = -ENOMEM;
1291 goto out;
1292 }
1293 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1294 }
1295
1296#ifdef CONFIG_IPV6_SUBTREES
1297 /* rt6i_src.plen != 0 indicates ort is in subtree
1298 * and exception table is indexed by a hash of
1299 * both rt6i_dst and rt6i_src.
1300 * Otherwise, the exception table is indexed by
1301 * a hash of only rt6i_dst.
1302 */
1303 if (ort->rt6i_src.plen)
1304 src_key = &nrt->rt6i_src.addr;
1305#endif
60006a48
WW
1306
1307 /* Update rt6i_prefsrc as it could be changed
1308 * in rt6_remove_prefsrc()
1309 */
1310 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1311 /* rt6_mtu_change() might lower mtu on ort.
1312 * Only insert this exception route if its mtu
1313 * is less than ort's mtu value.
1314 */
1315 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1316 err = -EINVAL;
1317 goto out;
1318 }
60006a48 1319
35732d01
WW
1320 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1321 src_key);
1322 if (rt6_ex)
1323 rt6_remove_exception(bucket, rt6_ex);
1324
1325 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1326 if (!rt6_ex) {
1327 err = -ENOMEM;
1328 goto out;
1329 }
1330 rt6_ex->rt6i = nrt;
1331 rt6_ex->stamp = jiffies;
1332 atomic_inc(&nrt->rt6i_ref);
1333 nrt->rt6i_node = ort->rt6i_node;
1334 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1335 bucket->depth++;
81eb8447 1336 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1337
1338 if (bucket->depth > FIB6_MAX_DEPTH)
1339 rt6_exception_remove_oldest(bucket);
1340
1341out:
1342 spin_unlock_bh(&rt6_exception_lock);
1343
1344 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1345 if (!err) {
922c2ac8 1346 spin_lock_bh(&ort->rt6i_table->tb6_lock);
35732d01 1347 fib6_update_sernum(ort);
922c2ac8 1348 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1349 fib6_force_start_gc(net);
1350 }
35732d01
WW
1351
1352 return err;
1353}
1354
1355void rt6_flush_exceptions(struct rt6_info *rt)
1356{
1357 struct rt6_exception_bucket *bucket;
1358 struct rt6_exception *rt6_ex;
1359 struct hlist_node *tmp;
1360 int i;
1361
1362 spin_lock_bh(&rt6_exception_lock);
1363 /* Prevent rt6_insert_exception() to recreate the bucket list */
1364 rt->exception_bucket_flushed = 1;
1365
1366 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1367 lockdep_is_held(&rt6_exception_lock));
1368 if (!bucket)
1369 goto out;
1370
1371 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1372 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1373 rt6_remove_exception(bucket, rt6_ex);
1374 WARN_ON_ONCE(bucket->depth);
1375 bucket++;
1376 }
1377
1378out:
1379 spin_unlock_bh(&rt6_exception_lock);
1380}
1381
1382/* Find cached rt in the hash table inside passed in rt
1383 * Caller has to hold rcu_read_lock()
1384 */
1385static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1386 struct in6_addr *daddr,
1387 struct in6_addr *saddr)
1388{
1389 struct rt6_exception_bucket *bucket;
1390 struct in6_addr *src_key = NULL;
1391 struct rt6_exception *rt6_ex;
1392 struct rt6_info *res = NULL;
1393
1394 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1395
1396#ifdef CONFIG_IPV6_SUBTREES
1397 /* rt6i_src.plen != 0 indicates rt is in subtree
1398 * and exception table is indexed by a hash of
1399 * both rt6i_dst and rt6i_src.
1400 * Otherwise, the exception table is indexed by
1401 * a hash of only rt6i_dst.
1402 */
1403 if (rt->rt6i_src.plen)
1404 src_key = saddr;
1405#endif
1406 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1407
1408 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1409 res = rt6_ex->rt6i;
1410
1411 return res;
1412}
1413
1414/* Remove the passed in cached rt from the hash table that contains it */
1415int rt6_remove_exception_rt(struct rt6_info *rt)
1416{
35732d01 1417 struct rt6_exception_bucket *bucket;
3a2232e9 1418 struct rt6_info *from = rt->from;
35732d01
WW
1419 struct in6_addr *src_key = NULL;
1420 struct rt6_exception *rt6_ex;
1421 int err;
1422
1423 if (!from ||
442d713b 1424 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1425 return -EINVAL;
1426
1427 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1428 return -ENOENT;
1429
1430 spin_lock_bh(&rt6_exception_lock);
1431 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1432 lockdep_is_held(&rt6_exception_lock));
1433#ifdef CONFIG_IPV6_SUBTREES
1434 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1435 * and exception table is indexed by a hash of
1436 * both rt6i_dst and rt6i_src.
1437 * Otherwise, the exception table is indexed by
1438 * a hash of only rt6i_dst.
1439 */
1440 if (from->rt6i_src.plen)
1441 src_key = &rt->rt6i_src.addr;
1442#endif
1443 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1444 &rt->rt6i_dst.addr,
1445 src_key);
1446 if (rt6_ex) {
1447 rt6_remove_exception(bucket, rt6_ex);
1448 err = 0;
1449 } else {
1450 err = -ENOENT;
1451 }
1452
1453 spin_unlock_bh(&rt6_exception_lock);
1454 return err;
1455}
1456
1457/* Find rt6_ex which contains the passed in rt cache and
1458 * refresh its stamp
1459 */
1460static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1461{
35732d01 1462 struct rt6_exception_bucket *bucket;
3a2232e9 1463 struct rt6_info *from = rt->from;
35732d01
WW
1464 struct in6_addr *src_key = NULL;
1465 struct rt6_exception *rt6_ex;
1466
1467 if (!from ||
442d713b 1468 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1469 return;
1470
1471 rcu_read_lock();
1472 bucket = rcu_dereference(from->rt6i_exception_bucket);
1473
1474#ifdef CONFIG_IPV6_SUBTREES
1475 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1476 * and exception table is indexed by a hash of
1477 * both rt6i_dst and rt6i_src.
1478 * Otherwise, the exception table is indexed by
1479 * a hash of only rt6i_dst.
1480 */
1481 if (from->rt6i_src.plen)
1482 src_key = &rt->rt6i_src.addr;
1483#endif
1484 rt6_ex = __rt6_find_exception_rcu(&bucket,
1485 &rt->rt6i_dst.addr,
1486 src_key);
1487 if (rt6_ex)
1488 rt6_ex->stamp = jiffies;
1489
1490 rcu_read_unlock();
1491}
1492
60006a48
WW
1493static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1494{
1495 struct rt6_exception_bucket *bucket;
1496 struct rt6_exception *rt6_ex;
1497 int i;
1498
1499 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1500 lockdep_is_held(&rt6_exception_lock));
1501
1502 if (bucket) {
1503 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1504 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1505 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1506 }
1507 bucket++;
1508 }
1509 }
1510}
1511
e9fa1495
SB
1512static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1513 struct rt6_info *rt, int mtu)
1514{
1515 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1516 * lowest MTU in the path: always allow updating the route PMTU to
1517 * reflect PMTU decreases.
1518 *
1519 * If the new MTU is higher, and the route PMTU is equal to the local
1520 * MTU, this means the old MTU is the lowest in the path, so allow
1521 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1522 * handle this.
1523 */
1524
1525 if (dst_mtu(&rt->dst) >= mtu)
1526 return true;
1527
1528 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1529 return true;
1530
1531 return false;
1532}
1533
1534static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1535 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1536{
1537 struct rt6_exception_bucket *bucket;
1538 struct rt6_exception *rt6_ex;
1539 int i;
1540
1541 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1542 lockdep_is_held(&rt6_exception_lock));
1543
e9fa1495
SB
1544 if (!bucket)
1545 return;
1546
1547 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1548 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1549 struct rt6_info *entry = rt6_ex->rt6i;
1550
1551 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1552 * route), the metrics of its rt->dst.from have already
1553 * been updated.
1554 */
1555 if (entry->rt6i_pmtu &&
1556 rt6_mtu_change_route_allowed(idev, entry, mtu))
1557 entry->rt6i_pmtu = mtu;
f5bbe7ee 1558 }
e9fa1495 1559 bucket++;
f5bbe7ee
WW
1560 }
1561}
1562
b16cb459
WW
1563#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1564
1565static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1566 struct in6_addr *gateway)
1567{
1568 struct rt6_exception_bucket *bucket;
1569 struct rt6_exception *rt6_ex;
1570 struct hlist_node *tmp;
1571 int i;
1572
1573 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1574 return;
1575
1576 spin_lock_bh(&rt6_exception_lock);
1577 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1578 lockdep_is_held(&rt6_exception_lock));
1579
1580 if (bucket) {
1581 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1582 hlist_for_each_entry_safe(rt6_ex, tmp,
1583 &bucket->chain, hlist) {
1584 struct rt6_info *entry = rt6_ex->rt6i;
1585
1586 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1587 RTF_CACHE_GATEWAY &&
1588 ipv6_addr_equal(gateway,
1589 &entry->rt6i_gateway)) {
1590 rt6_remove_exception(bucket, rt6_ex);
1591 }
1592 }
1593 bucket++;
1594 }
1595 }
1596
1597 spin_unlock_bh(&rt6_exception_lock);
1598}
1599
c757faa8
WW
1600static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1601 struct rt6_exception *rt6_ex,
1602 struct fib6_gc_args *gc_args,
1603 unsigned long now)
1604{
1605 struct rt6_info *rt = rt6_ex->rt6i;
1606
1859bac0
PA
1607 /* we are pruning and obsoleting aged-out and non gateway exceptions
1608 * even if others have still references to them, so that on next
1609 * dst_check() such references can be dropped.
1610 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1611 * expired, independently from their aging, as per RFC 8201 section 4
1612 */
31afeb42
WW
1613 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1614 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1615 RT6_TRACE("aging clone %p\n", rt);
1616 rt6_remove_exception(bucket, rt6_ex);
1617 return;
1618 }
1619 } else if (time_after(jiffies, rt->dst.expires)) {
1620 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1621 rt6_remove_exception(bucket, rt6_ex);
1622 return;
31afeb42
WW
1623 }
1624
1625 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1626 struct neighbour *neigh;
1627 __u8 neigh_flags = 0;
1628
1629 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1630 if (neigh) {
1631 neigh_flags = neigh->flags;
1632 neigh_release(neigh);
1633 }
1634 if (!(neigh_flags & NTF_ROUTER)) {
1635 RT6_TRACE("purging route %p via non-router but gateway\n",
1636 rt);
1637 rt6_remove_exception(bucket, rt6_ex);
1638 return;
1639 }
1640 }
31afeb42 1641
c757faa8
WW
1642 gc_args->more++;
1643}
1644
1645void rt6_age_exceptions(struct rt6_info *rt,
1646 struct fib6_gc_args *gc_args,
1647 unsigned long now)
1648{
1649 struct rt6_exception_bucket *bucket;
1650 struct rt6_exception *rt6_ex;
1651 struct hlist_node *tmp;
1652 int i;
1653
1654 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1655 return;
1656
1657 spin_lock_bh(&rt6_exception_lock);
1658 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1659 lockdep_is_held(&rt6_exception_lock));
1660
1661 if (bucket) {
1662 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1663 hlist_for_each_entry_safe(rt6_ex, tmp,
1664 &bucket->chain, hlist) {
1665 rt6_age_examine_exception(bucket, rt6_ex,
1666 gc_args, now);
1667 }
1668 bucket++;
1669 }
1670 }
1671 spin_unlock_bh(&rt6_exception_lock);
1672}
1673
9ff74384
DA
1674struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1675 int oif, struct flowi6 *fl6, int flags)
1da177e4 1676{
367efcb9 1677 struct fib6_node *fn, *saved_fn;
2b760fcf 1678 struct rt6_info *rt, *rt_cache;
c71099ac 1679 int strict = 0;
1da177e4 1680
77d16f45 1681 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1682 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1683 if (net->ipv6.devconf_all->forwarding == 0)
1684 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1685
66f5d6ce 1686 rcu_read_lock();
1da177e4 1687
4c9483b2 1688 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1689 saved_fn = fn;
1da177e4 1690
ca254490
DA
1691 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1692 oif = 0;
1693
a3c00e46 1694redo_rt6_select:
8d1040e8 1695 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1696 if (rt->rt6i_nsiblings)
367efcb9 1697 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1698 if (rt == net->ipv6.ip6_null_entry) {
1699 fn = fib6_backtrack(fn, &fl6->saddr);
1700 if (fn)
1701 goto redo_rt6_select;
367efcb9
MKL
1702 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1703 /* also consider unreachable route */
1704 strict &= ~RT6_LOOKUP_F_REACHABLE;
1705 fn = saved_fn;
1706 goto redo_rt6_select;
367efcb9 1707 }
a3c00e46
MKL
1708 }
1709
2b760fcf
WW
1710 /*Search through exception table */
1711 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1712 if (rt_cache)
1713 rt = rt_cache;
fb9de91e 1714
d3843fe5 1715 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1716 rcu_read_unlock();
d3843fe5 1717 dst_hold(&rt->dst);
b65f164d 1718 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1719 return rt;
1720 } else if (rt->rt6i_flags & RTF_CACHE) {
1721 if (ip6_hold_safe(net, &rt, true)) {
1722 dst_use_noref(&rt->dst, jiffies);
1723 rt6_dst_from_metrics_check(rt);
1724 }
66f5d6ce 1725 rcu_read_unlock();
b65f164d 1726 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1727 return rt;
3da59bd9
MKL
1728 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1729 !(rt->rt6i_flags & RTF_GATEWAY))) {
1730 /* Create a RTF_CACHE clone which will not be
1731 * owned by the fib6 tree. It is for the special case where
1732 * the daddr in the skb during the neighbor look-up is different
1733 * from the fl6->daddr used to look-up route here.
1734 */
1735
1736 struct rt6_info *uncached_rt;
1737
d3843fe5
WW
1738 if (ip6_hold_safe(net, &rt, true)) {
1739 dst_use_noref(&rt->dst, jiffies);
1740 } else {
66f5d6ce 1741 rcu_read_unlock();
d3843fe5
WW
1742 uncached_rt = rt;
1743 goto uncached_rt_out;
1744 }
66f5d6ce 1745 rcu_read_unlock();
d52d3997 1746
3da59bd9
MKL
1747 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1748 dst_release(&rt->dst);
c71099ac 1749
1cfb71ee
WW
1750 if (uncached_rt) {
1751 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1752 * No need for another dst_hold()
1753 */
8d0b94af 1754 rt6_uncached_list_add(uncached_rt);
81eb8447 1755 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1756 } else {
3da59bd9 1757 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1758 dst_hold(&uncached_rt->dst);
1759 }
b811580d 1760
d3843fe5 1761uncached_rt_out:
b65f164d 1762 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1763 return uncached_rt;
3da59bd9 1764
d52d3997
MKL
1765 } else {
1766 /* Get a percpu copy */
1767
1768 struct rt6_info *pcpu_rt;
1769
d3843fe5 1770 dst_use_noref(&rt->dst, jiffies);
951f788a 1771 local_bh_disable();
d52d3997 1772 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1773
951f788a 1774 if (!pcpu_rt) {
a94b9367
WW
1775 /* atomic_inc_not_zero() is needed when using rcu */
1776 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1777 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1778 * rt->rt6i_ref makes sure rt can't be released.
1779 */
a94b9367
WW
1780 pcpu_rt = rt6_make_pcpu_route(rt);
1781 rt6_release(rt);
1782 } else {
1783 /* rt is already removed from tree */
a94b9367
WW
1784 pcpu_rt = net->ipv6.ip6_null_entry;
1785 dst_hold(&pcpu_rt->dst);
1786 }
9c7370a1 1787 }
951f788a
ED
1788 local_bh_enable();
1789 rcu_read_unlock();
b65f164d 1790 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1791 return pcpu_rt;
1792 }
1da177e4 1793}
9ff74384 1794EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1795
8ed67789 1796static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1797 struct flowi6 *fl6, int flags)
4acad72d 1798{
4c9483b2 1799 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1800}
1801
d409b847
MB
1802struct dst_entry *ip6_route_input_lookup(struct net *net,
1803 struct net_device *dev,
1804 struct flowi6 *fl6, int flags)
72331bc0
SL
1805{
1806 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1807 flags |= RT6_LOOKUP_F_IFACE;
1808
1809 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1810}
d409b847 1811EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1812
23aebdac
JS
1813static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1814 struct flow_keys *keys)
1815{
1816 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1817 const struct ipv6hdr *key_iph = outer_iph;
1818 const struct ipv6hdr *inner_iph;
1819 const struct icmp6hdr *icmph;
1820 struct ipv6hdr _inner_iph;
1821
1822 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1823 goto out;
1824
1825 icmph = icmp6_hdr(skb);
1826 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1827 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1828 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1829 icmph->icmp6_type != ICMPV6_PARAMPROB)
1830 goto out;
1831
1832 inner_iph = skb_header_pointer(skb,
1833 skb_transport_offset(skb) + sizeof(*icmph),
1834 sizeof(_inner_iph), &_inner_iph);
1835 if (!inner_iph)
1836 goto out;
1837
1838 key_iph = inner_iph;
1839out:
1840 memset(keys, 0, sizeof(*keys));
1841 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1842 keys->addrs.v6addrs.src = key_iph->saddr;
1843 keys->addrs.v6addrs.dst = key_iph->daddr;
1844 keys->tags.flow_label = ip6_flowinfo(key_iph);
1845 keys->basic.ip_proto = key_iph->nexthdr;
1846}
1847
1848/* if skb is set it will be used and fl6 can be NULL */
1849u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1850{
1851 struct flow_keys hash_keys;
1852
1853 if (skb) {
1854 ip6_multipath_l3_keys(skb, &hash_keys);
7696c06a 1855 return flow_hash_from_keys(&hash_keys) >> 1;
23aebdac
JS
1856 }
1857
7696c06a 1858 return get_hash_from_flowi6(fl6) >> 1;
23aebdac
JS
1859}
1860
c71099ac
TG
1861void ip6_route_input(struct sk_buff *skb)
1862{
b71d1d42 1863 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1864 struct net *net = dev_net(skb->dev);
adaa70bb 1865 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1866 struct ip_tunnel_info *tun_info;
4c9483b2 1867 struct flowi6 fl6 = {
e0d56fdd 1868 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1869 .daddr = iph->daddr,
1870 .saddr = iph->saddr,
6502ca52 1871 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1872 .flowi6_mark = skb->mark,
1873 .flowi6_proto = iph->nexthdr,
c71099ac 1874 };
adaa70bb 1875
904af04d 1876 tun_info = skb_tunnel_info(skb);
46fa062a 1877 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1878 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1879 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1880 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1881 skb_dst_drop(skb);
72331bc0 1882 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1883}
1884
8ed67789 1885static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1886 struct flowi6 *fl6, int flags)
1da177e4 1887{
4c9483b2 1888 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1889}
1890
6f21c96a
PA
1891struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1892 struct flowi6 *fl6, int flags)
c71099ac 1893{
d46a9d67 1894 bool any_src;
c71099ac 1895
4c1feac5
DA
1896 if (rt6_need_strict(&fl6->daddr)) {
1897 struct dst_entry *dst;
1898
1899 dst = l3mdev_link_scope_lookup(net, fl6);
1900 if (dst)
1901 return dst;
1902 }
ca254490 1903
1fb9489b 1904 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1905
d46a9d67 1906 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1907 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1908 (fl6->flowi6_oif && any_src))
77d16f45 1909 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1910
d46a9d67 1911 if (!any_src)
adaa70bb 1912 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1913 else if (sk)
1914 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1915
4c9483b2 1916 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1917}
6f21c96a 1918EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1919
2774c131 1920struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1921{
5c1e6aa3 1922 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1923 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1924 struct dst_entry *new = NULL;
1925
1dbe3252 1926 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 1927 DST_OBSOLETE_DEAD, 0);
14e50e57 1928 if (rt) {
0a1f5962 1929 rt6_info_init(rt);
81eb8447 1930 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 1931
0a1f5962 1932 new = &rt->dst;
14e50e57 1933 new->__use = 1;
352e512c 1934 new->input = dst_discard;
ede2059d 1935 new->output = dst_discard_out;
14e50e57 1936
0a1f5962 1937 dst_copy_metrics(new, &ort->dst);
14e50e57 1938
1dbe3252 1939 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1940 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1941 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1942 rt->rt6i_metric = 0;
1943
1944 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1945#ifdef CONFIG_IPV6_SUBTREES
1946 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1947#endif
14e50e57
DM
1948 }
1949
69ead7af
DM
1950 dst_release(dst_orig);
1951 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1952}
14e50e57 1953
1da177e4
LT
1954/*
1955 * Destination cache support functions
1956 */
1957
4b32b5ad
MKL
1958static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1959{
3a2232e9
DM
1960 if (rt->from &&
1961 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
1962 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
1963}
1964
3da59bd9
MKL
1965static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1966{
36143645 1967 u32 rt_cookie = 0;
c5cff856
WW
1968
1969 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1970 return NULL;
1971
1972 if (rt6_check_expired(rt))
1973 return NULL;
1974
1975 return &rt->dst;
1976}
1977
1978static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1979{
5973fb1e
MKL
1980 if (!__rt6_check_expired(rt) &&
1981 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 1982 rt6_check(rt->from, cookie))
3da59bd9
MKL
1983 return &rt->dst;
1984 else
1985 return NULL;
1986}
1987
1da177e4
LT
1988static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1989{
1990 struct rt6_info *rt;
1991
1992 rt = (struct rt6_info *) dst;
1993
6f3118b5
ND
1994 /* All IPV6 dsts are created with ->obsolete set to the value
1995 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1996 * into this function always.
1997 */
e3bc10bd 1998
4b32b5ad
MKL
1999 rt6_dst_from_metrics_check(rt);
2000
02bcf4e0 2001 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2002 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2003 return rt6_dst_from_check(rt, cookie);
2004 else
2005 return rt6_check(rt, cookie);
1da177e4
LT
2006}
2007
2008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2009{
2010 struct rt6_info *rt = (struct rt6_info *) dst;
2011
2012 if (rt) {
54c1a859
YH
2013 if (rt->rt6i_flags & RTF_CACHE) {
2014 if (rt6_check_expired(rt)) {
2015 ip6_del_rt(rt);
2016 dst = NULL;
2017 }
2018 } else {
1da177e4 2019 dst_release(dst);
54c1a859
YH
2020 dst = NULL;
2021 }
1da177e4 2022 }
54c1a859 2023 return dst;
1da177e4
LT
2024}
2025
2026static void ip6_link_failure(struct sk_buff *skb)
2027{
2028 struct rt6_info *rt;
2029
3ffe533c 2030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2031
adf30907 2032 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2033 if (rt) {
1eb4f758 2034 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2035 if (dst_hold_safe(&rt->dst))
2036 ip6_del_rt(rt);
c5cff856
WW
2037 } else {
2038 struct fib6_node *fn;
2039
2040 rcu_read_lock();
2041 fn = rcu_dereference(rt->rt6i_node);
2042 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2043 fn->fn_sernum = -1;
2044 rcu_read_unlock();
1eb4f758 2045 }
1da177e4
LT
2046 }
2047}
2048
45e4fd26
MKL
2049static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2050{
2051 struct net *net = dev_net(rt->dst.dev);
2052
2053 rt->rt6i_flags |= RTF_MODIFIED;
2054 rt->rt6i_pmtu = mtu;
2055 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2056}
2057
0d3f6d29
MKL
2058static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2059{
2060 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2061 (rt->rt6i_flags & RTF_PCPU ||
2062 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2063}
2064
45e4fd26
MKL
2065static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2066 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2067{
0dec879f 2068 const struct in6_addr *daddr, *saddr;
67ba4152 2069 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2070
45e4fd26
MKL
2071 if (rt6->rt6i_flags & RTF_LOCAL)
2072 return;
81aded24 2073
19bda36c
XL
2074 if (dst_metric_locked(dst, RTAX_MTU))
2075 return;
2076
0dec879f
JA
2077 if (iph) {
2078 daddr = &iph->daddr;
2079 saddr = &iph->saddr;
2080 } else if (sk) {
2081 daddr = &sk->sk_v6_daddr;
2082 saddr = &inet6_sk(sk)->saddr;
2083 } else {
2084 daddr = NULL;
2085 saddr = NULL;
2086 }
2087 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2088 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2089 if (mtu >= dst_mtu(dst))
2090 return;
9d289715 2091
0d3f6d29 2092 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2093 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2094 /* update rt6_ex->stamp for cache */
2095 if (rt6->rt6i_flags & RTF_CACHE)
2096 rt6_update_exception_stamp_rt(rt6);
0dec879f 2097 } else if (daddr) {
45e4fd26
MKL
2098 struct rt6_info *nrt6;
2099
45e4fd26
MKL
2100 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2101 if (nrt6) {
2102 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2103 if (rt6_insert_exception(nrt6, rt6))
2104 dst_release_immediate(&nrt6->dst);
45e4fd26 2105 }
1da177e4
LT
2106 }
2107}
2108
45e4fd26
MKL
2109static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2110 struct sk_buff *skb, u32 mtu)
2111{
2112 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2113}
2114
42ae66c8 2115void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2116 int oif, u32 mark, kuid_t uid)
81aded24
DM
2117{
2118 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2119 struct dst_entry *dst;
2120 struct flowi6 fl6;
2121
2122 memset(&fl6, 0, sizeof(fl6));
2123 fl6.flowi6_oif = oif;
1b3c61dc 2124 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2125 fl6.daddr = iph->daddr;
2126 fl6.saddr = iph->saddr;
6502ca52 2127 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2128 fl6.flowi6_uid = uid;
81aded24
DM
2129
2130 dst = ip6_route_output(net, NULL, &fl6);
2131 if (!dst->error)
45e4fd26 2132 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2133 dst_release(dst);
2134}
2135EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2136
2137void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2138{
33c162a9
MKL
2139 struct dst_entry *dst;
2140
81aded24 2141 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2142 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2143
2144 dst = __sk_dst_get(sk);
2145 if (!dst || !dst->obsolete ||
2146 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2147 return;
2148
2149 bh_lock_sock(sk);
2150 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2151 ip6_datagram_dst_update(sk, false);
2152 bh_unlock_sock(sk);
81aded24
DM
2153}
2154EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2155
b55b76b2
DJ
2156/* Handle redirects */
2157struct ip6rd_flowi {
2158 struct flowi6 fl6;
2159 struct in6_addr gateway;
2160};
2161
2162static struct rt6_info *__ip6_route_redirect(struct net *net,
2163 struct fib6_table *table,
2164 struct flowi6 *fl6,
2165 int flags)
2166{
2167 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2168 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2169 struct fib6_node *fn;
2170
2171 /* Get the "current" route for this destination and
67c408cf 2172 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2173 *
2174 * RFC 4861 specifies that redirects should only be
2175 * accepted if they come from the nexthop to the target.
2176 * Due to the way the routes are chosen, this notion
2177 * is a bit fuzzy and one might need to check all possible
2178 * routes.
2179 */
2180
66f5d6ce 2181 rcu_read_lock();
b55b76b2
DJ
2182 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2183restart:
66f5d6ce 2184 for_each_fib6_node_rt_rcu(fn) {
8067bb8c
IS
2185 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2186 continue;
b55b76b2
DJ
2187 if (rt6_check_expired(rt))
2188 continue;
2189 if (rt->dst.error)
2190 break;
2191 if (!(rt->rt6i_flags & RTF_GATEWAY))
2192 continue;
2193 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2194 continue;
2b760fcf
WW
2195 /* rt_cache's gateway might be different from its 'parent'
2196 * in the case of an ip redirect.
2197 * So we keep searching in the exception table if the gateway
2198 * is different.
2199 */
2200 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2201 rt_cache = rt6_find_cached_rt(rt,
2202 &fl6->daddr,
2203 &fl6->saddr);
2204 if (rt_cache &&
2205 ipv6_addr_equal(&rdfl->gateway,
2206 &rt_cache->rt6i_gateway)) {
2207 rt = rt_cache;
2208 break;
2209 }
b55b76b2 2210 continue;
2b760fcf 2211 }
b55b76b2
DJ
2212 break;
2213 }
2214
2215 if (!rt)
2216 rt = net->ipv6.ip6_null_entry;
2217 else if (rt->dst.error) {
2218 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2219 goto out;
2220 }
2221
2222 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2223 fn = fib6_backtrack(fn, &fl6->saddr);
2224 if (fn)
2225 goto restart;
b55b76b2 2226 }
a3c00e46 2227
b0a1ba59 2228out:
d3843fe5 2229 ip6_hold_safe(net, &rt, true);
b55b76b2 2230
66f5d6ce 2231 rcu_read_unlock();
b55b76b2 2232
b65f164d 2233 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2234 return rt;
2235};
2236
2237static struct dst_entry *ip6_route_redirect(struct net *net,
2238 const struct flowi6 *fl6,
2239 const struct in6_addr *gateway)
2240{
2241 int flags = RT6_LOOKUP_F_HAS_SADDR;
2242 struct ip6rd_flowi rdfl;
2243
2244 rdfl.fl6 = *fl6;
2245 rdfl.gateway = *gateway;
2246
2247 return fib6_rule_lookup(net, &rdfl.fl6,
2248 flags, __ip6_route_redirect);
2249}
2250
e2d118a1
LC
2251void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2252 kuid_t uid)
3a5ad2ee
DM
2253{
2254 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2255 struct dst_entry *dst;
2256 struct flowi6 fl6;
2257
2258 memset(&fl6, 0, sizeof(fl6));
e374c618 2259 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2260 fl6.flowi6_oif = oif;
2261 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2262 fl6.daddr = iph->daddr;
2263 fl6.saddr = iph->saddr;
6502ca52 2264 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2265 fl6.flowi6_uid = uid;
3a5ad2ee 2266
b55b76b2
DJ
2267 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2268 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2269 dst_release(dst);
2270}
2271EXPORT_SYMBOL_GPL(ip6_redirect);
2272
c92a59ec
DJ
2273void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2274 u32 mark)
2275{
2276 const struct ipv6hdr *iph = ipv6_hdr(skb);
2277 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2278 struct dst_entry *dst;
2279 struct flowi6 fl6;
2280
2281 memset(&fl6, 0, sizeof(fl6));
e374c618 2282 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2283 fl6.flowi6_oif = oif;
2284 fl6.flowi6_mark = mark;
c92a59ec
DJ
2285 fl6.daddr = msg->dest;
2286 fl6.saddr = iph->daddr;
e2d118a1 2287 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2288
b55b76b2
DJ
2289 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2290 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2291 dst_release(dst);
2292}
2293
3a5ad2ee
DM
2294void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2295{
e2d118a1
LC
2296 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2297 sk->sk_uid);
3a5ad2ee
DM
2298}
2299EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2300
0dbaee3b 2301static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2302{
0dbaee3b
DM
2303 struct net_device *dev = dst->dev;
2304 unsigned int mtu = dst_mtu(dst);
2305 struct net *net = dev_net(dev);
2306
1da177e4
LT
2307 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2308
5578689a
DL
2309 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2310 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2311
2312 /*
1ab1457c
YH
2313 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2314 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2315 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2316 * rely only on pmtu discovery"
2317 */
2318 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2319 mtu = IPV6_MAXPLEN;
2320 return mtu;
2321}
2322
ebb762f2 2323static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2324{
4b32b5ad
MKL
2325 const struct rt6_info *rt = (const struct rt6_info *)dst;
2326 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2327 struct inet6_dev *idev;
618f9bc7 2328
4b32b5ad
MKL
2329 if (mtu)
2330 goto out;
2331
2332 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2333 if (mtu)
30f78d8e 2334 goto out;
618f9bc7
SK
2335
2336 mtu = IPV6_MIN_MTU;
d33e4553
DM
2337
2338 rcu_read_lock();
2339 idev = __in6_dev_get(dst->dev);
2340 if (idev)
2341 mtu = idev->cnf.mtu6;
2342 rcu_read_unlock();
2343
30f78d8e 2344out:
14972cbd
RP
2345 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2346
2347 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2348}
2349
3b00944c 2350struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2351 struct flowi6 *fl6)
1da177e4 2352{
87a11578 2353 struct dst_entry *dst;
1da177e4
LT
2354 struct rt6_info *rt;
2355 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2356 struct net *net = dev_net(dev);
1da177e4 2357
38308473 2358 if (unlikely(!idev))
122bdf67 2359 return ERR_PTR(-ENODEV);
1da177e4 2360
ad706862 2361 rt = ip6_dst_alloc(net, dev, 0);
38308473 2362 if (unlikely(!rt)) {
1da177e4 2363 in6_dev_put(idev);
87a11578 2364 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2365 goto out;
2366 }
2367
8e2ec639 2368 rt->dst.flags |= DST_HOST;
588753f1 2369 rt->dst.input = ip6_input;
8e2ec639 2370 rt->dst.output = ip6_output;
550bab42 2371 rt->rt6i_gateway = fl6->daddr;
87a11578 2372 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2373 rt->rt6i_dst.plen = 128;
2374 rt->rt6i_idev = idev;
14edd87d 2375 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2376
4c981e28 2377 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2378 * do proper release of the net_device
2379 */
2380 rt6_uncached_list_add(rt);
81eb8447 2381 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2382
87a11578
DM
2383 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2384
1da177e4 2385out:
87a11578 2386 return dst;
1da177e4
LT
2387}
2388
569d3645 2389static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2390{
86393e52 2391 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2392 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2393 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2394 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2395 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2396 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2397 int entries;
7019b78e 2398
fc66f95c 2399 entries = dst_entries_get_fast(ops);
49a18d86 2400 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2401 entries <= rt_max_size)
1da177e4
LT
2402 goto out;
2403
6891a346 2404 net->ipv6.ip6_rt_gc_expire++;
14956643 2405 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2406 entries = dst_entries_get_slow(ops);
2407 if (entries < ops->gc_thresh)
7019b78e 2408 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2409out:
7019b78e 2410 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2411 return entries > rt_max_size;
1da177e4
LT
2412}
2413
e715b6d3
FW
2414static int ip6_convert_metrics(struct mx6_config *mxc,
2415 const struct fib6_config *cfg)
2416{
6670e152 2417 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2418 bool ecn_ca = false;
e715b6d3
FW
2419 struct nlattr *nla;
2420 int remaining;
2421 u32 *mp;
2422
63159f29 2423 if (!cfg->fc_mx)
e715b6d3
FW
2424 return 0;
2425
2426 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2427 if (unlikely(!mp))
2428 return -ENOMEM;
2429
2430 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2431 int type = nla_type(nla);
1bb14807 2432 u32 val;
e715b6d3 2433
1bb14807
DB
2434 if (!type)
2435 continue;
2436 if (unlikely(type > RTAX_MAX))
2437 goto err;
ea697639 2438
1bb14807
DB
2439 if (type == RTAX_CC_ALGO) {
2440 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2441
1bb14807 2442 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2443 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2444 if (val == TCP_CA_UNSPEC)
2445 goto err;
2446 } else {
2447 val = nla_get_u32(nla);
e715b6d3 2448 }
626abd59
PA
2449 if (type == RTAX_HOPLIMIT && val > 255)
2450 val = 255;
b8d3e416
DB
2451 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2452 goto err;
1bb14807
DB
2453
2454 mp[type - 1] = val;
2455 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2456 }
2457
c3a8d947
DB
2458 if (ecn_ca) {
2459 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2460 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2461 }
e715b6d3 2462
c3a8d947 2463 mxc->mx = mp;
e715b6d3
FW
2464 return 0;
2465 err:
2466 kfree(mp);
2467 return -EINVAL;
2468}
1da177e4 2469
8c14586f
DA
2470static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2471 struct fib6_config *cfg,
f4797b33
DA
2472 const struct in6_addr *gw_addr,
2473 u32 tbid, int flags)
8c14586f
DA
2474{
2475 struct flowi6 fl6 = {
2476 .flowi6_oif = cfg->fc_ifindex,
2477 .daddr = *gw_addr,
2478 .saddr = cfg->fc_prefsrc,
2479 };
2480 struct fib6_table *table;
2481 struct rt6_info *rt;
8c14586f 2482
f4797b33 2483 table = fib6_get_table(net, tbid);
8c14586f
DA
2484 if (!table)
2485 return NULL;
2486
2487 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2488 flags |= RT6_LOOKUP_F_HAS_SADDR;
2489
f4797b33 2490 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2491 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2492
2493 /* if table lookup failed, fall back to full lookup */
2494 if (rt == net->ipv6.ip6_null_entry) {
2495 ip6_rt_put(rt);
2496 rt = NULL;
2497 }
2498
2499 return rt;
2500}
2501
fc1e64e1
DA
2502static int ip6_route_check_nh_onlink(struct net *net,
2503 struct fib6_config *cfg,
2504 struct net_device *dev,
2505 struct netlink_ext_ack *extack)
2506{
44750f84 2507 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2508 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2509 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2510 struct rt6_info *grt;
2511 int err;
2512
2513 err = 0;
2514 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2515 if (grt) {
58e354c0
DA
2516 if (!grt->dst.error &&
2517 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2518 NL_SET_ERR_MSG(extack,
2519 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2520 err = -EINVAL;
2521 }
2522
2523 ip6_rt_put(grt);
2524 }
2525
2526 return err;
2527}
2528
1edce99f
DA
2529static int ip6_route_check_nh(struct net *net,
2530 struct fib6_config *cfg,
2531 struct net_device **_dev,
2532 struct inet6_dev **idev)
2533{
2534 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2535 struct net_device *dev = _dev ? *_dev : NULL;
2536 struct rt6_info *grt = NULL;
2537 int err = -EHOSTUNREACH;
2538
2539 if (cfg->fc_table) {
f4797b33
DA
2540 int flags = RT6_LOOKUP_F_IFACE;
2541
2542 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2543 cfg->fc_table, flags);
1edce99f
DA
2544 if (grt) {
2545 if (grt->rt6i_flags & RTF_GATEWAY ||
2546 (dev && dev != grt->dst.dev)) {
2547 ip6_rt_put(grt);
2548 grt = NULL;
2549 }
2550 }
2551 }
2552
2553 if (!grt)
2554 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
2555
2556 if (!grt)
2557 goto out;
2558
2559 if (dev) {
2560 if (dev != grt->dst.dev) {
2561 ip6_rt_put(grt);
2562 goto out;
2563 }
2564 } else {
2565 *_dev = dev = grt->dst.dev;
2566 *idev = grt->rt6i_idev;
2567 dev_hold(dev);
2568 in6_dev_hold(grt->rt6i_idev);
2569 }
2570
2571 if (!(grt->rt6i_flags & RTF_GATEWAY))
2572 err = 0;
2573
2574 ip6_rt_put(grt);
2575
2576out:
2577 return err;
2578}
2579
333c4301
DA
2580static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2581 struct netlink_ext_ack *extack)
1da177e4 2582{
5578689a 2583 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2584 struct rt6_info *rt = NULL;
2585 struct net_device *dev = NULL;
2586 struct inet6_dev *idev = NULL;
c71099ac 2587 struct fib6_table *table;
1da177e4 2588 int addr_type;
8c5b83f0 2589 int err = -EINVAL;
1da177e4 2590
557c44be 2591 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2592 if (cfg->fc_flags & RTF_PCPU) {
2593 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2594 goto out;
d5d531cb 2595 }
557c44be 2596
2ea2352e
WW
2597 /* RTF_CACHE is an internal flag; can not be set by userspace */
2598 if (cfg->fc_flags & RTF_CACHE) {
2599 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2600 goto out;
2601 }
2602
d5d531cb
DA
2603 if (cfg->fc_dst_len > 128) {
2604 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2605 goto out;
2606 }
2607 if (cfg->fc_src_len > 128) {
2608 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2609 goto out;
d5d531cb 2610 }
1da177e4 2611#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2612 if (cfg->fc_src_len) {
2613 NL_SET_ERR_MSG(extack,
2614 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2615 goto out;
d5d531cb 2616 }
1da177e4 2617#endif
86872cb5 2618 if (cfg->fc_ifindex) {
1da177e4 2619 err = -ENODEV;
5578689a 2620 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2621 if (!dev)
2622 goto out;
2623 idev = in6_dev_get(dev);
2624 if (!idev)
2625 goto out;
2626 }
2627
86872cb5
TG
2628 if (cfg->fc_metric == 0)
2629 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2630
fc1e64e1
DA
2631 if (cfg->fc_flags & RTNH_F_ONLINK) {
2632 if (!dev) {
2633 NL_SET_ERR_MSG(extack,
2634 "Nexthop device required for onlink");
2635 err = -ENODEV;
2636 goto out;
2637 }
2638
2639 if (!(dev->flags & IFF_UP)) {
2640 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2641 err = -ENETDOWN;
2642 goto out;
2643 }
2644 }
2645
d71314b4 2646 err = -ENOBUFS;
38308473
DM
2647 if (cfg->fc_nlinfo.nlh &&
2648 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2649 table = fib6_get_table(net, cfg->fc_table);
38308473 2650 if (!table) {
f3213831 2651 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2652 table = fib6_new_table(net, cfg->fc_table);
2653 }
2654 } else {
2655 table = fib6_new_table(net, cfg->fc_table);
2656 }
38308473
DM
2657
2658 if (!table)
c71099ac 2659 goto out;
c71099ac 2660
ad706862
MKL
2661 rt = ip6_dst_alloc(net, NULL,
2662 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2663
38308473 2664 if (!rt) {
1da177e4
LT
2665 err = -ENOMEM;
2666 goto out;
2667 }
2668
1716a961
G
2669 if (cfg->fc_flags & RTF_EXPIRES)
2670 rt6_set_expires(rt, jiffies +
2671 clock_t_to_jiffies(cfg->fc_expires));
2672 else
2673 rt6_clean_expires(rt);
1da177e4 2674
86872cb5
TG
2675 if (cfg->fc_protocol == RTPROT_UNSPEC)
2676 cfg->fc_protocol = RTPROT_BOOT;
2677 rt->rt6i_protocol = cfg->fc_protocol;
2678
2679 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2680
2681 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2682 rt->dst.input = ip6_mc_input;
ab79ad14
2683 else if (cfg->fc_flags & RTF_LOCAL)
2684 rt->dst.input = ip6_input;
1da177e4 2685 else
d8d1f30b 2686 rt->dst.input = ip6_forward;
1da177e4 2687
d8d1f30b 2688 rt->dst.output = ip6_output;
1da177e4 2689
19e42e45
RP
2690 if (cfg->fc_encap) {
2691 struct lwtunnel_state *lwtstate;
2692
30357d7d 2693 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2694 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2695 &lwtstate, extack);
19e42e45
RP
2696 if (err)
2697 goto out;
61adedf3
JB
2698 rt->dst.lwtstate = lwtstate_get(lwtstate);
2699 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2700 rt->dst.lwtstate->orig_output = rt->dst.output;
2701 rt->dst.output = lwtunnel_output;
25368623 2702 }
61adedf3
JB
2703 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2704 rt->dst.lwtstate->orig_input = rt->dst.input;
2705 rt->dst.input = lwtunnel_input;
25368623 2706 }
19e42e45
RP
2707 }
2708
86872cb5
TG
2709 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2710 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2711 if (rt->rt6i_dst.plen == 128)
e5fd387a 2712 rt->dst.flags |= DST_HOST;
e5fd387a 2713
1da177e4 2714#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2715 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2716 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2717#endif
2718
86872cb5 2719 rt->rt6i_metric = cfg->fc_metric;
398958ae 2720 rt->rt6i_nh_weight = 1;
1da177e4
LT
2721
2722 /* We cannot add true routes via loopback here,
2723 they would result in kernel looping; promote them to reject routes
2724 */
86872cb5 2725 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2726 (dev && (dev->flags & IFF_LOOPBACK) &&
2727 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2728 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2729 /* hold loopback dev/idev if we haven't done so. */
5578689a 2730 if (dev != net->loopback_dev) {
1da177e4
LT
2731 if (dev) {
2732 dev_put(dev);
2733 in6_dev_put(idev);
2734 }
5578689a 2735 dev = net->loopback_dev;
1da177e4
LT
2736 dev_hold(dev);
2737 idev = in6_dev_get(dev);
2738 if (!idev) {
2739 err = -ENODEV;
2740 goto out;
2741 }
2742 }
1da177e4 2743 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2744 switch (cfg->fc_type) {
2745 case RTN_BLACKHOLE:
2746 rt->dst.error = -EINVAL;
ede2059d 2747 rt->dst.output = dst_discard_out;
7150aede 2748 rt->dst.input = dst_discard;
ef2c7d7b
ND
2749 break;
2750 case RTN_PROHIBIT:
2751 rt->dst.error = -EACCES;
7150aede
K
2752 rt->dst.output = ip6_pkt_prohibit_out;
2753 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2754 break;
b4949ab2 2755 case RTN_THROW:
0315e382 2756 case RTN_UNREACHABLE:
ef2c7d7b 2757 default:
7150aede 2758 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2759 : (cfg->fc_type == RTN_UNREACHABLE)
2760 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2761 rt->dst.output = ip6_pkt_discard_out;
2762 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2763 break;
2764 }
1da177e4
LT
2765 goto install_route;
2766 }
2767
86872cb5 2768 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2769 const struct in6_addr *gw_addr;
1da177e4
LT
2770 int gwa_type;
2771
86872cb5 2772 gw_addr = &cfg->fc_gateway;
330567b7 2773 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2774
2775 /* if gw_addr is local we will fail to detect this in case
2776 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2777 * will return already-added prefix route via interface that
2778 * prefix route was assigned to, which might be non-loopback.
2779 */
2780 err = -EINVAL;
330567b7
FW
2781 if (ipv6_chk_addr_and_flags(net, gw_addr,
2782 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2783 dev : NULL, 0, 0)) {
2784 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2785 goto out;
d5d531cb 2786 }
4e3fd7a0 2787 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2788
2789 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1da177e4
LT
2790 /* IPv6 strictly inhibits using not link-local
2791 addresses as nexthop address.
2792 Otherwise, router will not able to send redirects.
2793 It is very good, but in some (rare!) circumstances
2794 (SIT, PtP, NBMA NOARP links) it is handy to allow
2795 some exceptions. --ANK
96d5822c
EN
2796 We allow IPv4-mapped nexthops to support RFC4798-type
2797 addressing
1da177e4 2798 */
96d5822c 2799 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2800 IPV6_ADDR_MAPPED))) {
2801 NL_SET_ERR_MSG(extack,
2802 "Invalid gateway address");
1da177e4 2803 goto out;
d5d531cb 2804 }
1da177e4 2805
fc1e64e1
DA
2806 if (cfg->fc_flags & RTNH_F_ONLINK) {
2807 err = ip6_route_check_nh_onlink(net, cfg, dev,
2808 extack);
2809 } else {
2810 err = ip6_route_check_nh(net, cfg, &dev, &idev);
2811 }
1da177e4
LT
2812 if (err)
2813 goto out;
2814 }
2815 err = -EINVAL;
d5d531cb
DA
2816 if (!dev) {
2817 NL_SET_ERR_MSG(extack, "Egress device not specified");
2818 goto out;
2819 } else if (dev->flags & IFF_LOOPBACK) {
2820 NL_SET_ERR_MSG(extack,
2821 "Egress device can not be loopback device for this route");
1da177e4 2822 goto out;
d5d531cb 2823 }
1da177e4
LT
2824 }
2825
2826 err = -ENODEV;
38308473 2827 if (!dev)
1da177e4
LT
2828 goto out;
2829
955ec4cb
DA
2830 if (!(dev->flags & IFF_UP)) {
2831 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2832 err = -ENETDOWN;
2833 goto out;
2834 }
2835
c3968a85
DW
2836 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2837 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2838 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2839 err = -EINVAL;
2840 goto out;
2841 }
4e3fd7a0 2842 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2843 rt->rt6i_prefsrc.plen = 128;
2844 } else
2845 rt->rt6i_prefsrc.plen = 0;
2846
86872cb5 2847 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2848
2849install_route:
5609b80a
IS
2850 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2851 !netif_carrier_ok(dev))
2852 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
fc1e64e1 2853 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
d8d1f30b 2854 rt->dst.dev = dev;
1da177e4 2855 rt->rt6i_idev = idev;
c71099ac 2856 rt->rt6i_table = table;
63152fc0 2857
c346dca1 2858 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2859
8c5b83f0 2860 return rt;
6b9ea5a6
RP
2861out:
2862 if (dev)
2863 dev_put(dev);
2864 if (idev)
2865 in6_dev_put(idev);
587fea74
WW
2866 if (rt)
2867 dst_release_immediate(&rt->dst);
6b9ea5a6 2868
8c5b83f0 2869 return ERR_PTR(err);
6b9ea5a6
RP
2870}
2871
333c4301
DA
2872int ip6_route_add(struct fib6_config *cfg,
2873 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2874{
2875 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2876 struct rt6_info *rt;
6b9ea5a6
RP
2877 int err;
2878
333c4301 2879 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2880 if (IS_ERR(rt)) {
2881 err = PTR_ERR(rt);
2882 rt = NULL;
6b9ea5a6 2883 goto out;
8c5b83f0 2884 }
6b9ea5a6 2885
e715b6d3
FW
2886 err = ip6_convert_metrics(&mxc, cfg);
2887 if (err)
2888 goto out;
1da177e4 2889
333c4301 2890 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2891
2892 kfree(mxc.mx);
6b9ea5a6 2893
e715b6d3 2894 return err;
1da177e4 2895out:
587fea74
WW
2896 if (rt)
2897 dst_release_immediate(&rt->dst);
6b9ea5a6 2898
1da177e4
LT
2899 return err;
2900}
2901
86872cb5 2902static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2903{
2904 int err;
c71099ac 2905 struct fib6_table *table;
d1918542 2906 struct net *net = dev_net(rt->dst.dev);
1da177e4 2907
a4c2fd7f 2908 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2909 err = -ENOENT;
2910 goto out;
2911 }
6c813a72 2912
c71099ac 2913 table = rt->rt6i_table;
66f5d6ce 2914 spin_lock_bh(&table->tb6_lock);
86872cb5 2915 err = fib6_del(rt, info);
66f5d6ce 2916 spin_unlock_bh(&table->tb6_lock);
1da177e4 2917
6825a26c 2918out:
94e187c0 2919 ip6_rt_put(rt);
1da177e4
LT
2920 return err;
2921}
2922
e0a1ad73
TG
2923int ip6_del_rt(struct rt6_info *rt)
2924{
4d1169c1 2925 struct nl_info info = {
d1918542 2926 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2927 };
528c4ceb 2928 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2929}
2930
0ae81335
DA
2931static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2932{
2933 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2934 struct net *net = info->nl_net;
16a16cd3 2935 struct sk_buff *skb = NULL;
0ae81335 2936 struct fib6_table *table;
e3330039 2937 int err = -ENOENT;
0ae81335 2938
e3330039
WC
2939 if (rt == net->ipv6.ip6_null_entry)
2940 goto out_put;
0ae81335 2941 table = rt->rt6i_table;
66f5d6ce 2942 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
2943
2944 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2945 struct rt6_info *sibling, *next_sibling;
2946
16a16cd3
DA
2947 /* prefer to send a single notification with all hops */
2948 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2949 if (skb) {
2950 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2951
e3330039 2952 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2953 NULL, NULL, 0, RTM_DELROUTE,
2954 info->portid, seq, 0) < 0) {
2955 kfree_skb(skb);
2956 skb = NULL;
2957 } else
2958 info->skip_notify = 1;
2959 }
2960
0ae81335
DA
2961 list_for_each_entry_safe(sibling, next_sibling,
2962 &rt->rt6i_siblings,
2963 rt6i_siblings) {
2964 err = fib6_del(sibling, info);
2965 if (err)
e3330039 2966 goto out_unlock;
0ae81335
DA
2967 }
2968 }
2969
2970 err = fib6_del(rt, info);
e3330039 2971out_unlock:
66f5d6ce 2972 spin_unlock_bh(&table->tb6_lock);
e3330039 2973out_put:
0ae81335 2974 ip6_rt_put(rt);
16a16cd3
DA
2975
2976 if (skb) {
e3330039 2977 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2978 info->nlh, gfp_any());
2979 }
0ae81335
DA
2980 return err;
2981}
2982
333c4301
DA
2983static int ip6_route_del(struct fib6_config *cfg,
2984 struct netlink_ext_ack *extack)
1da177e4 2985{
2b760fcf 2986 struct rt6_info *rt, *rt_cache;
c71099ac 2987 struct fib6_table *table;
1da177e4 2988 struct fib6_node *fn;
1da177e4
LT
2989 int err = -ESRCH;
2990
5578689a 2991 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2992 if (!table) {
2993 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2994 return err;
d5d531cb 2995 }
c71099ac 2996
66f5d6ce 2997 rcu_read_lock();
1da177e4 2998
c71099ac 2999 fn = fib6_locate(&table->tb6_root,
86872cb5 3000 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3001 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3002 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3003
1da177e4 3004 if (fn) {
66f5d6ce 3005 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3006 if (cfg->fc_flags & RTF_CACHE) {
3007 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3008 &cfg->fc_src);
3009 if (!rt_cache)
3010 continue;
3011 rt = rt_cache;
3012 }
86872cb5 3013 if (cfg->fc_ifindex &&
d1918542
DM
3014 (!rt->dst.dev ||
3015 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 3016 continue;
86872cb5
TG
3017 if (cfg->fc_flags & RTF_GATEWAY &&
3018 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 3019 continue;
86872cb5 3020 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3021 continue;
c2ed1880
M
3022 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3023 continue;
d3843fe5
WW
3024 if (!dst_hold_safe(&rt->dst))
3025 break;
66f5d6ce 3026 rcu_read_unlock();
1da177e4 3027
0ae81335
DA
3028 /* if gateway was specified only delete the one hop */
3029 if (cfg->fc_flags & RTF_GATEWAY)
3030 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3031
3032 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3033 }
3034 }
66f5d6ce 3035 rcu_read_unlock();
1da177e4
LT
3036
3037 return err;
3038}
3039
6700c270 3040static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3041{
a6279458 3042 struct netevent_redirect netevent;
e8599ff4 3043 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3044 struct ndisc_options ndopts;
3045 struct inet6_dev *in6_dev;
3046 struct neighbour *neigh;
71bcdba0 3047 struct rd_msg *msg;
6e157b6a
DM
3048 int optlen, on_link;
3049 u8 *lladdr;
e8599ff4 3050
29a3cad5 3051 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3052 optlen -= sizeof(*msg);
e8599ff4
DM
3053
3054 if (optlen < 0) {
6e157b6a 3055 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3056 return;
3057 }
3058
71bcdba0 3059 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3060
71bcdba0 3061 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3062 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3063 return;
3064 }
3065
6e157b6a 3066 on_link = 0;
71bcdba0 3067 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3068 on_link = 1;
71bcdba0 3069 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3070 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3071 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3072 return;
3073 }
3074
3075 in6_dev = __in6_dev_get(skb->dev);
3076 if (!in6_dev)
3077 return;
3078 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3079 return;
3080
3081 /* RFC2461 8.1:
3082 * The IP source address of the Redirect MUST be the same as the current
3083 * first-hop router for the specified ICMP Destination Address.
3084 */
3085
f997c55c 3086 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3087 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3088 return;
3089 }
6e157b6a
DM
3090
3091 lladdr = NULL;
e8599ff4
DM
3092 if (ndopts.nd_opts_tgt_lladdr) {
3093 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3094 skb->dev);
3095 if (!lladdr) {
3096 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3097 return;
3098 }
3099 }
3100
6e157b6a 3101 rt = (struct rt6_info *) dst;
ec13ad1d 3102 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3103 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3104 return;
6e157b6a 3105 }
e8599ff4 3106
6e157b6a
DM
3107 /* Redirect received -> path was valid.
3108 * Look, redirects are sent only in response to data packets,
3109 * so that this nexthop apparently is reachable. --ANK
3110 */
0dec879f 3111 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3112
71bcdba0 3113 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3114 if (!neigh)
3115 return;
a6279458 3116
1da177e4
LT
3117 /*
3118 * We have finally decided to accept it.
3119 */
3120
f997c55c 3121 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3122 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3123 NEIGH_UPDATE_F_OVERRIDE|
3124 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3125 NEIGH_UPDATE_F_ISROUTER)),
3126 NDISC_REDIRECT, &ndopts);
1da177e4 3127
83a09abd 3128 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3129 if (!nrt)
1da177e4
LT
3130 goto out;
3131
3132 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3133 if (on_link)
3134 nrt->rt6i_flags &= ~RTF_GATEWAY;
3135
b91d5329 3136 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3137 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3138
2b760fcf
WW
3139 /* No need to remove rt from the exception table if rt is
3140 * a cached route because rt6_insert_exception() will
3141 * takes care of it
3142 */
3143 if (rt6_insert_exception(nrt, rt)) {
3144 dst_release_immediate(&nrt->dst);
3145 goto out;
3146 }
1da177e4 3147
d8d1f30b
CG
3148 netevent.old = &rt->dst;
3149 netevent.new = &nrt->dst;
71bcdba0 3150 netevent.daddr = &msg->dest;
60592833 3151 netevent.neigh = neigh;
8d71740c
TT
3152 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3153
1da177e4 3154out:
e8599ff4 3155 neigh_release(neigh);
6e157b6a
DM
3156}
3157
1da177e4
LT
3158/*
3159 * Misc support functions
3160 */
3161
4b32b5ad
MKL
3162static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3163{
3a2232e9 3164 BUG_ON(from->from);
4b32b5ad
MKL
3165
3166 rt->rt6i_flags &= ~RTF_EXPIRES;
3167 dst_hold(&from->dst);
3a2232e9 3168 rt->from = from;
4b32b5ad
MKL
3169 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3170}
3171
83a09abd
MKL
3172static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3173{
3174 rt->dst.input = ort->dst.input;
3175 rt->dst.output = ort->dst.output;
3176 rt->rt6i_dst = ort->rt6i_dst;
3177 rt->dst.error = ort->dst.error;
3178 rt->rt6i_idev = ort->rt6i_idev;
3179 if (rt->rt6i_idev)
3180 in6_dev_hold(rt->rt6i_idev);
3181 rt->dst.lastuse = jiffies;
3182 rt->rt6i_gateway = ort->rt6i_gateway;
3183 rt->rt6i_flags = ort->rt6i_flags;
3184 rt6_set_from(rt, ort);
3185 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3186#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3187 rt->rt6i_src = ort->rt6i_src;
1da177e4 3188#endif
83a09abd
MKL
3189 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3190 rt->rt6i_table = ort->rt6i_table;
61adedf3 3191 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3192}
3193
70ceb4f5 3194#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3195static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3196 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3197 const struct in6_addr *gwaddr,
3198 struct net_device *dev)
70ceb4f5 3199{
830218c1
DA
3200 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3201 int ifindex = dev->ifindex;
70ceb4f5
YH
3202 struct fib6_node *fn;
3203 struct rt6_info *rt = NULL;
c71099ac
TG
3204 struct fib6_table *table;
3205
830218c1 3206 table = fib6_get_table(net, tb_id);
38308473 3207 if (!table)
c71099ac 3208 return NULL;
70ceb4f5 3209
66f5d6ce 3210 rcu_read_lock();
38fbeeee 3211 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3212 if (!fn)
3213 goto out;
3214
66f5d6ce 3215 for_each_fib6_node_rt_rcu(fn) {
d1918542 3216 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3217 continue;
3218 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3219 continue;
3220 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3221 continue;
d3843fe5 3222 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3223 break;
3224 }
3225out:
66f5d6ce 3226 rcu_read_unlock();
70ceb4f5
YH
3227 return rt;
3228}
3229
efa2cea0 3230static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3231 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3232 const struct in6_addr *gwaddr,
3233 struct net_device *dev,
95c96174 3234 unsigned int pref)
70ceb4f5 3235{
86872cb5 3236 struct fib6_config cfg = {
238fc7ea 3237 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3238 .fc_ifindex = dev->ifindex,
86872cb5
TG
3239 .fc_dst_len = prefixlen,
3240 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3241 RTF_UP | RTF_PREF(pref),
b91d5329 3242 .fc_protocol = RTPROT_RA,
15e47304 3243 .fc_nlinfo.portid = 0,
efa2cea0
DL
3244 .fc_nlinfo.nlh = NULL,
3245 .fc_nlinfo.nl_net = net,
86872cb5
TG
3246 };
3247
830218c1 3248 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3249 cfg.fc_dst = *prefix;
3250 cfg.fc_gateway = *gwaddr;
70ceb4f5 3251
e317da96
YH
3252 /* We should treat it as a default route if prefix length is 0. */
3253 if (!prefixlen)
86872cb5 3254 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3255
333c4301 3256 ip6_route_add(&cfg, NULL);
70ceb4f5 3257
830218c1 3258 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3259}
3260#endif
3261
b71d1d42 3262struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3263{
830218c1 3264 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3265 struct rt6_info *rt;
c71099ac 3266 struct fib6_table *table;
1da177e4 3267
830218c1 3268 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3269 if (!table)
c71099ac 3270 return NULL;
1da177e4 3271
66f5d6ce
WW
3272 rcu_read_lock();
3273 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3274 if (dev == rt->dst.dev &&
045927ff 3275 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3276 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3277 break;
3278 }
3279 if (rt)
d3843fe5 3280 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3281 rcu_read_unlock();
1da177e4
LT
3282 return rt;
3283}
3284
b71d1d42 3285struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3286 struct net_device *dev,
3287 unsigned int pref)
1da177e4 3288{
86872cb5 3289 struct fib6_config cfg = {
ca254490 3290 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3291 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3292 .fc_ifindex = dev->ifindex,
3293 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3294 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3295 .fc_protocol = RTPROT_RA,
15e47304 3296 .fc_nlinfo.portid = 0,
5578689a 3297 .fc_nlinfo.nlh = NULL,
c346dca1 3298 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3299 };
1da177e4 3300
4e3fd7a0 3301 cfg.fc_gateway = *gwaddr;
1da177e4 3302
333c4301 3303 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3304 struct fib6_table *table;
3305
3306 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3307 if (table)
3308 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3309 }
1da177e4 3310
1da177e4
LT
3311 return rt6_get_dflt_router(gwaddr, dev);
3312}
3313
830218c1 3314static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3315{
3316 struct rt6_info *rt;
3317
3318restart:
66f5d6ce
WW
3319 rcu_read_lock();
3320 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3321 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3322 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3323 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3324 rcu_read_unlock();
d3843fe5
WW
3325 ip6_del_rt(rt);
3326 } else {
66f5d6ce 3327 rcu_read_unlock();
d3843fe5 3328 }
1da177e4
LT
3329 goto restart;
3330 }
3331 }
66f5d6ce 3332 rcu_read_unlock();
830218c1
DA
3333
3334 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3335}
3336
3337void rt6_purge_dflt_routers(struct net *net)
3338{
3339 struct fib6_table *table;
3340 struct hlist_head *head;
3341 unsigned int h;
3342
3343 rcu_read_lock();
3344
3345 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3346 head = &net->ipv6.fib_table_hash[h];
3347 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3348 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3349 __rt6_purge_dflt_routers(table);
3350 }
3351 }
3352
3353 rcu_read_unlock();
1da177e4
LT
3354}
3355
5578689a
DL
3356static void rtmsg_to_fib6_config(struct net *net,
3357 struct in6_rtmsg *rtmsg,
86872cb5
TG
3358 struct fib6_config *cfg)
3359{
3360 memset(cfg, 0, sizeof(*cfg));
3361
ca254490
DA
3362 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3363 : RT6_TABLE_MAIN;
86872cb5
TG
3364 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3365 cfg->fc_metric = rtmsg->rtmsg_metric;
3366 cfg->fc_expires = rtmsg->rtmsg_info;
3367 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3368 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3369 cfg->fc_flags = rtmsg->rtmsg_flags;
3370
5578689a 3371 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3372
4e3fd7a0
AD
3373 cfg->fc_dst = rtmsg->rtmsg_dst;
3374 cfg->fc_src = rtmsg->rtmsg_src;
3375 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3376}
3377
5578689a 3378int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3379{
86872cb5 3380 struct fib6_config cfg;
1da177e4
LT
3381 struct in6_rtmsg rtmsg;
3382 int err;
3383
67ba4152 3384 switch (cmd) {
1da177e4
LT
3385 case SIOCADDRT: /* Add a route */
3386 case SIOCDELRT: /* Delete a route */
af31f412 3387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3388 return -EPERM;
3389 err = copy_from_user(&rtmsg, arg,
3390 sizeof(struct in6_rtmsg));
3391 if (err)
3392 return -EFAULT;
86872cb5 3393
5578689a 3394 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3395
1da177e4
LT
3396 rtnl_lock();
3397 switch (cmd) {
3398 case SIOCADDRT:
333c4301 3399 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3400 break;
3401 case SIOCDELRT:
333c4301 3402 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3403 break;
3404 default:
3405 err = -EINVAL;
3406 }
3407 rtnl_unlock();
3408
3409 return err;
3ff50b79 3410 }
1da177e4
LT
3411
3412 return -EINVAL;
3413}
3414
3415/*
3416 * Drop the packet on the floor
3417 */
3418
d5fdd6ba 3419static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3420{
612f09e8 3421 int type;
adf30907 3422 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3423 switch (ipstats_mib_noroutes) {
3424 case IPSTATS_MIB_INNOROUTES:
0660e03f 3425 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3426 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3427 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3428 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3429 break;
3430 }
3431 /* FALLTHROUGH */
3432 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3433 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3434 ipstats_mib_noroutes);
612f09e8
YH
3435 break;
3436 }
3ffe533c 3437 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3438 kfree_skb(skb);
3439 return 0;
3440}
3441
9ce8ade0
TG
3442static int ip6_pkt_discard(struct sk_buff *skb)
3443{
612f09e8 3444 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3445}
3446
ede2059d 3447static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3448{
adf30907 3449 skb->dev = skb_dst(skb)->dev;
612f09e8 3450 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3451}
3452
9ce8ade0
TG
3453static int ip6_pkt_prohibit(struct sk_buff *skb)
3454{
612f09e8 3455 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3456}
3457
ede2059d 3458static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3459{
adf30907 3460 skb->dev = skb_dst(skb)->dev;
612f09e8 3461 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3462}
3463
1da177e4
LT
3464/*
3465 * Allocate a dst for local (unicast / anycast) address.
3466 */
3467
3468struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3469 const struct in6_addr *addr,
8f031519 3470 bool anycast)
1da177e4 3471{
ca254490 3472 u32 tb_id;
c346dca1 3473 struct net *net = dev_net(idev->dev);
4832c30d 3474 struct net_device *dev = idev->dev;
5f02ce24
DA
3475 struct rt6_info *rt;
3476
5f02ce24 3477 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3478 if (!rt)
1da177e4
LT
3479 return ERR_PTR(-ENOMEM);
3480
1da177e4
LT
3481 in6_dev_hold(idev);
3482
11d53b49 3483 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3484 rt->dst.input = ip6_input;
3485 rt->dst.output = ip6_output;
1da177e4 3486 rt->rt6i_idev = idev;
1da177e4 3487
94b5e0f9 3488 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3489 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3490 if (anycast)
3491 rt->rt6i_flags |= RTF_ANYCAST;
3492 else
1da177e4 3493 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3494
550bab42 3495 rt->rt6i_gateway = *addr;
4e3fd7a0 3496 rt->rt6i_dst.addr = *addr;
1da177e4 3497 rt->rt6i_dst.plen = 128;
ca254490
DA
3498 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3499 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3500
1da177e4
LT
3501 return rt;
3502}
3503
c3968a85
DW
3504/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address to drop */
};
3510
3511static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3512{
3513 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3514 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3515 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3516
d1918542 3517 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3518 rt != net->ipv6.ip6_null_entry &&
3519 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3520 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3521 /* remove prefsrc entry */
3522 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3523 /* need to update cache as well */
3524 rt6_exceptions_remove_prefsrc(rt);
3525 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3526 }
3527 return 0;
3528}
3529
3530void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3531{
3532 struct net *net = dev_net(ifp->idev->dev);
3533 struct arg_dev_net_ip adni = {
3534 .dev = ifp->idev->dev,
3535 .net = net,
3536 .addr = &ifp->addr,
3537 };
0c3584d5 3538 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3539}
3540
be7a010d 3541#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3542
3543/* Remove routers and update dst entries when gateway turn into host. */
3544static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3545{
3546 struct in6_addr *gateway = (struct in6_addr *)arg;
3547
2b760fcf
WW
3548 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3549 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3550 return -1;
3551 }
b16cb459
WW
3552
3553 /* Further clean up cached routes in exception table.
3554 * This is needed because cached route may have a different
3555 * gateway than its 'parent' in the case of an ip redirect.
3556 */
3557 rt6_exceptions_clean_tohost(rt, gateway);
3558
be7a010d
DJ
3559 return 0;
3560}
3561
3562void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3563{
3564 fib6_clean_all(net, fib6_clean_tohost, gateway);
3565}
3566
2127d95a
IS
/* Argument bundle for the fib6_ifup() / fib6_ifdown() walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* read by fib6_ifup(): flags to clear */
		unsigned long event;	/* read by fib6_ifdown(): NETDEV_* event */
	};
};
3574
d7dedee1
IS
3575static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3576{
3577 struct rt6_info *iter;
3578 struct fib6_node *fn;
3579
3580 fn = rcu_dereference_protected(rt->rt6i_node,
3581 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3582 iter = rcu_dereference_protected(fn->leaf,
3583 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3584 while (iter) {
3585 if (iter->rt6i_metric == rt->rt6i_metric &&
3586 rt6_qualify_for_ecmp(iter))
3587 return iter;
3588 iter = rcu_dereference_protected(iter->rt6_next,
3589 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3590 }
3591
3592 return NULL;
3593}
3594
3595static bool rt6_is_dead(const struct rt6_info *rt)
3596{
3597 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3598 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3599 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3600 return true;
3601
3602 return false;
3603}
3604
3605static int rt6_multipath_total_weight(const struct rt6_info *rt)
3606{
3607 struct rt6_info *iter;
3608 int total = 0;
3609
3610 if (!rt6_is_dead(rt))
398958ae 3611 total += rt->rt6i_nh_weight;
d7dedee1
IS
3612
3613 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3614 if (!rt6_is_dead(iter))
398958ae 3615 total += iter->rt6i_nh_weight;
d7dedee1
IS
3616 }
3617
3618 return total;
3619}
3620
3621static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3622{
3623 int upper_bound = -1;
3624
3625 if (!rt6_is_dead(rt)) {
398958ae 3626 *weight += rt->rt6i_nh_weight;
d7dedee1
IS
3627 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3628 total) - 1;
3629 }
3630 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3631}
3632
3633static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3634{
3635 struct rt6_info *iter;
3636 int weight = 0;
3637
3638 rt6_upper_bound_set(rt, &weight, total);
3639
3640 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3641 rt6_upper_bound_set(iter, &weight, total);
3642}
3643
3644void rt6_multipath_rebalance(struct rt6_info *rt)
3645{
3646 struct rt6_info *first;
3647 int total;
3648
3649 /* In case the entire multipath route was marked for flushing,
3650 * then there is no need to rebalance upon the removal of every
3651 * sibling route.
3652 */
3653 if (!rt->rt6i_nsiblings || rt->should_flush)
3654 return;
3655
3656 /* During lookup routes are evaluated in order, so we need to
3657 * make sure upper bounds are assigned from the first sibling
3658 * onwards.
3659 */
3660 first = rt6_multipath_first_sibling(rt);
3661 if (WARN_ON_ONCE(!first))
3662 return;
3663
3664 total = rt6_multipath_total_weight(first);
3665 rt6_multipath_upper_bound_set(first, total);
3666}
3667
2127d95a
IS
3668static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3669{
3670 const struct arg_netdev_event *arg = p_arg;
3671 const struct net *net = dev_net(arg->dev);
3672
1de178ed 3673 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
2127d95a 3674 rt->rt6i_nh_flags &= ~arg->nh_flags;
1de178ed 3675 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
d7dedee1 3676 rt6_multipath_rebalance(rt);
1de178ed 3677 }
2127d95a
IS
3678
3679 return 0;
3680}
3681
3682void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3683{
3684 struct arg_netdev_event arg = {
3685 .dev = dev,
6802f3ad
IS
3686 {
3687 .nh_flags = nh_flags,
3688 },
2127d95a
IS
3689 };
3690
3691 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3692 arg.nh_flags |= RTNH_F_LINKDOWN;
3693
3694 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3695}
3696
1de178ed
IS
3697static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3698 const struct net_device *dev)
3699{
3700 struct rt6_info *iter;
3701
3702 if (rt->dst.dev == dev)
3703 return true;
3704 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3705 if (iter->dst.dev == dev)
3706 return true;
3707
3708 return false;
3709}
3710
3711static void rt6_multipath_flush(struct rt6_info *rt)
3712{
3713 struct rt6_info *iter;
3714
3715 rt->should_flush = 1;
3716 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3717 iter->should_flush = 1;
3718}
3719
3720static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3721 const struct net_device *down_dev)
3722{
3723 struct rt6_info *iter;
3724 unsigned int dead = 0;
3725
3726 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3727 dead++;
3728 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3729 if (iter->dst.dev == down_dev ||
3730 iter->rt6i_nh_flags & RTNH_F_DEAD)
3731 dead++;
3732
3733 return dead;
3734}
3735
3736static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3737 const struct net_device *dev,
3738 unsigned int nh_flags)
3739{
3740 struct rt6_info *iter;
3741
3742 if (rt->dst.dev == dev)
3743 rt->rt6i_nh_flags |= nh_flags;
3744 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3745 if (iter->dst.dev == dev)
3746 iter->rt6i_nh_flags |= nh_flags;
3747}
3748
a1a22c12 3749/* called with write lock held for table with rt */
4c981e28 3750static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3751{
4c981e28
IS
3752 const struct arg_netdev_event *arg = p_arg;
3753 const struct net_device *dev = arg->dev;
3754 const struct net *net = dev_net(dev);
8ed67789 3755
1de178ed 3756 if (rt == net->ipv6.ip6_null_entry)
27c6fa73
IS
3757 return 0;
3758
3759 switch (arg->event) {
3760 case NETDEV_UNREGISTER:
1de178ed 3761 return rt->dst.dev == dev ? -1 : 0;
27c6fa73 3762 case NETDEV_DOWN:
1de178ed 3763 if (rt->should_flush)
27c6fa73 3764 return -1;
1de178ed
IS
3765 if (!rt->rt6i_nsiblings)
3766 return rt->dst.dev == dev ? -1 : 0;
3767 if (rt6_multipath_uses_dev(rt, dev)) {
3768 unsigned int count;
3769
3770 count = rt6_multipath_dead_count(rt, dev);
3771 if (rt->rt6i_nsiblings + 1 == count) {
3772 rt6_multipath_flush(rt);
3773 return -1;
3774 }
3775 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3776 RTNH_F_LINKDOWN);
3777 fib6_update_sernum(rt);
d7dedee1 3778 rt6_multipath_rebalance(rt);
1de178ed
IS
3779 }
3780 return -2;
27c6fa73 3781 case NETDEV_CHANGE:
1de178ed
IS
3782 if (rt->dst.dev != dev ||
3783 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73
IS
3784 break;
3785 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3786 rt6_multipath_rebalance(rt);
27c6fa73 3787 break;
2b241361 3788 }
c159d30c 3789
1da177e4
LT
3790 return 0;
3791}
3792
27c6fa73 3793void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3794{
4c981e28 3795 struct arg_netdev_event arg = {
8ed67789 3796 .dev = dev,
6802f3ad
IS
3797 {
3798 .event = event,
3799 },
8ed67789
DL
3800 };
3801
4c981e28
IS
3802 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3803}
3804
3805void rt6_disable_ip(struct net_device *dev, unsigned long event)
3806{
3807 rt6_sync_down_dev(dev, event);
3808 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3809 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3810}
3811
/* Argument bundle handed to rt6_mtu_change_route() via fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3816
3817static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3818{
3819 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3820 struct inet6_dev *idev;
3821
3822 /* In IPv6 pmtu discovery is not optional,
3823 so that RTAX_MTU lock cannot disable it.
3824 We still use this lock to block changes
3825 caused by addrconf/ndisc.
3826 */
3827
3828 idev = __in6_dev_get(arg->dev);
38308473 3829 if (!idev)
1da177e4
LT
3830 return 0;
3831
3832 /* For administrative MTU increase, there is no way to discover
3833 IPv6 PMTU increase, so PMTU increase should be updated here.
3834 Since RFC 1981 doesn't include administrative MTU increase
3835 update PMTU increase is a MUST. (i.e. jumbo frame)
3836 */
d1918542 3837 if (rt->dst.dev == arg->dev &&
4b32b5ad 3838 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3839 spin_lock_bh(&rt6_exception_lock);
e9fa1495
SB
3840 if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
3841 rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
4b32b5ad 3842 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
e9fa1495 3843 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 3844 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3845 }
1da177e4
LT
3846 return 0;
3847}
3848
95c96174 3849void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3850{
c71099ac
TG
3851 struct rt6_mtu_change_arg arg = {
3852 .dev = dev,
3853 .mtu = mtu,
3854 };
1da177e4 3855
0c3584d5 3856 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3857}
3858
ef7c79ed 3859static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3860 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3861 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3862 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3863 [RTA_PRIORITY] = { .type = NLA_U32 },
3864 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3865 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3866 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3867 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3868 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3869 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3870 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3871 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3872};
3873
3874static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3875 struct fib6_config *cfg,
3876 struct netlink_ext_ack *extack)
1da177e4 3877{
86872cb5
TG
3878 struct rtmsg *rtm;
3879 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3880 unsigned int pref;
86872cb5 3881 int err;
1da177e4 3882
fceb6435
JB
3883 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3884 NULL);
86872cb5
TG
3885 if (err < 0)
3886 goto errout;
1da177e4 3887
86872cb5
TG
3888 err = -EINVAL;
3889 rtm = nlmsg_data(nlh);
3890 memset(cfg, 0, sizeof(*cfg));
3891
3892 cfg->fc_table = rtm->rtm_table;
3893 cfg->fc_dst_len = rtm->rtm_dst_len;
3894 cfg->fc_src_len = rtm->rtm_src_len;
3895 cfg->fc_flags = RTF_UP;
3896 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3897 cfg->fc_type = rtm->rtm_type;
86872cb5 3898
ef2c7d7b
ND
3899 if (rtm->rtm_type == RTN_UNREACHABLE ||
3900 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3901 rtm->rtm_type == RTN_PROHIBIT ||
3902 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3903 cfg->fc_flags |= RTF_REJECT;
3904
ab79ad14
3905 if (rtm->rtm_type == RTN_LOCAL)
3906 cfg->fc_flags |= RTF_LOCAL;
3907
1f56a01f
MKL
3908 if (rtm->rtm_flags & RTM_F_CLONED)
3909 cfg->fc_flags |= RTF_CACHE;
3910
fc1e64e1
DA
3911 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
3912
15e47304 3913 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3914 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3915 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3916
3917 if (tb[RTA_GATEWAY]) {
67b61f6c 3918 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3919 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3920 }
86872cb5
TG
3921
3922 if (tb[RTA_DST]) {
3923 int plen = (rtm->rtm_dst_len + 7) >> 3;
3924
3925 if (nla_len(tb[RTA_DST]) < plen)
3926 goto errout;
3927
3928 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3929 }
86872cb5
TG
3930
3931 if (tb[RTA_SRC]) {
3932 int plen = (rtm->rtm_src_len + 7) >> 3;
3933
3934 if (nla_len(tb[RTA_SRC]) < plen)
3935 goto errout;
3936
3937 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3938 }
86872cb5 3939
c3968a85 3940 if (tb[RTA_PREFSRC])
67b61f6c 3941 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3942
86872cb5
TG
3943 if (tb[RTA_OIF])
3944 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3945
3946 if (tb[RTA_PRIORITY])
3947 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3948
3949 if (tb[RTA_METRICS]) {
3950 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3951 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3952 }
86872cb5
TG
3953
3954 if (tb[RTA_TABLE])
3955 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3956
51ebd318
ND
3957 if (tb[RTA_MULTIPATH]) {
3958 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3959 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3960
3961 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3962 cfg->fc_mp_len, extack);
9ed59592
DA
3963 if (err < 0)
3964 goto errout;
51ebd318
ND
3965 }
3966
c78ba6d6
LR
3967 if (tb[RTA_PREF]) {
3968 pref = nla_get_u8(tb[RTA_PREF]);
3969 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3970 pref != ICMPV6_ROUTER_PREF_HIGH)
3971 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3972 cfg->fc_flags |= RTF_PREF(pref);
3973 }
3974
19e42e45
RP
3975 if (tb[RTA_ENCAP])
3976 cfg->fc_encap = tb[RTA_ENCAP];
3977
9ed59592 3978 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3979 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3980
c255bd68 3981 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3982 if (err < 0)
3983 goto errout;
3984 }
3985
32bc201e
XL
3986 if (tb[RTA_EXPIRES]) {
3987 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3988
3989 if (addrconf_finite_timeout(timeout)) {
3990 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3991 cfg->fc_flags |= RTF_EXPIRES;
3992 }
3993 }
3994
86872cb5
TG
3995 err = 0;
3996errout:
3997 return err;
1da177e4
LT
3998}
3999
6b9ea5a6
RP
4000struct rt6_nh {
4001 struct rt6_info *rt6_info;
4002 struct fib6_config r_cfg;
4003 struct mx6_config mxc;
4004 struct list_head next;
4005};
4006
4007static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4008{
4009 struct rt6_nh *nh;
4010
4011 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4012 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4013 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4014 nh->r_cfg.fc_ifindex);
4015 }
4016}
4017
4018static int ip6_route_info_append(struct list_head *rt6_nh_list,
4019 struct rt6_info *rt, struct fib6_config *r_cfg)
4020{
4021 struct rt6_nh *nh;
6b9ea5a6
RP
4022 int err = -EEXIST;
4023
4024 list_for_each_entry(nh, rt6_nh_list, next) {
4025 /* check if rt6_info already exists */
f06b7549 4026 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4027 return err;
4028 }
4029
4030 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4031 if (!nh)
4032 return -ENOMEM;
4033 nh->rt6_info = rt;
4034 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4035 if (err) {
4036 kfree(nh);
4037 return err;
4038 }
4039 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4040 list_add_tail(&nh->next, rt6_nh_list);
4041
4042 return 0;
4043}
4044
3b1137fe
DA
4045static void ip6_route_mpath_notify(struct rt6_info *rt,
4046 struct rt6_info *rt_last,
4047 struct nl_info *info,
4048 __u16 nlflags)
4049{
4050 /* if this is an APPEND route, then rt points to the first route
4051 * inserted and rt_last points to last route inserted. Userspace
4052 * wants a consistent dump of the route which starts at the first
4053 * nexthop. Since sibling routes are always added at the end of
4054 * the list, find the first sibling of the last route appended
4055 */
4056 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4057 rt = list_first_entry(&rt_last->rt6i_siblings,
4058 struct rt6_info,
4059 rt6i_siblings);
4060 }
4061
4062 if (rt)
4063 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4064}
4065
333c4301
DA
4066static int ip6_route_multipath_add(struct fib6_config *cfg,
4067 struct netlink_ext_ack *extack)
51ebd318 4068{
3b1137fe
DA
4069 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4070 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4071 struct fib6_config r_cfg;
4072 struct rtnexthop *rtnh;
6b9ea5a6
RP
4073 struct rt6_info *rt;
4074 struct rt6_nh *err_nh;
4075 struct rt6_nh *nh, *nh_safe;
3b1137fe 4076 __u16 nlflags;
51ebd318
ND
4077 int remaining;
4078 int attrlen;
6b9ea5a6
RP
4079 int err = 1;
4080 int nhn = 0;
4081 int replace = (cfg->fc_nlinfo.nlh &&
4082 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4083 LIST_HEAD(rt6_nh_list);
51ebd318 4084
3b1137fe
DA
4085 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4086 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4087 nlflags |= NLM_F_APPEND;
4088
35f1b4e9 4089 remaining = cfg->fc_mp_len;
51ebd318 4090 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4091
6b9ea5a6
RP
4092 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4093 * rt6_info structs per nexthop
4094 */
51ebd318
ND
4095 while (rtnh_ok(rtnh, remaining)) {
4096 memcpy(&r_cfg, cfg, sizeof(*cfg));
4097 if (rtnh->rtnh_ifindex)
4098 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4099
4100 attrlen = rtnh_attrlen(rtnh);
4101 if (attrlen > 0) {
4102 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4103
4104 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4105 if (nla) {
67b61f6c 4106 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4107 r_cfg.fc_flags |= RTF_GATEWAY;
4108 }
19e42e45
RP
4109 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4110 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4111 if (nla)
4112 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4113 }
6b9ea5a6 4114
68e2ffde 4115 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4116 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4117 if (IS_ERR(rt)) {
4118 err = PTR_ERR(rt);
4119 rt = NULL;
6b9ea5a6 4120 goto cleanup;
8c5b83f0 4121 }
6b9ea5a6 4122
398958ae
IS
4123 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4124
6b9ea5a6 4125 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4126 if (err) {
587fea74 4127 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4128 goto cleanup;
4129 }
4130
4131 rtnh = rtnh_next(rtnh, &remaining);
4132 }
4133
3b1137fe
DA
4134 /* for add and replace send one notification with all nexthops.
4135 * Skip the notification in fib6_add_rt2node and send one with
4136 * the full route when done
4137 */
4138 info->skip_notify = 1;
4139
6b9ea5a6
RP
4140 err_nh = NULL;
4141 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4142 rt_last = nh->rt6_info;
333c4301 4143 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4144 /* save reference to first route for notification */
4145 if (!rt_notif && !err)
4146 rt_notif = nh->rt6_info;
4147
6b9ea5a6
RP
4148 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4149 nh->rt6_info = NULL;
4150 if (err) {
4151 if (replace && nhn)
4152 ip6_print_replace_route_err(&rt6_nh_list);
4153 err_nh = nh;
4154 goto add_errout;
51ebd318 4155 }
6b9ea5a6 4156
1a72418b 4157 /* Because each route is added like a single route we remove
27596472
MK
4158 * these flags after the first nexthop: if there is a collision,
4159 * we have already failed to add the first nexthop:
4160 * fib6_add_rt2node() has rejected it; when replacing, old
4161 * nexthops have been replaced by first new, the rest should
4162 * be added to it.
1a72418b 4163 */
27596472
MK
4164 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4165 NLM_F_REPLACE);
6b9ea5a6
RP
4166 nhn++;
4167 }
4168
3b1137fe
DA
4169 /* success ... tell user about new route */
4170 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4171 goto cleanup;
4172
4173add_errout:
3b1137fe
DA
4174 /* send notification for routes that were added so that
4175 * the delete notifications sent by ip6_route_del are
4176 * coherent
4177 */
4178 if (rt_notif)
4179 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4180
6b9ea5a6
RP
4181 /* Delete routes that were already added */
4182 list_for_each_entry(nh, &rt6_nh_list, next) {
4183 if (err_nh == nh)
4184 break;
333c4301 4185 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4186 }
4187
4188cleanup:
4189 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4190 if (nh->rt6_info)
4191 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4192 kfree(nh->mxc.mx);
6b9ea5a6
RP
4193 list_del(&nh->next);
4194 kfree(nh);
4195 }
4196
4197 return err;
4198}
4199
333c4301
DA
4200static int ip6_route_multipath_del(struct fib6_config *cfg,
4201 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4202{
4203 struct fib6_config r_cfg;
4204 struct rtnexthop *rtnh;
4205 int remaining;
4206 int attrlen;
4207 int err = 1, last_err = 0;
4208
4209 remaining = cfg->fc_mp_len;
4210 rtnh = (struct rtnexthop *)cfg->fc_mp;
4211
4212 /* Parse a Multipath Entry */
4213 while (rtnh_ok(rtnh, remaining)) {
4214 memcpy(&r_cfg, cfg, sizeof(*cfg));
4215 if (rtnh->rtnh_ifindex)
4216 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4217
4218 attrlen = rtnh_attrlen(rtnh);
4219 if (attrlen > 0) {
4220 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4221
4222 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4223 if (nla) {
4224 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4225 r_cfg.fc_flags |= RTF_GATEWAY;
4226 }
4227 }
333c4301 4228 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4229 if (err)
4230 last_err = err;
4231
51ebd318
ND
4232 rtnh = rtnh_next(rtnh, &remaining);
4233 }
4234
4235 return last_err;
4236}
4237
c21ef3e3
DA
4238static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4239 struct netlink_ext_ack *extack)
1da177e4 4240{
86872cb5
TG
4241 struct fib6_config cfg;
4242 int err;
1da177e4 4243
333c4301 4244 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4245 if (err < 0)
4246 return err;
4247
51ebd318 4248 if (cfg.fc_mp)
333c4301 4249 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4250 else {
4251 cfg.fc_delete_all_nh = 1;
333c4301 4252 return ip6_route_del(&cfg, extack);
0ae81335 4253 }
1da177e4
LT
4254}
4255
c21ef3e3
DA
4256static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4257 struct netlink_ext_ack *extack)
1da177e4 4258{
86872cb5
TG
4259 struct fib6_config cfg;
4260 int err;
1da177e4 4261
333c4301 4262 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4263 if (err < 0)
4264 return err;
4265
51ebd318 4266 if (cfg.fc_mp)
333c4301 4267 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4268 else
333c4301 4269 return ip6_route_add(&cfg, extack);
1da177e4
LT
4270}
4271
beb1afac 4272static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4273{
beb1afac
DA
4274 int nexthop_len = 0;
4275
4276 if (rt->rt6i_nsiblings) {
4277 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4278 + NLA_ALIGN(sizeof(struct rtnexthop))
4279 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
4280 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4281
4282 nexthop_len *= rt->rt6i_nsiblings;
4283 }
4284
339bf98f
TG
4285 return NLMSG_ALIGN(sizeof(struct rtmsg))
4286 + nla_total_size(16) /* RTA_SRC */
4287 + nla_total_size(16) /* RTA_DST */
4288 + nla_total_size(16) /* RTA_GATEWAY */
4289 + nla_total_size(16) /* RTA_PREFSRC */
4290 + nla_total_size(4) /* RTA_TABLE */
4291 + nla_total_size(4) /* RTA_IIF */
4292 + nla_total_size(4) /* RTA_OIF */
4293 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4294 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4295 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4296 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4297 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
4298 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4299 + nexthop_len;
4300}
4301
4302static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4303 unsigned int *flags, bool skip_oif)
beb1afac 4304{
f9d882ea
IS
4305 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4306 *flags |= RTNH_F_DEAD;
4307
44c9f2f2 4308 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4309 *flags |= RTNH_F_LINKDOWN;
4310 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4311 *flags |= RTNH_F_DEAD;
4312 }
4313
4314 if (rt->rt6i_flags & RTF_GATEWAY) {
4315 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4316 goto nla_put_failure;
4317 }
4318
fc1e64e1 4319 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
fe400799 4320 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4321 *flags |= RTNH_F_OFFLOAD;
4322
5be083ce
DA
4323 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4324 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
4325 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4326 goto nla_put_failure;
4327
4328 if (rt->dst.lwtstate &&
4329 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4330 goto nla_put_failure;
4331
4332 return 0;
4333
4334nla_put_failure:
4335 return -EMSGSIZE;
4336}
4337
5be083ce 4338/* add multipath next hop */
beb1afac
DA
4339static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4340{
4341 struct rtnexthop *rtnh;
4342 unsigned int flags = 0;
4343
4344 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4345 if (!rtnh)
4346 goto nla_put_failure;
4347
398958ae 4348 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
beb1afac
DA
4349 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4350
5be083ce 4351 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4352 goto nla_put_failure;
4353
4354 rtnh->rtnh_flags = flags;
4355
4356 /* length of rtnetlink header + attributes */
4357 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4358
4359 return 0;
4360
4361nla_put_failure:
4362 return -EMSGSIZE;
339bf98f
TG
4363}
4364
191cd582
BH
4365static int rt6_fill_node(struct net *net,
4366 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4367 struct in6_addr *dst, struct in6_addr *src,
15e47304 4368 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4369 unsigned int flags)
1da177e4 4370{
4b32b5ad 4371 u32 metrics[RTAX_MAX];
1da177e4 4372 struct rtmsg *rtm;
2d7202bf 4373 struct nlmsghdr *nlh;
e3703b3d 4374 long expires;
9e762a4a 4375 u32 table;
1da177e4 4376
15e47304 4377 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4378 if (!nlh)
26932566 4379 return -EMSGSIZE;
2d7202bf
TG
4380
4381 rtm = nlmsg_data(nlh);
1da177e4
LT
4382 rtm->rtm_family = AF_INET6;
4383 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4384 rtm->rtm_src_len = rt->rt6i_src.plen;
4385 rtm->rtm_tos = 0;
c71099ac 4386 if (rt->rt6i_table)
9e762a4a 4387 table = rt->rt6i_table->tb6_id;
c71099ac 4388 else
9e762a4a
PM
4389 table = RT6_TABLE_UNSPEC;
4390 rtm->rtm_table = table;
c78679e8
DM
4391 if (nla_put_u32(skb, RTA_TABLE, table))
4392 goto nla_put_failure;
ef2c7d7b
ND
4393 if (rt->rt6i_flags & RTF_REJECT) {
4394 switch (rt->dst.error) {
4395 case -EINVAL:
4396 rtm->rtm_type = RTN_BLACKHOLE;
4397 break;
4398 case -EACCES:
4399 rtm->rtm_type = RTN_PROHIBIT;
4400 break;
b4949ab2
ND
4401 case -EAGAIN:
4402 rtm->rtm_type = RTN_THROW;
4403 break;
ef2c7d7b
ND
4404 default:
4405 rtm->rtm_type = RTN_UNREACHABLE;
4406 break;
4407 }
4408 }
38308473 4409 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4410 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4411 else if (rt->rt6i_flags & RTF_ANYCAST)
4412 rtm->rtm_type = RTN_ANYCAST;
d1918542 4413 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4414 rtm->rtm_type = RTN_LOCAL;
4415 else
4416 rtm->rtm_type = RTN_UNICAST;
4417 rtm->rtm_flags = 0;
4418 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4419 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4420
38308473 4421 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4422 rtm->rtm_flags |= RTM_F_CLONED;
4423
4424 if (dst) {
930345ea 4425 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4426 goto nla_put_failure;
1ab1457c 4427 rtm->rtm_dst_len = 128;
1da177e4 4428 } else if (rtm->rtm_dst_len)
930345ea 4429 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4430 goto nla_put_failure;
1da177e4
LT
4431#ifdef CONFIG_IPV6_SUBTREES
4432 if (src) {
930345ea 4433 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4434 goto nla_put_failure;
1ab1457c 4435 rtm->rtm_src_len = 128;
c78679e8 4436 } else if (rtm->rtm_src_len &&
930345ea 4437 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4438 goto nla_put_failure;
1da177e4 4439#endif
7bc570c8
YH
4440 if (iif) {
4441#ifdef CONFIG_IPV6_MROUTE
4442 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4443 int err = ip6mr_get_route(net, skb, rtm, portid);
4444
4445 if (err == 0)
4446 return 0;
4447 if (err < 0)
4448 goto nla_put_failure;
7bc570c8
YH
4449 } else
4450#endif
c78679e8
DM
4451 if (nla_put_u32(skb, RTA_IIF, iif))
4452 goto nla_put_failure;
7bc570c8 4453 } else if (dst) {
1da177e4 4454 struct in6_addr saddr_buf;
c78679e8 4455 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4456 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4457 goto nla_put_failure;
1da177e4 4458 }
2d7202bf 4459
c3968a85
DW
4460 if (rt->rt6i_prefsrc.plen) {
4461 struct in6_addr saddr_buf;
4e3fd7a0 4462 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4463 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4464 goto nla_put_failure;
c3968a85
DW
4465 }
4466
4b32b5ad
MKL
4467 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4468 if (rt->rt6i_pmtu)
4469 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4470 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4471 goto nla_put_failure;
4472
c78679e8
DM
4473 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4474 goto nla_put_failure;
8253947e 4475
beb1afac
DA
4476 /* For multipath routes, walk the siblings list and add
4477 * each as a nexthop within RTA_MULTIPATH.
4478 */
4479 if (rt->rt6i_nsiblings) {
4480 struct rt6_info *sibling, *next_sibling;
4481 struct nlattr *mp;
4482
4483 mp = nla_nest_start(skb, RTA_MULTIPATH);
4484 if (!mp)
4485 goto nla_put_failure;
4486
4487 if (rt6_add_nexthop(skb, rt) < 0)
4488 goto nla_put_failure;
4489
4490 list_for_each_entry_safe(sibling, next_sibling,
4491 &rt->rt6i_siblings, rt6i_siblings) {
4492 if (rt6_add_nexthop(skb, sibling) < 0)
4493 goto nla_put_failure;
4494 }
4495
4496 nla_nest_end(skb, mp);
4497 } else {
5be083ce 4498 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4499 goto nla_put_failure;
4500 }
4501
8253947e 4502 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4503
87a50699 4504 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4505 goto nla_put_failure;
2d7202bf 4506
c78ba6d6
LR
4507 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4508 goto nla_put_failure;
4509
19e42e45 4510
053c095a
JB
4511 nlmsg_end(skb, nlh);
4512 return 0;
2d7202bf
TG
4513
4514nla_put_failure:
26932566
PM
4515 nlmsg_cancel(skb, nlh);
4516 return -EMSGSIZE;
1da177e4
LT
4517}
4518
1b43af54 4519int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4520{
4521 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4522 struct net *net = arg->net;
4523
4524 if (rt == net->ipv6.ip6_null_entry)
4525 return 0;
1da177e4 4526
2d7202bf
TG
4527 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4528 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4529
4530 /* user wants prefix routes only */
4531 if (rtm->rtm_flags & RTM_F_PREFIX &&
4532 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4533 /* success since this is not a prefix route */
4534 return 1;
4535 }
4536 }
1da177e4 4537
1f17e2f2 4538 return rt6_fill_node(net,
191cd582 4539 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4540 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4541 NLM_F_MULTI);
1da177e4
LT
4542}
4543
c21ef3e3
DA
4544static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4545 struct netlink_ext_ack *extack)
1da177e4 4546{
3b1e0a65 4547 struct net *net = sock_net(in_skb->sk);
ab364a6f 4548 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4549 int err, iif = 0, oif = 0;
4550 struct dst_entry *dst;
ab364a6f 4551 struct rt6_info *rt;
1da177e4 4552 struct sk_buff *skb;
ab364a6f 4553 struct rtmsg *rtm;
4c9483b2 4554 struct flowi6 fl6;
18c3a61c 4555 bool fibmatch;
1da177e4 4556
fceb6435 4557 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4558 extack);
ab364a6f
TG
4559 if (err < 0)
4560 goto errout;
1da177e4 4561
ab364a6f 4562 err = -EINVAL;
4c9483b2 4563 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4564 rtm = nlmsg_data(nlh);
4565 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4566 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4567
ab364a6f
TG
4568 if (tb[RTA_SRC]) {
4569 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4570 goto errout;
4571
4e3fd7a0 4572 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4573 }
4574
4575 if (tb[RTA_DST]) {
4576 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4577 goto errout;
4578
4e3fd7a0 4579 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4580 }
4581
4582 if (tb[RTA_IIF])
4583 iif = nla_get_u32(tb[RTA_IIF]);
4584
4585 if (tb[RTA_OIF])
72331bc0 4586 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4587
2e47b291
LC
4588 if (tb[RTA_MARK])
4589 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4590
622ec2c9
LC
4591 if (tb[RTA_UID])
4592 fl6.flowi6_uid = make_kuid(current_user_ns(),
4593 nla_get_u32(tb[RTA_UID]));
4594 else
4595 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4596
1da177e4
LT
4597 if (iif) {
4598 struct net_device *dev;
72331bc0
SL
4599 int flags = 0;
4600
121622db
FW
4601 rcu_read_lock();
4602
4603 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4604 if (!dev) {
121622db 4605 rcu_read_unlock();
1da177e4 4606 err = -ENODEV;
ab364a6f 4607 goto errout;
1da177e4 4608 }
72331bc0
SL
4609
4610 fl6.flowi6_iif = iif;
4611
4612 if (!ipv6_addr_any(&fl6.saddr))
4613 flags |= RT6_LOOKUP_F_HAS_SADDR;
4614
58acfd71 4615 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
121622db
FW
4616
4617 rcu_read_unlock();
72331bc0
SL
4618 } else {
4619 fl6.flowi6_oif = oif;
4620
58acfd71 4621 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4622 }
4623
18c3a61c
RP
4624
4625 rt = container_of(dst, struct rt6_info, dst);
4626 if (rt->dst.error) {
4627 err = rt->dst.error;
4628 ip6_rt_put(rt);
4629 goto errout;
1da177e4
LT
4630 }
4631
9d6acb3b
WC
4632 if (rt == net->ipv6.ip6_null_entry) {
4633 err = rt->dst.error;
4634 ip6_rt_put(rt);
4635 goto errout;
4636 }
4637
fba961ab
DM
4638 if (fibmatch && rt->from) {
4639 struct rt6_info *ort = rt->from;
58acfd71
IS
4640
4641 dst_hold(&ort->dst);
4642 ip6_rt_put(rt);
4643 rt = ort;
4644 }
4645
ab364a6f 4646 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4647 if (!skb) {
94e187c0 4648 ip6_rt_put(rt);
ab364a6f
TG
4649 err = -ENOBUFS;
4650 goto errout;
4651 }
1da177e4 4652
d8d1f30b 4653 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4654 if (fibmatch)
4655 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4656 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4657 nlh->nlmsg_seq, 0);
4658 else
4659 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4660 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4661 nlh->nlmsg_seq, 0);
1da177e4 4662 if (err < 0) {
ab364a6f
TG
4663 kfree_skb(skb);
4664 goto errout;
1da177e4
LT
4665 }
4666
15e47304 4667 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4668errout:
1da177e4 4669 return err;
1da177e4
LT
4670}
4671
37a1d361
RP
4672void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4673 unsigned int nlm_flags)
1da177e4
LT
4674{
4675 struct sk_buff *skb;
5578689a 4676 struct net *net = info->nl_net;
528c4ceb
DL
4677 u32 seq;
4678 int err;
4679
4680 err = -ENOBUFS;
38308473 4681 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4682
19e42e45 4683 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4684 if (!skb)
21713ebc
TG
4685 goto errout;
4686
191cd582 4687 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4688 event, info->portid, seq, nlm_flags);
26932566
PM
4689 if (err < 0) {
4690 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4691 WARN_ON(err == -EMSGSIZE);
4692 kfree_skb(skb);
4693 goto errout;
4694 }
15e47304 4695 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4696 info->nlh, gfp_any());
4697 return;
21713ebc
TG
4698errout:
4699 if (err < 0)
5578689a 4700 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4701}
4702
8ed67789 4703static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4704 unsigned long event, void *ptr)
8ed67789 4705{
351638e7 4706 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4707 struct net *net = dev_net(dev);
8ed67789 4708
242d3a49
WC
4709 if (!(dev->flags & IFF_LOOPBACK))
4710 return NOTIFY_OK;
4711
4712 if (event == NETDEV_REGISTER) {
d8d1f30b 4713 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4714 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4715#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4716 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4717 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4718 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4719 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4720#endif
76da0704
WC
4721 } else if (event == NETDEV_UNREGISTER &&
4722 dev->reg_state != NETREG_UNREGISTERED) {
4723 /* NETDEV_UNREGISTER could be fired for multiple times by
4724 * netdev_wait_allrefs(). Make sure we only call this once.
4725 */
12d94a80 4726 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4727#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4728 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4729 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4730#endif
4731 }
4732
4733 return NOTIFY_OK;
4734}
4735
1da177e4
LT
4736/*
4737 * /proc
4738 */
4739
4740#ifdef CONFIG_PROC_FS
4741
33120b30 4742static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4743 .open = ipv6_route_open,
4744 .read = seq_read,
4745 .llseek = seq_lseek,
8d2ca1d7 4746 .release = seq_release_net,
33120b30
AD
4747};
4748
1da177e4
LT
4749static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4750{
69ddb805 4751 struct net *net = (struct net *)seq->private;
1da177e4 4752 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4753 net->ipv6.rt6_stats->fib_nodes,
4754 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4755 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4756 net->ipv6.rt6_stats->fib_rt_entries,
4757 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4758 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4759 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4760
4761 return 0;
4762}
4763
4764static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4765{
de05c557 4766 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4767}
4768
9a32144e 4769static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4770 .open = rt6_stats_seq_open,
4771 .read = seq_read,
4772 .llseek = seq_lseek,
b6fcbdb4 4773 .release = single_release_net,
1da177e4
LT
4774};
4775#endif /* CONFIG_PROC_FS */
4776
4777#ifdef CONFIG_SYSCTL
4778
1da177e4 4779static
fe2c6338 4780int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4781 void __user *buffer, size_t *lenp, loff_t *ppos)
4782{
c486da34
LAG
4783 struct net *net;
4784 int delay;
4785 if (!write)
1da177e4 4786 return -EINVAL;
c486da34
LAG
4787
4788 net = (struct net *)ctl->extra1;
4789 delay = net->ipv6.sysctl.flush_delay;
4790 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4791 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4792 return 0;
1da177e4
LT
4793}
4794
fe2c6338 4795struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4796 {
1da177e4 4797 .procname = "flush",
4990509f 4798 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4799 .maxlen = sizeof(int),
89c8b3a1 4800 .mode = 0200,
6d9f239a 4801 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4802 },
4803 {
1da177e4 4804 .procname = "gc_thresh",
9a7ec3a9 4805 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4806 .maxlen = sizeof(int),
4807 .mode = 0644,
6d9f239a 4808 .proc_handler = proc_dointvec,
1da177e4
LT
4809 },
4810 {
1da177e4 4811 .procname = "max_size",
4990509f 4812 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4813 .maxlen = sizeof(int),
4814 .mode = 0644,
6d9f239a 4815 .proc_handler = proc_dointvec,
1da177e4
LT
4816 },
4817 {
1da177e4 4818 .procname = "gc_min_interval",
4990509f 4819 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4820 .maxlen = sizeof(int),
4821 .mode = 0644,
6d9f239a 4822 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4823 },
4824 {
1da177e4 4825 .procname = "gc_timeout",
4990509f 4826 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4827 .maxlen = sizeof(int),
4828 .mode = 0644,
6d9f239a 4829 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4830 },
4831 {
1da177e4 4832 .procname = "gc_interval",
4990509f 4833 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4834 .maxlen = sizeof(int),
4835 .mode = 0644,
6d9f239a 4836 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4837 },
4838 {
1da177e4 4839 .procname = "gc_elasticity",
4990509f 4840 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4841 .maxlen = sizeof(int),
4842 .mode = 0644,
f3d3f616 4843 .proc_handler = proc_dointvec,
1da177e4
LT
4844 },
4845 {
1da177e4 4846 .procname = "mtu_expires",
4990509f 4847 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4848 .maxlen = sizeof(int),
4849 .mode = 0644,
6d9f239a 4850 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4851 },
4852 {
1da177e4 4853 .procname = "min_adv_mss",
4990509f 4854 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4855 .maxlen = sizeof(int),
4856 .mode = 0644,
f3d3f616 4857 .proc_handler = proc_dointvec,
1da177e4
LT
4858 },
4859 {
1da177e4 4860 .procname = "gc_min_interval_ms",
4990509f 4861 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4862 .maxlen = sizeof(int),
4863 .mode = 0644,
6d9f239a 4864 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4865 },
f8572d8f 4866 { }
1da177e4
LT
4867};
4868
2c8c1e72 4869struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4870{
4871 struct ctl_table *table;
4872
4873 table = kmemdup(ipv6_route_table_template,
4874 sizeof(ipv6_route_table_template),
4875 GFP_KERNEL);
5ee09105
YH
4876
4877 if (table) {
4878 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4879 table[0].extra1 = net;
86393e52 4880 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4881 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4882 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4883 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4884 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4885 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4886 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4887 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4888 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4889
4890 /* Don't export sysctls to unprivileged users */
4891 if (net->user_ns != &init_user_ns)
4892 table[0].procname = NULL;
5ee09105
YH
4893 }
4894
760f2d01
DL
4895 return table;
4896}
1da177e4
LT
4897#endif
4898
2c8c1e72 4899static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4900{
633d424b 4901 int ret = -ENOMEM;
8ed67789 4902
86393e52
AD
4903 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4904 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4905
fc66f95c
ED
4906 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4907 goto out_ip6_dst_ops;
4908
8ed67789
DL
4909 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4910 sizeof(*net->ipv6.ip6_null_entry),
4911 GFP_KERNEL);
4912 if (!net->ipv6.ip6_null_entry)
fc66f95c 4913 goto out_ip6_dst_entries;
d8d1f30b 4914 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4915 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4916 ip6_template_metrics, true);
8ed67789
DL
4917
4918#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4919 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4920 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4921 sizeof(*net->ipv6.ip6_prohibit_entry),
4922 GFP_KERNEL);
68fffc67
PZ
4923 if (!net->ipv6.ip6_prohibit_entry)
4924 goto out_ip6_null_entry;
d8d1f30b 4925 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4926 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4927 ip6_template_metrics, true);
8ed67789
DL
4928
4929 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4930 sizeof(*net->ipv6.ip6_blk_hole_entry),
4931 GFP_KERNEL);
68fffc67
PZ
4932 if (!net->ipv6.ip6_blk_hole_entry)
4933 goto out_ip6_prohibit_entry;
d8d1f30b 4934 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4935 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4936 ip6_template_metrics, true);
8ed67789
DL
4937#endif
4938
b339a47c
PZ
4939 net->ipv6.sysctl.flush_delay = 0;
4940 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4941 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4942 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4943 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4944 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4945 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4946 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4947
6891a346
BT
4948 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4949
8ed67789
DL
4950 ret = 0;
4951out:
4952 return ret;
f2fc6a54 4953
68fffc67
PZ
4954#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4955out_ip6_prohibit_entry:
4956 kfree(net->ipv6.ip6_prohibit_entry);
4957out_ip6_null_entry:
4958 kfree(net->ipv6.ip6_null_entry);
4959#endif
fc66f95c
ED
4960out_ip6_dst_entries:
4961 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4962out_ip6_dst_ops:
f2fc6a54 4963 goto out;
cdb18761
DL
4964}
4965
2c8c1e72 4966static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4967{
8ed67789
DL
4968 kfree(net->ipv6.ip6_null_entry);
4969#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4970 kfree(net->ipv6.ip6_prohibit_entry);
4971 kfree(net->ipv6.ip6_blk_hole_entry);
4972#endif
41bb78b4 4973 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4974}
4975
d189634e
TG
4976static int __net_init ip6_route_net_init_late(struct net *net)
4977{
4978#ifdef CONFIG_PROC_FS
d4beaa66
G
4979 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4980 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4981#endif
4982 return 0;
4983}
4984
4985static void __net_exit ip6_route_net_exit_late(struct net *net)
4986{
4987#ifdef CONFIG_PROC_FS
ece31ffd
G
4988 remove_proc_entry("ipv6_route", net->proc_net);
4989 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4990#endif
4991}
4992
cdb18761
DL
4993static struct pernet_operations ip6_route_net_ops = {
4994 .init = ip6_route_net_init,
4995 .exit = ip6_route_net_exit,
4996};
4997
c3426b47
DM
4998static int __net_init ipv6_inetpeer_init(struct net *net)
4999{
5000 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5001
5002 if (!bp)
5003 return -ENOMEM;
5004 inet_peer_base_init(bp);
5005 net->ipv6.peers = bp;
5006 return 0;
5007}
5008
5009static void __net_exit ipv6_inetpeer_exit(struct net *net)
5010{
5011 struct inet_peer_base *bp = net->ipv6.peers;
5012
5013 net->ipv6.peers = NULL;
56a6b248 5014 inetpeer_invalidate_tree(bp);
c3426b47
DM
5015 kfree(bp);
5016}
5017
2b823f72 5018static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5019 .init = ipv6_inetpeer_init,
5020 .exit = ipv6_inetpeer_exit,
5021};
5022
d189634e
TG
5023static struct pernet_operations ip6_route_net_late_ops = {
5024 .init = ip6_route_net_init_late,
5025 .exit = ip6_route_net_exit_late,
5026};
5027
8ed67789
DL
5028static struct notifier_block ip6_route_dev_notifier = {
5029 .notifier_call = ip6_route_dev_notify,
242d3a49 5030 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5031};
5032
2f460933
WC
5033void __init ip6_route_init_special_entries(void)
5034{
5035 /* Registering of the loopback is done before this portion of code,
5036 * the loopback reference in rt6_info will not be taken, do it
5037 * manually for init_net */
5038 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5039 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5040 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5041 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5042 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5043 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5044 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5045 #endif
5046}
5047
433d49c3 5048int __init ip6_route_init(void)
1da177e4 5049{
433d49c3 5050 int ret;
8d0b94af 5051 int cpu;
433d49c3 5052
9a7ec3a9
DL
5053 ret = -ENOMEM;
5054 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5055 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5056 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5057 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5058 goto out;
14e50e57 5059
fc66f95c 5060 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5061 if (ret)
bdb3289f 5062 goto out_kmem_cache;
bdb3289f 5063
c3426b47
DM
5064 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5065 if (ret)
e8803b6c 5066 goto out_dst_entries;
2a0c451a 5067
7e52b33b
DM
5068 ret = register_pernet_subsys(&ip6_route_net_ops);
5069 if (ret)
5070 goto out_register_inetpeer;
c3426b47 5071
5dc121e9
AE
5072 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5073
e8803b6c 5074 ret = fib6_init();
433d49c3 5075 if (ret)
8ed67789 5076 goto out_register_subsys;
433d49c3 5077
433d49c3
DL
5078 ret = xfrm6_init();
5079 if (ret)
e8803b6c 5080 goto out_fib6_init;
c35b7e72 5081
433d49c3
DL
5082 ret = fib6_rules_init();
5083 if (ret)
5084 goto xfrm6_init;
7e5449c2 5085
d189634e
TG
5086 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5087 if (ret)
5088 goto fib6_rules_init;
5089
16feebcf
FW
5090 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5091 inet6_rtm_newroute, NULL, 0);
5092 if (ret < 0)
5093 goto out_register_late_subsys;
5094
5095 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5096 inet6_rtm_delroute, NULL, 0);
5097 if (ret < 0)
5098 goto out_register_late_subsys;
5099
5100 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5101 inet6_rtm_getroute, NULL,
5102 RTNL_FLAG_DOIT_UNLOCKED);
5103 if (ret < 0)
d189634e 5104 goto out_register_late_subsys;
c127ea2c 5105
8ed67789 5106 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5107 if (ret)
d189634e 5108 goto out_register_late_subsys;
8ed67789 5109
8d0b94af
MKL
5110 for_each_possible_cpu(cpu) {
5111 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5112
5113 INIT_LIST_HEAD(&ul->head);
5114 spin_lock_init(&ul->lock);
5115 }
5116
433d49c3
DL
5117out:
5118 return ret;
5119
d189634e 5120out_register_late_subsys:
16feebcf 5121 rtnl_unregister_all(PF_INET6);
d189634e 5122 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5123fib6_rules_init:
433d49c3
DL
5124 fib6_rules_cleanup();
5125xfrm6_init:
433d49c3 5126 xfrm6_fini();
2a0c451a
TG
5127out_fib6_init:
5128 fib6_gc_cleanup();
8ed67789
DL
5129out_register_subsys:
5130 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5131out_register_inetpeer:
5132 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5133out_dst_entries:
5134 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5135out_kmem_cache:
f2fc6a54 5136 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5137 goto out;
1da177e4
LT
5138}
5139
5140void ip6_route_cleanup(void)
5141{
8ed67789 5142 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5143 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5144 fib6_rules_cleanup();
1da177e4 5145 xfrm6_fini();
1da177e4 5146 fib6_gc_cleanup();
c3426b47 5147 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5148 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5149 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5150 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5151}