ipv6: prepare fib6_remove_prefsrc() for exception table
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check done for a route's gateway.
 * All failure codes are negative; rt6_score_route() hands them back in
 * place of a score and find_match() acts on them.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED		= 1,
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
d52d3997
MKL
185static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
186{
187 return dst_metrics_write_ptr(rt->dst.from);
188}
189
06582540
DM
190static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
191{
4b32b5ad 192 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 193
d52d3997
MKL
194 if (rt->rt6i_flags & RTF_PCPU)
195 return rt6_pcpu_cow_metrics(rt);
196 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
197 return NULL;
198 else
3b471175 199 return dst_cow_metrics_generic(dst, old);
06582540
DM
200}
201
f894cbf8
DM
202static inline const void *choose_neigh_daddr(struct rt6_info *rt,
203 struct sk_buff *skb,
204 const void *daddr)
39232973
DM
205{
206 struct in6_addr *p = &rt->rt6i_gateway;
207
a7563f34 208 if (!ipv6_addr_any(p))
39232973 209 return (const void *) p;
f894cbf8
DM
210 else if (skb)
211 return &ipv6_hdr(skb)->daddr;
39232973
DM
212 return daddr;
213}
214
f894cbf8
DM
215static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
d3aaeb38 218{
39232973
DM
219 struct rt6_info *rt = (struct rt6_info *) dst;
220 struct neighbour *n;
221
f894cbf8 222 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 223 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
224 if (n)
225 return n;
226 return neigh_create(&nd_tbl, daddr, dst->dev);
227}
228
63fca65d
JA
229static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
230{
231 struct net_device *dev = dst->dev;
232 struct rt6_info *rt = (struct rt6_info *)dst;
233
234 daddr = choose_neigh_daddr(rt, NULL, daddr);
235 if (!daddr)
236 return;
237 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
238 return;
239 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
240 return;
241 __ipv6_confirm_neigh(dev, daddr);
242}
243
9a7ec3a9 244static struct dst_ops ip6_dst_ops_template = {
1da177e4 245 .family = AF_INET6,
1da177e4
LT
246 .gc = ip6_dst_gc,
247 .gc_thresh = 1024,
248 .check = ip6_dst_check,
0dbaee3b 249 .default_advmss = ip6_default_advmss,
ebb762f2 250 .mtu = ip6_mtu,
06582540 251 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
252 .destroy = ip6_dst_destroy,
253 .ifdown = ip6_dst_ifdown,
254 .negative_advice = ip6_negative_advice,
255 .link_failure = ip6_link_failure,
256 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 257 .redirect = rt6_do_redirect,
9f8955cc 258 .local_out = __ip6_local_out,
d3aaeb38 259 .neigh_lookup = ip6_neigh_lookup,
63fca65d 260 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
261};
262
ebb762f2 263static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 264{
618f9bc7
SK
265 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
266
267 return mtu ? : dst->dev->mtu;
ec831ea7
RD
268}
269
6700c270
DM
270static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
271 struct sk_buff *skb, u32 mtu)
14e50e57
DM
272{
273}
274
6700c270
DM
/* dst_ops->redirect handler for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
279
14e50e57
DM
280static struct dst_ops ip6_dst_blackhole_ops = {
281 .family = AF_INET6,
14e50e57
DM
282 .destroy = ip6_dst_destroy,
283 .check = ip6_dst_check,
ebb762f2 284 .mtu = ip6_blackhole_mtu,
214f45c9 285 .default_advmss = ip6_default_advmss,
14e50e57 286 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 287 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 288 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 289 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
290};
291
62fa8a84 292static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 293 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
294};
295
fb0af4c7 296static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
297 .dst = {
298 .__refcnt = ATOMIC_INIT(1),
299 .__use = 1,
2c20cbd7 300 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 301 .error = -ENETUNREACH,
d8d1f30b
CG
302 .input = ip6_pkt_discard,
303 .output = ip6_pkt_discard_out,
1da177e4
LT
304 },
305 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 306 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
307 .rt6i_metric = ~(u32) 0,
308 .rt6i_ref = ATOMIC_INIT(1),
309};
310
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" route: a reject entry handled by the
 * ip6_pkt_prohibit*() callbacks, with dst error -EACCES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry that hands packets to
 * dst_discard()/dst_discard_out(), with dst error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
344
ebfa45f0
MKL
345static void rt6_info_init(struct rt6_info *rt)
346{
347 struct dst_entry *dst = &rt->dst;
348
349 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
350 INIT_LIST_HEAD(&rt->rt6i_siblings);
351 INIT_LIST_HEAD(&rt->rt6i_uncached);
352}
353
1da177e4 354/* allocate dst with ip6_dst_ops */
d52d3997
MKL
355static struct rt6_info *__ip6_dst_alloc(struct net *net,
356 struct net_device *dev,
ad706862 357 int flags)
1da177e4 358{
97bab73f 359 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 360 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 361
ebfa45f0
MKL
362 if (rt)
363 rt6_info_init(rt);
8104891b 364
cf911662 365 return rt;
1da177e4
LT
366}
367
9ab179d8
DA
368struct rt6_info *ip6_dst_alloc(struct net *net,
369 struct net_device *dev,
370 int flags)
d52d3997 371{
ad706862 372 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
373
374 if (rt) {
375 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
376 if (rt->rt6i_pcpu) {
377 int cpu;
378
379 for_each_possible_cpu(cpu) {
380 struct rt6_info **p;
381
382 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
383 /* no one shares rt */
384 *p = NULL;
385 }
386 } else {
587fea74 387 dst_release_immediate(&rt->dst);
d52d3997
MKL
388 return NULL;
389 }
390 }
391
392 return rt;
393}
9ab179d8 394EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 395
1da177e4
LT
396static void ip6_dst_destroy(struct dst_entry *dst)
397{
398 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 399 struct rt6_exception_bucket *bucket;
ecd98837 400 struct dst_entry *from = dst->from;
8d0b94af 401 struct inet6_dev *idev;
1da177e4 402
4b32b5ad 403 dst_destroy_metrics_generic(dst);
87775312 404 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
405 rt6_uncached_list_del(rt);
406
407 idev = rt->rt6i_idev;
38308473 408 if (idev) {
1da177e4
LT
409 rt->rt6i_idev = NULL;
410 in6_dev_put(idev);
1ab1457c 411 }
35732d01
WW
412 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
413 if (bucket) {
414 rt->rt6i_exception_bucket = NULL;
415 kfree(bucket);
416 }
1716a961 417
ecd98837
YH
418 dst->from = NULL;
419 dst_release(from);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
1716a961 452 } else if (rt->dst.from) {
1e2ea8ad
XL
453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
454 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
51ebd318 459static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
460 struct flowi6 *fl6, int oif,
461 int strict)
51ebd318
ND
462{
463 struct rt6_info *sibling, *next_sibling;
464 int route_choosen;
465
b673d6cc
JS
466 /* We might have already computed the hash for ICMPv6 errors. In such
467 * case it will always be non-zero. Otherwise now is the time to do it.
468 */
469 if (!fl6->mp_hash)
470 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
471
472 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
473 /* Don't change the route, if route_choosen == 0
474 * (siblings does not include ourself)
475 */
476 if (route_choosen)
477 list_for_each_entry_safe(sibling, next_sibling,
478 &match->rt6i_siblings, rt6i_siblings) {
479 route_choosen--;
480 if (route_choosen == 0) {
52bd4c0c
ND
481 if (rt6_score_route(sibling, oif, strict) < 0)
482 break;
51ebd318
ND
483 match = sibling;
484 break;
485 }
486 }
487 return match;
488}
489
1da177e4 490/*
c71099ac 491 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
492 */
493
8ed67789
DL
494static inline struct rt6_info *rt6_device_match(struct net *net,
495 struct rt6_info *rt,
b71d1d42 496 const struct in6_addr *saddr,
1da177e4 497 int oif,
d420895e 498 int flags)
1da177e4
LT
499{
500 struct rt6_info *local = NULL;
501 struct rt6_info *sprt;
502
dd3abc4e
YH
503 if (!oif && ipv6_addr_any(saddr))
504 goto out;
505
d8d1f30b 506 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 507 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
508
509 if (oif) {
1da177e4
LT
510 if (dev->ifindex == oif)
511 return sprt;
512 if (dev->flags & IFF_LOOPBACK) {
38308473 513 if (!sprt->rt6i_idev ||
1da177e4 514 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 515 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 516 continue;
17fb0b2b
DA
517 if (local &&
518 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
519 continue;
520 }
521 local = sprt;
522 }
dd3abc4e
YH
523 } else {
524 if (ipv6_chk_addr(net, saddr, dev,
525 flags & RT6_LOOKUP_F_IFACE))
526 return sprt;
1da177e4 527 }
dd3abc4e 528 }
1da177e4 529
dd3abc4e 530 if (oif) {
1da177e4
LT
531 if (local)
532 return local;
533
d420895e 534 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 535 return net->ipv6.ip6_null_entry;
1da177e4 536 }
dd3abc4e 537out:
1da177e4
LT
538 return rt;
539}
540
27097255 541#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
542struct __rt6_probe_work {
543 struct work_struct work;
544 struct in6_addr target;
545 struct net_device *dev;
546};
547
548static void rt6_probe_deferred(struct work_struct *w)
549{
550 struct in6_addr mcaddr;
551 struct __rt6_probe_work *work =
552 container_of(w, struct __rt6_probe_work, work);
553
554 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 555 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 556 dev_put(work->dev);
662f5533 557 kfree(work);
c2f17e82
HFS
558}
559
27097255
YH
560static void rt6_probe(struct rt6_info *rt)
561{
990edb42 562 struct __rt6_probe_work *work;
f2c31e32 563 struct neighbour *neigh;
27097255
YH
564 /*
565 * Okay, this does not seem to be appropriate
566 * for now, however, we need to check if it
567 * is really so; aka Router Reachability Probing.
568 *
569 * Router Reachability Probe MUST be rate-limited
570 * to no more than one per minute.
571 */
2152caea 572 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 573 return;
2152caea
YH
574 rcu_read_lock_bh();
575 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
576 if (neigh) {
8d6c31bf
MKL
577 if (neigh->nud_state & NUD_VALID)
578 goto out;
579
990edb42 580 work = NULL;
2152caea 581 write_lock(&neigh->lock);
990edb42
MKL
582 if (!(neigh->nud_state & NUD_VALID) &&
583 time_after(jiffies,
584 neigh->updated +
585 rt->rt6i_idev->cnf.rtr_probe_interval)) {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 if (work)
588 __neigh_set_probe_once(neigh);
c2f17e82 589 }
2152caea 590 write_unlock(&neigh->lock);
990edb42
MKL
591 } else {
592 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 593 }
990edb42
MKL
594
595 if (work) {
596 INIT_WORK(&work->work, rt6_probe_deferred);
597 work->target = rt->rt6i_gateway;
598 dev_hold(rt->dst.dev);
599 work->dev = rt->dst.dev;
600 schedule_work(&work->work);
601 }
602
8d6c31bf 603out:
2152caea 604 rcu_read_unlock_bh();
27097255
YH
605}
606#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
610#endif
611
1da177e4 612/*
554cfb7e 613 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 614 */
b6f99a21 615static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 616{
d1918542 617 struct net_device *dev = rt->dst.dev;
161980f4 618 if (!oif || dev->ifindex == oif)
554cfb7e 619 return 2;
161980f4
DM
620 if ((dev->flags & IFF_LOOPBACK) &&
621 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
622 return 1;
623 return 0;
554cfb7e 624}
1da177e4 625
afc154e9 626static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 627{
f2c31e32 628 struct neighbour *neigh;
afc154e9 629 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 630
4d0c5911
YH
631 if (rt->rt6i_flags & RTF_NONEXTHOP ||
632 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 633 return RT6_NUD_SUCCEED;
145a3621
YH
634
635 rcu_read_lock_bh();
636 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
637 if (neigh) {
638 read_lock(&neigh->lock);
554cfb7e 639 if (neigh->nud_state & NUD_VALID)
afc154e9 640 ret = RT6_NUD_SUCCEED;
398bcbeb 641#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 642 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 643 ret = RT6_NUD_SUCCEED;
7e980569
JB
644 else
645 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 646#endif
145a3621 647 read_unlock(&neigh->lock);
afc154e9
HFS
648 } else {
649 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 650 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 651 }
145a3621
YH
652 rcu_read_unlock_bh();
653
a5a81f0b 654 return ret;
1da177e4
LT
655}
656
554cfb7e
YH
657static int rt6_score_route(struct rt6_info *rt, int oif,
658 int strict)
1da177e4 659{
a5a81f0b 660 int m;
1ab1457c 661
4d0c5911 662 m = rt6_check_dev(rt, oif);
77d16f45 663 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 664 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
665#ifdef CONFIG_IPV6_ROUTER_PREF
666 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
667#endif
afc154e9
HFS
668 if (strict & RT6_LOOKUP_F_REACHABLE) {
669 int n = rt6_check_neigh(rt);
670 if (n < 0)
671 return n;
672 }
554cfb7e
YH
673 return m;
674}
675
f11e6659 676static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
677 int *mpri, struct rt6_info *match,
678 bool *do_rr)
554cfb7e 679{
f11e6659 680 int m;
afc154e9 681 bool match_do_rr = false;
35103d11
AG
682 struct inet6_dev *idev = rt->rt6i_idev;
683 struct net_device *dev = rt->dst.dev;
684
685 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
686 idev->cnf.ignore_routes_with_linkdown &&
687 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 688 goto out;
f11e6659
DM
689
690 if (rt6_check_expired(rt))
691 goto out;
692
693 m = rt6_score_route(rt, oif, strict);
7e980569 694 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
695 match_do_rr = true;
696 m = 0; /* lowest valid score */
7e980569 697 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 698 goto out;
afc154e9
HFS
699 }
700
701 if (strict & RT6_LOOKUP_F_REACHABLE)
702 rt6_probe(rt);
f11e6659 703
7e980569 704 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 705 if (m > *mpri) {
afc154e9 706 *do_rr = match_do_rr;
f11e6659
DM
707 *mpri = m;
708 match = rt;
f11e6659 709 }
f11e6659
DM
710out:
711 return match;
712}
713
714static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
715 struct rt6_info *rr_head,
afc154e9
HFS
716 u32 metric, int oif, int strict,
717 bool *do_rr)
f11e6659 718{
9fbdcfaf 719 struct rt6_info *rt, *match, *cont;
554cfb7e 720 int mpri = -1;
1da177e4 721
f11e6659 722 match = NULL;
9fbdcfaf
SK
723 cont = NULL;
724 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
731 }
732
733 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
745 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
f11e6659
DM
751static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
752{
753 struct rt6_info *match, *rt0;
8ed67789 754 struct net *net;
afc154e9 755 bool do_rr = false;
1da177e4 756
f11e6659
DM
757 rt0 = fn->rr_ptr;
758 if (!rt0)
759 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 760
afc154e9
HFS
761 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
d8d1f30b 765 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
f11e6659
DM
768 if (!next || next->rt6i_metric != rt0->rt6i_metric)
769 next = fn->leaf;
770
771 if (next != rt0)
772 fn->rr_ptr = next;
1da177e4 773 }
1da177e4 774
d1918542 775 net = dev_net(rt0->dst.dev);
a02cec21 776 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
777}
778
8b9df265
MKL
779static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
780{
781 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
782}
783
70ceb4f5
YH
784#ifdef CONFIG_IPV6_ROUTE_INFO
785int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 786 const struct in6_addr *gwaddr)
70ceb4f5 787{
c346dca1 788 struct net *net = dev_net(dev);
70ceb4f5
YH
789 struct route_info *rinfo = (struct route_info *) opt;
790 struct in6_addr prefix_buf, *prefix;
791 unsigned int pref;
4bed72e4 792 unsigned long lifetime;
70ceb4f5
YH
793 struct rt6_info *rt;
794
795 if (len < sizeof(struct route_info)) {
796 return -EINVAL;
797 }
798
799 /* Sanity check for prefix_len and length */
800 if (rinfo->length > 3) {
801 return -EINVAL;
802 } else if (rinfo->prefix_len > 128) {
803 return -EINVAL;
804 } else if (rinfo->prefix_len > 64) {
805 if (rinfo->length < 2) {
806 return -EINVAL;
807 }
808 } else if (rinfo->prefix_len > 0) {
809 if (rinfo->length < 1) {
810 return -EINVAL;
811 }
812 }
813
814 pref = rinfo->route_pref;
815 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 816 return -EINVAL;
70ceb4f5 817
4bed72e4 818 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
819
820 if (rinfo->length == 3)
821 prefix = (struct in6_addr *)rinfo->prefix;
822 else {
823 /* this function is safe */
824 ipv6_addr_prefix(&prefix_buf,
825 (struct in6_addr *)rinfo->prefix,
826 rinfo->prefix_len);
827 prefix = &prefix_buf;
828 }
829
f104a567
DJ
830 if (rinfo->prefix_len == 0)
831 rt = rt6_get_dflt_router(gwaddr, dev);
832 else
833 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 834 gwaddr, dev);
70ceb4f5
YH
835
836 if (rt && !lifetime) {
e0a1ad73 837 ip6_del_rt(rt);
70ceb4f5
YH
838 rt = NULL;
839 }
840
841 if (!rt && lifetime)
830218c1
DA
842 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
843 dev, pref);
70ceb4f5
YH
844 else if (rt)
845 rt->rt6i_flags = RTF_ROUTEINFO |
846 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
847
848 if (rt) {
1716a961
G
849 if (!addrconf_finite_timeout(lifetime))
850 rt6_clean_expires(rt);
851 else
852 rt6_set_expires(rt, jiffies + HZ * lifetime);
853
94e187c0 854 ip6_rt_put(rt);
70ceb4f5
YH
855 }
856 return 0;
857}
858#endif
859
a3c00e46
MKL
860static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
861 struct in6_addr *saddr)
862{
863 struct fib6_node *pn;
864 while (1) {
865 if (fn->fn_flags & RTN_TL_ROOT)
866 return NULL;
867 pn = fn->parent;
868 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
869 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
870 else
871 fn = pn;
872 if (fn->fn_flags & RTN_RTINFO)
873 return fn;
874 }
875}
c71099ac 876
8ed67789
DL
877static struct rt6_info *ip6_pol_route_lookup(struct net *net,
878 struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
1da177e4
LT
880{
881 struct fib6_node *fn;
882 struct rt6_info *rt;
883
c71099ac 884 read_lock_bh(&table->tb6_lock);
4c9483b2 885 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
886restart:
887 rt = fn->leaf;
4c9483b2 888 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 889 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 890 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
891 if (rt == net->ipv6.ip6_null_entry) {
892 fn = fib6_backtrack(fn, &fl6->saddr);
893 if (fn)
894 goto restart;
895 }
d8d1f30b 896 dst_use(&rt->dst, jiffies);
c71099ac 897 read_unlock_bh(&table->tb6_lock);
b811580d
DA
898
899 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
900
c71099ac
TG
901 return rt;
902
903}
904
67ba4152 905struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
906 int flags)
907{
908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
909}
910EXPORT_SYMBOL_GPL(ip6_route_lookup);
911
9acd9f3a
YH
912struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
913 const struct in6_addr *saddr, int oif, int strict)
c71099ac 914{
4c9483b2
DM
915 struct flowi6 fl6 = {
916 .flowi6_oif = oif,
917 .daddr = *daddr,
c71099ac
TG
918 };
919 struct dst_entry *dst;
77d16f45 920 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 921
adaa70bb 922 if (saddr) {
4c9483b2 923 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
924 flags |= RT6_LOOKUP_F_HAS_SADDR;
925 }
926
4c9483b2 927 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
928 if (dst->error == 0)
929 return (struct rt6_info *) dst;
930
931 dst_release(dst);
932
1da177e4
LT
933 return NULL;
934}
7159039a
YH
935EXPORT_SYMBOL(rt6_lookup);
936
c71099ac 937/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
938 * It takes a new route entry; if the addition fails for any reason the
939 * route is released.
940 * Caller must hold dst before calling it.
1da177e4
LT
941 */
942
e5fd387a 943static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
944 struct mx6_config *mxc,
945 struct netlink_ext_ack *extack)
1da177e4
LT
946{
947 int err;
c71099ac 948 struct fib6_table *table;
1da177e4 949
c71099ac
TG
950 table = rt->rt6i_table;
951 write_lock_bh(&table->tb6_lock);
333c4301 952 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 953 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
954
955 return err;
956}
957
40e22e8f
TG
958int ip6_ins_rt(struct rt6_info *rt)
959{
e715b6d3
FW
960 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
961 struct mx6_config mxc = { .mx = NULL, };
962
1cfb71ee
WW
963 /* Hold dst to account for the reference from the fib6 tree */
964 dst_hold(&rt->dst);
333c4301 965 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
966}
967
4832c30d
DA
968/* called with rcu_lock held */
969static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
970{
971 struct net_device *dev = rt->dst.dev;
972
973 if (rt->rt6i_flags & RTF_LOCAL) {
974 /* for copies of local routes, dst->dev needs to be the
975 * device if it is a master device, the master device if
976 * device is enslaved, and the loopback as the default
977 */
978 if (netif_is_l3_slave(dev) &&
979 !rt6_need_strict(&rt->rt6i_dst.addr))
980 dev = l3mdev_master_dev_rcu(dev);
981 else if (!netif_is_l3_master(dev))
982 dev = dev_net(dev)->loopback_dev;
983 /* last case is netif_is_l3_master(dev) is true in which
984 * case we want dev returned to be dev
985 */
986 }
987
988 return dev;
989}
990
8b9df265
MKL
991static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
992 const struct in6_addr *daddr,
993 const struct in6_addr *saddr)
1da177e4 994{
4832c30d 995 struct net_device *dev;
1da177e4
LT
996 struct rt6_info *rt;
997
998 /*
999 * Clone the route.
1000 */
1001
d52d3997 1002 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 1003 ort = (struct rt6_info *)ort->dst.from;
1da177e4 1004
4832c30d
DA
1005 rcu_read_lock();
1006 dev = ip6_rt_get_dev_rcu(ort);
1007 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1008 rcu_read_unlock();
83a09abd
MKL
1009 if (!rt)
1010 return NULL;
1011
1012 ip6_rt_copy_init(rt, ort);
1013 rt->rt6i_flags |= RTF_CACHE;
1014 rt->rt6i_metric = 0;
1015 rt->dst.flags |= DST_HOST;
1016 rt->rt6i_dst.addr = *daddr;
1017 rt->rt6i_dst.plen = 128;
1da177e4 1018
83a09abd
MKL
1019 if (!rt6_is_gw_or_nonexthop(ort)) {
1020 if (ort->rt6i_dst.plen != 128 &&
1021 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1022 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1023#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1024 if (rt->rt6i_src.plen && saddr) {
1025 rt->rt6i_src.addr = *saddr;
1026 rt->rt6i_src.plen = 128;
8b9df265 1027 }
83a09abd 1028#endif
95a9a5ba 1029 }
1da177e4 1030
95a9a5ba
YH
1031 return rt;
1032}
1da177e4 1033
d52d3997
MKL
1034static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1035{
4832c30d 1036 struct net_device *dev;
d52d3997
MKL
1037 struct rt6_info *pcpu_rt;
1038
4832c30d
DA
1039 rcu_read_lock();
1040 dev = ip6_rt_get_dev_rcu(rt);
1041 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1042 rcu_read_unlock();
d52d3997
MKL
1043 if (!pcpu_rt)
1044 return NULL;
1045 ip6_rt_copy_init(pcpu_rt, rt);
1046 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1047 pcpu_rt->rt6i_flags |= RTF_PCPU;
1048 return pcpu_rt;
1049}
1050
1051/* It should be called with read_lock_bh(&tb6_lock) acquired */
1052static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1053{
a73e4195 1054 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1055
1056 p = this_cpu_ptr(rt->rt6i_pcpu);
1057 pcpu_rt = *p;
1058
a73e4195
MKL
1059 if (pcpu_rt) {
1060 dst_hold(&pcpu_rt->dst);
1061 rt6_dst_from_metrics_check(pcpu_rt);
1062 }
1063 return pcpu_rt;
1064}
1065
1066static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1067{
9c7370a1 1068 struct fib6_table *table = rt->rt6i_table;
a73e4195 1069 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1070
1071 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1072 if (!pcpu_rt) {
1073 struct net *net = dev_net(rt->dst.dev);
1074
9c7370a1
MKL
1075 dst_hold(&net->ipv6.ip6_null_entry->dst);
1076 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1077 }
1078
9c7370a1
MKL
1079 read_lock_bh(&table->tb6_lock);
1080 if (rt->rt6i_pcpu) {
1081 p = this_cpu_ptr(rt->rt6i_pcpu);
1082 prev = cmpxchg(p, NULL, pcpu_rt);
1083 if (prev) {
1084 /* If someone did it before us, return prev instead */
587fea74 1085 dst_release_immediate(&pcpu_rt->dst);
9c7370a1
MKL
1086 pcpu_rt = prev;
1087 }
1088 } else {
1089 /* rt has been removed from the fib6 tree
1090 * before we have a chance to acquire the read_lock.
1091 * In this case, don't brother to create a pcpu rt
1092 * since rt is going away anyway. The next
1093 * dst_check() will trigger a re-lookup.
1094 */
587fea74 1095 dst_release_immediate(&pcpu_rt->dst);
9c7370a1 1096 pcpu_rt = rt;
d52d3997 1097 }
d52d3997
MKL
1098 dst_hold(&pcpu_rt->dst);
1099 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1100 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1101 return pcpu_rt;
1102}
1103
35732d01
WW
1104/* exception hash table implementation
1105 */
1106static DEFINE_SPINLOCK(rt6_exception_lock);
1107
1108/* Remove rt6_ex from hash table and free the memory
1109 * Caller must hold rt6_exception_lock
1110 */
1111static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1112 struct rt6_exception *rt6_ex)
1113{
1114 if (!bucket || !rt6_ex)
1115 return;
1116 rt6_ex->rt6i->rt6i_node = NULL;
1117 hlist_del_rcu(&rt6_ex->hlist);
1118 rt6_release(rt6_ex->rt6i);
1119 kfree_rcu(rt6_ex, rcu);
1120 WARN_ON_ONCE(!bucket->depth);
1121 bucket->depth--;
1122}
1123
1124/* Remove oldest rt6_ex in bucket and free the memory
1125 * Caller must hold rt6_exception_lock
1126 */
1127static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1128{
1129 struct rt6_exception *rt6_ex, *oldest = NULL;
1130
1131 if (!bucket)
1132 return;
1133
1134 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1135 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1136 oldest = rt6_ex;
1137 }
1138 rt6_remove_exception(bucket, oldest);
1139}
1140
1141static u32 rt6_exception_hash(const struct in6_addr *dst,
1142 const struct in6_addr *src)
1143{
1144 static u32 seed __read_mostly;
1145 u32 val;
1146
1147 net_get_random_once(&seed, sizeof(seed));
1148 val = jhash(dst, sizeof(*dst), seed);
1149
1150#ifdef CONFIG_IPV6_SUBTREES
1151 if (src)
1152 val = jhash(src, sizeof(*src), val);
1153#endif
1154 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1155}
1156
1157/* Helper function to find the cached rt in the hash table
1158 * and update bucket pointer to point to the bucket for this
1159 * (daddr, saddr) pair
1160 * Caller must hold rt6_exception_lock
1161 */
1162static struct rt6_exception *
1163__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1164 const struct in6_addr *daddr,
1165 const struct in6_addr *saddr)
1166{
1167 struct rt6_exception *rt6_ex;
1168 u32 hval;
1169
1170 if (!(*bucket) || !daddr)
1171 return NULL;
1172
1173 hval = rt6_exception_hash(daddr, saddr);
1174 *bucket += hval;
1175
1176 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1177 struct rt6_info *rt6 = rt6_ex->rt6i;
1178 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1179
1180#ifdef CONFIG_IPV6_SUBTREES
1181 if (matched && saddr)
1182 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1183#endif
1184 if (matched)
1185 return rt6_ex;
1186 }
1187 return NULL;
1188}
1189
1190/* Helper function to find the cached rt in the hash table
1191 * and update bucket pointer to point to the bucket for this
1192 * (daddr, saddr) pair
1193 * Caller must hold rcu_read_lock()
1194 */
1195static struct rt6_exception *
1196__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1197 const struct in6_addr *daddr,
1198 const struct in6_addr *saddr)
1199{
1200 struct rt6_exception *rt6_ex;
1201 u32 hval;
1202
1203 WARN_ON_ONCE(!rcu_read_lock_held());
1204
1205 if (!(*bucket) || !daddr)
1206 return NULL;
1207
1208 hval = rt6_exception_hash(daddr, saddr);
1209 *bucket += hval;
1210
1211 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1212 struct rt6_info *rt6 = rt6_ex->rt6i;
1213 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1214
1215#ifdef CONFIG_IPV6_SUBTREES
1216 if (matched && saddr)
1217 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1218#endif
1219 if (matched)
1220 return rt6_ex;
1221 }
1222 return NULL;
1223}
1224
1225static int rt6_insert_exception(struct rt6_info *nrt,
1226 struct rt6_info *ort)
1227{
1228 struct rt6_exception_bucket *bucket;
1229 struct in6_addr *src_key = NULL;
1230 struct rt6_exception *rt6_ex;
1231 int err = 0;
1232
1233 /* ort can't be a cache or pcpu route */
1234 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1235 ort = (struct rt6_info *)ort->dst.from;
1236 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1237
1238 spin_lock_bh(&rt6_exception_lock);
1239
1240 if (ort->exception_bucket_flushed) {
1241 err = -EINVAL;
1242 goto out;
1243 }
1244
1245 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1246 lockdep_is_held(&rt6_exception_lock));
1247 if (!bucket) {
1248 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1249 GFP_ATOMIC);
1250 if (!bucket) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1255 }
1256
1257#ifdef CONFIG_IPV6_SUBTREES
1258 /* rt6i_src.plen != 0 indicates ort is in subtree
1259 * and exception table is indexed by a hash of
1260 * both rt6i_dst and rt6i_src.
1261 * Otherwise, the exception table is indexed by
1262 * a hash of only rt6i_dst.
1263 */
1264 if (ort->rt6i_src.plen)
1265 src_key = &nrt->rt6i_src.addr;
1266#endif
60006a48
WW
1267
1268 /* Update rt6i_prefsrc as it could be changed
1269 * in rt6_remove_prefsrc()
1270 */
1271 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
1272
35732d01
WW
1273 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1274 src_key);
1275 if (rt6_ex)
1276 rt6_remove_exception(bucket, rt6_ex);
1277
1278 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1279 if (!rt6_ex) {
1280 err = -ENOMEM;
1281 goto out;
1282 }
1283 rt6_ex->rt6i = nrt;
1284 rt6_ex->stamp = jiffies;
1285 atomic_inc(&nrt->rt6i_ref);
1286 nrt->rt6i_node = ort->rt6i_node;
1287 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1288 bucket->depth++;
1289
1290 if (bucket->depth > FIB6_MAX_DEPTH)
1291 rt6_exception_remove_oldest(bucket);
1292
1293out:
1294 spin_unlock_bh(&rt6_exception_lock);
1295
1296 /* Update fn->fn_sernum to invalidate all cached dst */
1297 if (!err)
1298 fib6_update_sernum(ort);
1299
1300 return err;
1301}
1302
1303void rt6_flush_exceptions(struct rt6_info *rt)
1304{
1305 struct rt6_exception_bucket *bucket;
1306 struct rt6_exception *rt6_ex;
1307 struct hlist_node *tmp;
1308 int i;
1309
1310 spin_lock_bh(&rt6_exception_lock);
1311 /* Prevent rt6_insert_exception() to recreate the bucket list */
1312 rt->exception_bucket_flushed = 1;
1313
1314 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1315 lockdep_is_held(&rt6_exception_lock));
1316 if (!bucket)
1317 goto out;
1318
1319 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1320 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1321 rt6_remove_exception(bucket, rt6_ex);
1322 WARN_ON_ONCE(bucket->depth);
1323 bucket++;
1324 }
1325
1326out:
1327 spin_unlock_bh(&rt6_exception_lock);
1328}
1329
1330/* Find cached rt in the hash table inside passed in rt
1331 * Caller has to hold rcu_read_lock()
1332 */
1333static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1334 struct in6_addr *daddr,
1335 struct in6_addr *saddr)
1336{
1337 struct rt6_exception_bucket *bucket;
1338 struct in6_addr *src_key = NULL;
1339 struct rt6_exception *rt6_ex;
1340 struct rt6_info *res = NULL;
1341
1342 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1343
1344#ifdef CONFIG_IPV6_SUBTREES
1345 /* rt6i_src.plen != 0 indicates rt is in subtree
1346 * and exception table is indexed by a hash of
1347 * both rt6i_dst and rt6i_src.
1348 * Otherwise, the exception table is indexed by
1349 * a hash of only rt6i_dst.
1350 */
1351 if (rt->rt6i_src.plen)
1352 src_key = saddr;
1353#endif
1354 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1355
1356 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1357 res = rt6_ex->rt6i;
1358
1359 return res;
1360}
1361
1362/* Remove the passed in cached rt from the hash table that contains it */
1363int rt6_remove_exception_rt(struct rt6_info *rt)
1364{
1365 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1366 struct rt6_exception_bucket *bucket;
1367 struct in6_addr *src_key = NULL;
1368 struct rt6_exception *rt6_ex;
1369 int err;
1370
1371 if (!from ||
1372 !(rt->rt6i_flags | RTF_CACHE))
1373 return -EINVAL;
1374
1375 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1376 return -ENOENT;
1377
1378 spin_lock_bh(&rt6_exception_lock);
1379 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1380 lockdep_is_held(&rt6_exception_lock));
1381#ifdef CONFIG_IPV6_SUBTREES
1382 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1383 * and exception table is indexed by a hash of
1384 * both rt6i_dst and rt6i_src.
1385 * Otherwise, the exception table is indexed by
1386 * a hash of only rt6i_dst.
1387 */
1388 if (from->rt6i_src.plen)
1389 src_key = &rt->rt6i_src.addr;
1390#endif
1391 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1392 &rt->rt6i_dst.addr,
1393 src_key);
1394 if (rt6_ex) {
1395 rt6_remove_exception(bucket, rt6_ex);
1396 err = 0;
1397 } else {
1398 err = -ENOENT;
1399 }
1400
1401 spin_unlock_bh(&rt6_exception_lock);
1402 return err;
1403}
1404
1405/* Find rt6_ex which contains the passed in rt cache and
1406 * refresh its stamp
1407 */
1408static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1409{
1410 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1411 struct rt6_exception_bucket *bucket;
1412 struct in6_addr *src_key = NULL;
1413 struct rt6_exception *rt6_ex;
1414
1415 if (!from ||
1416 !(rt->rt6i_flags | RTF_CACHE))
1417 return;
1418
1419 rcu_read_lock();
1420 bucket = rcu_dereference(from->rt6i_exception_bucket);
1421
1422#ifdef CONFIG_IPV6_SUBTREES
1423 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1424 * and exception table is indexed by a hash of
1425 * both rt6i_dst and rt6i_src.
1426 * Otherwise, the exception table is indexed by
1427 * a hash of only rt6i_dst.
1428 */
1429 if (from->rt6i_src.plen)
1430 src_key = &rt->rt6i_src.addr;
1431#endif
1432 rt6_ex = __rt6_find_exception_rcu(&bucket,
1433 &rt->rt6i_dst.addr,
1434 src_key);
1435 if (rt6_ex)
1436 rt6_ex->stamp = jiffies;
1437
1438 rcu_read_unlock();
1439}
1440
60006a48
WW
1441static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1442{
1443 struct rt6_exception_bucket *bucket;
1444 struct rt6_exception *rt6_ex;
1445 int i;
1446
1447 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1448 lockdep_is_held(&rt6_exception_lock));
1449
1450 if (bucket) {
1451 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1452 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1453 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1454 }
1455 bucket++;
1456 }
1457 }
1458}
1459
9ff74384
DA
1460struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1461 int oif, struct flowi6 *fl6, int flags)
1da177e4 1462{
367efcb9 1463 struct fib6_node *fn, *saved_fn;
45e4fd26 1464 struct rt6_info *rt;
c71099ac 1465 int strict = 0;
1da177e4 1466
77d16f45 1467 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1468 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1469 if (net->ipv6.devconf_all->forwarding == 0)
1470 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1471
c71099ac 1472 read_lock_bh(&table->tb6_lock);
1da177e4 1473
4c9483b2 1474 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1475 saved_fn = fn;
1da177e4 1476
ca254490
DA
1477 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1478 oif = 0;
1479
a3c00e46 1480redo_rt6_select:
367efcb9 1481 rt = rt6_select(fn, oif, strict);
52bd4c0c 1482 if (rt->rt6i_nsiblings)
367efcb9 1483 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1484 if (rt == net->ipv6.ip6_null_entry) {
1485 fn = fib6_backtrack(fn, &fl6->saddr);
1486 if (fn)
1487 goto redo_rt6_select;
367efcb9
MKL
1488 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1489 /* also consider unreachable route */
1490 strict &= ~RT6_LOOKUP_F_REACHABLE;
1491 fn = saved_fn;
1492 goto redo_rt6_select;
367efcb9 1493 }
a3c00e46
MKL
1494 }
1495
fb9de91e 1496
3da59bd9 1497 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1498 dst_use(&rt->dst, jiffies);
1499 read_unlock_bh(&table->tb6_lock);
1500
1501 rt6_dst_from_metrics_check(rt);
b811580d
DA
1502
1503 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1504 return rt;
3da59bd9
MKL
1505 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1506 !(rt->rt6i_flags & RTF_GATEWAY))) {
1507 /* Create a RTF_CACHE clone which will not be
1508 * owned by the fib6 tree. It is for the special case where
1509 * the daddr in the skb during the neighbor look-up is different
1510 * from the fl6->daddr used to look-up route here.
1511 */
1512
1513 struct rt6_info *uncached_rt;
1514
d52d3997
MKL
1515 dst_use(&rt->dst, jiffies);
1516 read_unlock_bh(&table->tb6_lock);
1517
3da59bd9
MKL
1518 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1519 dst_release(&rt->dst);
c71099ac 1520
1cfb71ee
WW
1521 if (uncached_rt) {
1522 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1523 * No need for another dst_hold()
1524 */
8d0b94af 1525 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1526 } else {
3da59bd9 1527 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1528 dst_hold(&uncached_rt->dst);
1529 }
b811580d
DA
1530
1531 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1532 return uncached_rt;
3da59bd9 1533
d52d3997
MKL
1534 } else {
1535 /* Get a percpu copy */
1536
1537 struct rt6_info *pcpu_rt;
1538
1539 rt->dst.lastuse = jiffies;
1540 rt->dst.__use++;
1541 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1542
9c7370a1
MKL
1543 if (pcpu_rt) {
1544 read_unlock_bh(&table->tb6_lock);
1545 } else {
1546 /* We have to do the read_unlock first
1547 * because rt6_make_pcpu_route() may trigger
1548 * ip6_dst_gc() which will take the write_lock.
1549 */
1550 dst_hold(&rt->dst);
1551 read_unlock_bh(&table->tb6_lock);
a73e4195 1552 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1553 dst_release(&rt->dst);
1554 }
d52d3997 1555
b811580d 1556 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1557 return pcpu_rt;
9c7370a1 1558
d52d3997 1559 }
1da177e4 1560}
9ff74384 1561EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1562
8ed67789 1563static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1564 struct flowi6 *fl6, int flags)
4acad72d 1565{
4c9483b2 1566 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1567}
1568
d409b847
MB
1569struct dst_entry *ip6_route_input_lookup(struct net *net,
1570 struct net_device *dev,
1571 struct flowi6 *fl6, int flags)
72331bc0
SL
1572{
1573 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1574 flags |= RT6_LOOKUP_F_IFACE;
1575
1576 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1577}
d409b847 1578EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1579
23aebdac
JS
1580static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1581 struct flow_keys *keys)
1582{
1583 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1584 const struct ipv6hdr *key_iph = outer_iph;
1585 const struct ipv6hdr *inner_iph;
1586 const struct icmp6hdr *icmph;
1587 struct ipv6hdr _inner_iph;
1588
1589 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1590 goto out;
1591
1592 icmph = icmp6_hdr(skb);
1593 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1594 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1595 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1596 icmph->icmp6_type != ICMPV6_PARAMPROB)
1597 goto out;
1598
1599 inner_iph = skb_header_pointer(skb,
1600 skb_transport_offset(skb) + sizeof(*icmph),
1601 sizeof(_inner_iph), &_inner_iph);
1602 if (!inner_iph)
1603 goto out;
1604
1605 key_iph = inner_iph;
1606out:
1607 memset(keys, 0, sizeof(*keys));
1608 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1609 keys->addrs.v6addrs.src = key_iph->saddr;
1610 keys->addrs.v6addrs.dst = key_iph->daddr;
1611 keys->tags.flow_label = ip6_flowinfo(key_iph);
1612 keys->basic.ip_proto = key_iph->nexthdr;
1613}
1614
1615/* if skb is set it will be used and fl6 can be NULL */
1616u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1617{
1618 struct flow_keys hash_keys;
1619
1620 if (skb) {
1621 ip6_multipath_l3_keys(skb, &hash_keys);
1622 return flow_hash_from_keys(&hash_keys);
1623 }
1624
1625 return get_hash_from_flowi6(fl6);
1626}
1627
c71099ac
TG
1628void ip6_route_input(struct sk_buff *skb)
1629{
b71d1d42 1630 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1631 struct net *net = dev_net(skb->dev);
adaa70bb 1632 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1633 struct ip_tunnel_info *tun_info;
4c9483b2 1634 struct flowi6 fl6 = {
e0d56fdd 1635 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1636 .daddr = iph->daddr,
1637 .saddr = iph->saddr,
6502ca52 1638 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1639 .flowi6_mark = skb->mark,
1640 .flowi6_proto = iph->nexthdr,
c71099ac 1641 };
adaa70bb 1642
904af04d 1643 tun_info = skb_tunnel_info(skb);
46fa062a 1644 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1645 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1646 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1647 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1648 skb_dst_drop(skb);
72331bc0 1649 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1650}
1651
8ed67789 1652static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1653 struct flowi6 *fl6, int flags)
1da177e4 1654{
4c9483b2 1655 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1656}
1657
6f21c96a
PA
1658struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1659 struct flowi6 *fl6, int flags)
c71099ac 1660{
d46a9d67 1661 bool any_src;
c71099ac 1662
4c1feac5
DA
1663 if (rt6_need_strict(&fl6->daddr)) {
1664 struct dst_entry *dst;
1665
1666 dst = l3mdev_link_scope_lookup(net, fl6);
1667 if (dst)
1668 return dst;
1669 }
ca254490 1670
1fb9489b 1671 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1672
d46a9d67 1673 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1674 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1675 (fl6->flowi6_oif && any_src))
77d16f45 1676 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1677
d46a9d67 1678 if (!any_src)
adaa70bb 1679 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1680 else if (sk)
1681 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1682
4c9483b2 1683 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1684}
6f21c96a 1685EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1686
2774c131 1687struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1688{
5c1e6aa3 1689 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1690 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1691 struct dst_entry *new = NULL;
1692
1dbe3252 1693 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
b2a9c0ed 1694 DST_OBSOLETE_NONE, 0);
14e50e57 1695 if (rt) {
0a1f5962 1696 rt6_info_init(rt);
8104891b 1697
0a1f5962 1698 new = &rt->dst;
14e50e57 1699 new->__use = 1;
352e512c 1700 new->input = dst_discard;
ede2059d 1701 new->output = dst_discard_out;
14e50e57 1702
0a1f5962 1703 dst_copy_metrics(new, &ort->dst);
14e50e57 1704
1dbe3252 1705 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1706 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1707 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1708 rt->rt6i_metric = 0;
1709
1710 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1711#ifdef CONFIG_IPV6_SUBTREES
1712 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1713#endif
14e50e57
DM
1714 }
1715
69ead7af
DM
1716 dst_release(dst_orig);
1717 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1718}
14e50e57 1719
1da177e4
LT
1720/*
1721 * Destination cache support functions
1722 */
1723
4b32b5ad
MKL
1724static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1725{
1726 if (rt->dst.from &&
1727 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1728 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1729}
1730
3da59bd9
MKL
1731static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1732{
36143645 1733 u32 rt_cookie = 0;
c5cff856
WW
1734
1735 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1736 return NULL;
1737
1738 if (rt6_check_expired(rt))
1739 return NULL;
1740
1741 return &rt->dst;
1742}
1743
1744static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1745{
5973fb1e
MKL
1746 if (!__rt6_check_expired(rt) &&
1747 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1748 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1749 return &rt->dst;
1750 else
1751 return NULL;
1752}
1753
1da177e4
LT
1754static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1755{
1756 struct rt6_info *rt;
1757
1758 rt = (struct rt6_info *) dst;
1759
6f3118b5
ND
1760 /* All IPV6 dsts are created with ->obsolete set to the value
1761 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1762 * into this function always.
1763 */
e3bc10bd 1764
4b32b5ad
MKL
1765 rt6_dst_from_metrics_check(rt);
1766
02bcf4e0 1767 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1768 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1769 return rt6_dst_from_check(rt, cookie);
1770 else
1771 return rt6_check(rt, cookie);
1da177e4
LT
1772}
1773
1774static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1775{
1776 struct rt6_info *rt = (struct rt6_info *) dst;
1777
1778 if (rt) {
54c1a859
YH
1779 if (rt->rt6i_flags & RTF_CACHE) {
1780 if (rt6_check_expired(rt)) {
1781 ip6_del_rt(rt);
1782 dst = NULL;
1783 }
1784 } else {
1da177e4 1785 dst_release(dst);
54c1a859
YH
1786 dst = NULL;
1787 }
1da177e4 1788 }
54c1a859 1789 return dst;
1da177e4
LT
1790}
1791
1792static void ip6_link_failure(struct sk_buff *skb)
1793{
1794 struct rt6_info *rt;
1795
3ffe533c 1796 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1797
adf30907 1798 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1799 if (rt) {
1eb4f758 1800 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1801 if (dst_hold_safe(&rt->dst))
1802 ip6_del_rt(rt);
c5cff856
WW
1803 } else {
1804 struct fib6_node *fn;
1805
1806 rcu_read_lock();
1807 fn = rcu_dereference(rt->rt6i_node);
1808 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1809 fn->fn_sernum = -1;
1810 rcu_read_unlock();
1eb4f758 1811 }
1da177e4
LT
1812 }
1813}
1814
45e4fd26
MKL
1815static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1816{
1817 struct net *net = dev_net(rt->dst.dev);
1818
1819 rt->rt6i_flags |= RTF_MODIFIED;
1820 rt->rt6i_pmtu = mtu;
1821 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1822}
1823
0d3f6d29
MKL
1824static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1825{
1826 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
1827 (rt->rt6i_flags & RTF_PCPU ||
1828 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
1829}
1830
45e4fd26
MKL
1831static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1832 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1833{
0dec879f 1834 const struct in6_addr *daddr, *saddr;
67ba4152 1835 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1836
45e4fd26
MKL
1837 if (rt6->rt6i_flags & RTF_LOCAL)
1838 return;
81aded24 1839
19bda36c
XL
1840 if (dst_metric_locked(dst, RTAX_MTU))
1841 return;
1842
0dec879f
JA
1843 if (iph) {
1844 daddr = &iph->daddr;
1845 saddr = &iph->saddr;
1846 } else if (sk) {
1847 daddr = &sk->sk_v6_daddr;
1848 saddr = &inet6_sk(sk)->saddr;
1849 } else {
1850 daddr = NULL;
1851 saddr = NULL;
1852 }
1853 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1854 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1855 if (mtu >= dst_mtu(dst))
1856 return;
9d289715 1857
0d3f6d29 1858 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1859 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1860 } else if (daddr) {
45e4fd26
MKL
1861 struct rt6_info *nrt6;
1862
45e4fd26
MKL
1863 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1864 if (nrt6) {
1865 rt6_do_update_pmtu(nrt6, mtu);
1866
1867 /* ip6_ins_rt(nrt6) will bump the
1868 * rt6->rt6i_node->fn_sernum
1869 * which will fail the next rt6_check() and
1870 * invalidate the sk->sk_dst_cache.
1871 */
1872 ip6_ins_rt(nrt6);
1cfb71ee
WW
1873 /* Release the reference taken in
1874 * ip6_rt_cache_alloc()
1875 */
1876 dst_release(&nrt6->dst);
45e4fd26 1877 }
1da177e4
LT
1878 }
1879}
1880
45e4fd26
MKL
1881static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1882 struct sk_buff *skb, u32 mtu)
1883{
1884 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1885}
1886
42ae66c8 1887void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1888 int oif, u32 mark, kuid_t uid)
81aded24
DM
1889{
1890 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1891 struct dst_entry *dst;
1892 struct flowi6 fl6;
1893
1894 memset(&fl6, 0, sizeof(fl6));
1895 fl6.flowi6_oif = oif;
1b3c61dc 1896 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1897 fl6.daddr = iph->daddr;
1898 fl6.saddr = iph->saddr;
6502ca52 1899 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1900 fl6.flowi6_uid = uid;
81aded24
DM
1901
1902 dst = ip6_route_output(net, NULL, &fl6);
1903 if (!dst->error)
45e4fd26 1904 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1905 dst_release(dst);
1906}
1907EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1908
1909void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1910{
33c162a9
MKL
1911 struct dst_entry *dst;
1912
81aded24 1913 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1914 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1915
1916 dst = __sk_dst_get(sk);
1917 if (!dst || !dst->obsolete ||
1918 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1919 return;
1920
1921 bh_lock_sock(sk);
1922 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1923 ip6_datagram_dst_update(sk, false);
1924 bh_unlock_sock(sk);
81aded24
DM
1925}
1926EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1927
b55b76b2
DJ
1928/* Handle redirects */
1929struct ip6rd_flowi {
1930 struct flowi6 fl6;
1931 struct in6_addr gateway;
1932};
1933
1934static struct rt6_info *__ip6_route_redirect(struct net *net,
1935 struct fib6_table *table,
1936 struct flowi6 *fl6,
1937 int flags)
1938{
1939 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1940 struct rt6_info *rt;
1941 struct fib6_node *fn;
1942
1943 /* Get the "current" route for this destination and
67c408cf 1944 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1945 *
1946 * RFC 4861 specifies that redirects should only be
1947 * accepted if they come from the nexthop to the target.
1948 * Due to the way the routes are chosen, this notion
1949 * is a bit fuzzy and one might need to check all possible
1950 * routes.
1951 */
1952
1953 read_lock_bh(&table->tb6_lock);
1954 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1955restart:
1956 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1957 if (rt6_check_expired(rt))
1958 continue;
1959 if (rt->dst.error)
1960 break;
1961 if (!(rt->rt6i_flags & RTF_GATEWAY))
1962 continue;
1963 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1964 continue;
1965 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1966 continue;
1967 break;
1968 }
1969
1970 if (!rt)
1971 rt = net->ipv6.ip6_null_entry;
1972 else if (rt->dst.error) {
1973 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1974 goto out;
1975 }
1976
1977 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1978 fn = fib6_backtrack(fn, &fl6->saddr);
1979 if (fn)
1980 goto restart;
b55b76b2 1981 }
a3c00e46 1982
b0a1ba59 1983out:
b55b76b2
DJ
1984 dst_hold(&rt->dst);
1985
1986 read_unlock_bh(&table->tb6_lock);
1987
b811580d 1988 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1989 return rt;
1990};
1991
1992static struct dst_entry *ip6_route_redirect(struct net *net,
1993 const struct flowi6 *fl6,
1994 const struct in6_addr *gateway)
1995{
1996 int flags = RT6_LOOKUP_F_HAS_SADDR;
1997 struct ip6rd_flowi rdfl;
1998
1999 rdfl.fl6 = *fl6;
2000 rdfl.gateway = *gateway;
2001
2002 return fib6_rule_lookup(net, &rdfl.fl6,
2003 flags, __ip6_route_redirect);
2004}
2005
e2d118a1
LC
2006void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2007 kuid_t uid)
3a5ad2ee
DM
2008{
2009 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2010 struct dst_entry *dst;
2011 struct flowi6 fl6;
2012
2013 memset(&fl6, 0, sizeof(fl6));
e374c618 2014 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2015 fl6.flowi6_oif = oif;
2016 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2017 fl6.daddr = iph->daddr;
2018 fl6.saddr = iph->saddr;
6502ca52 2019 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2020 fl6.flowi6_uid = uid;
3a5ad2ee 2021
b55b76b2
DJ
2022 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2023 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2024 dst_release(dst);
2025}
2026EXPORT_SYMBOL_GPL(ip6_redirect);
2027
c92a59ec
DJ
2028void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2029 u32 mark)
2030{
2031 const struct ipv6hdr *iph = ipv6_hdr(skb);
2032 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2033 struct dst_entry *dst;
2034 struct flowi6 fl6;
2035
2036 memset(&fl6, 0, sizeof(fl6));
e374c618 2037 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2038 fl6.flowi6_oif = oif;
2039 fl6.flowi6_mark = mark;
c92a59ec
DJ
2040 fl6.daddr = msg->dest;
2041 fl6.saddr = iph->daddr;
e2d118a1 2042 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2043
b55b76b2
DJ
2044 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2045 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2046 dst_release(dst);
2047}
2048
3a5ad2ee
DM
2049void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2050{
e2d118a1
LC
2051 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2052 sk->sk_uid);
3a5ad2ee
DM
2053}
2054EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2055
0dbaee3b 2056static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2057{
0dbaee3b
DM
2058 struct net_device *dev = dst->dev;
2059 unsigned int mtu = dst_mtu(dst);
2060 struct net *net = dev_net(dev);
2061
1da177e4
LT
2062 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2063
5578689a
DL
2064 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2065 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2066
2067 /*
1ab1457c
YH
2068 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2069 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2070 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2071 * rely only on pmtu discovery"
2072 */
2073 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2074 mtu = IPV6_MAXPLEN;
2075 return mtu;
2076}
2077
ebb762f2 2078static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2079{
4b32b5ad
MKL
2080 const struct rt6_info *rt = (const struct rt6_info *)dst;
2081 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2082 struct inet6_dev *idev;
618f9bc7 2083
4b32b5ad
MKL
2084 if (mtu)
2085 goto out;
2086
2087 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2088 if (mtu)
30f78d8e 2089 goto out;
618f9bc7
SK
2090
2091 mtu = IPV6_MIN_MTU;
d33e4553
DM
2092
2093 rcu_read_lock();
2094 idev = __in6_dev_get(dst->dev);
2095 if (idev)
2096 mtu = idev->cnf.mtu6;
2097 rcu_read_unlock();
2098
30f78d8e 2099out:
14972cbd
RP
2100 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2101
2102 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2103}
2104
3b00944c 2105struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2106 struct flowi6 *fl6)
1da177e4 2107{
87a11578 2108 struct dst_entry *dst;
1da177e4
LT
2109 struct rt6_info *rt;
2110 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2111 struct net *net = dev_net(dev);
1da177e4 2112
38308473 2113 if (unlikely(!idev))
122bdf67 2114 return ERR_PTR(-ENODEV);
1da177e4 2115
ad706862 2116 rt = ip6_dst_alloc(net, dev, 0);
38308473 2117 if (unlikely(!rt)) {
1da177e4 2118 in6_dev_put(idev);
87a11578 2119 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2120 goto out;
2121 }
2122
8e2ec639
YZ
2123 rt->dst.flags |= DST_HOST;
2124 rt->dst.output = ip6_output;
550bab42 2125 rt->rt6i_gateway = fl6->daddr;
87a11578 2126 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2127 rt->rt6i_dst.plen = 128;
2128 rt->rt6i_idev = idev;
14edd87d 2129 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2130
587fea74
WW
2131 /* Add this dst into uncached_list so that rt6_ifdown() can
2132 * do proper release of the net_device
2133 */
2134 rt6_uncached_list_add(rt);
1da177e4 2135
87a11578
DM
2136 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2137
1da177e4 2138out:
87a11578 2139 return dst;
1da177e4
LT
2140}
2141
569d3645 2142static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2143{
86393e52 2144 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2145 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2146 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2147 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2148 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2149 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2150 int entries;
7019b78e 2151
fc66f95c 2152 entries = dst_entries_get_fast(ops);
49a18d86 2153 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2154 entries <= rt_max_size)
1da177e4
LT
2155 goto out;
2156
6891a346 2157 net->ipv6.ip6_rt_gc_expire++;
14956643 2158 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2159 entries = dst_entries_get_slow(ops);
2160 if (entries < ops->gc_thresh)
7019b78e 2161 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2162out:
7019b78e 2163 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2164 return entries > rt_max_size;
1da177e4
LT
2165}
2166
e715b6d3
FW
2167static int ip6_convert_metrics(struct mx6_config *mxc,
2168 const struct fib6_config *cfg)
2169{
c3a8d947 2170 bool ecn_ca = false;
e715b6d3
FW
2171 struct nlattr *nla;
2172 int remaining;
2173 u32 *mp;
2174
63159f29 2175 if (!cfg->fc_mx)
e715b6d3
FW
2176 return 0;
2177
2178 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2179 if (unlikely(!mp))
2180 return -ENOMEM;
2181
2182 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2183 int type = nla_type(nla);
1bb14807 2184 u32 val;
e715b6d3 2185
1bb14807
DB
2186 if (!type)
2187 continue;
2188 if (unlikely(type > RTAX_MAX))
2189 goto err;
ea697639 2190
1bb14807
DB
2191 if (type == RTAX_CC_ALGO) {
2192 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2193
1bb14807 2194 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 2195 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
2196 if (val == TCP_CA_UNSPEC)
2197 goto err;
2198 } else {
2199 val = nla_get_u32(nla);
e715b6d3 2200 }
626abd59
PA
2201 if (type == RTAX_HOPLIMIT && val > 255)
2202 val = 255;
b8d3e416
DB
2203 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2204 goto err;
1bb14807
DB
2205
2206 mp[type - 1] = val;
2207 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2208 }
2209
c3a8d947
DB
2210 if (ecn_ca) {
2211 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2212 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2213 }
e715b6d3 2214
c3a8d947 2215 mxc->mx = mp;
e715b6d3
FW
2216 return 0;
2217 err:
2218 kfree(mp);
2219 return -EINVAL;
2220}
1da177e4 2221
8c14586f
DA
2222static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2223 struct fib6_config *cfg,
2224 const struct in6_addr *gw_addr)
2225{
2226 struct flowi6 fl6 = {
2227 .flowi6_oif = cfg->fc_ifindex,
2228 .daddr = *gw_addr,
2229 .saddr = cfg->fc_prefsrc,
2230 };
2231 struct fib6_table *table;
2232 struct rt6_info *rt;
d5d32e4b 2233 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2234
2235 table = fib6_get_table(net, cfg->fc_table);
2236 if (!table)
2237 return NULL;
2238
2239 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2240 flags |= RT6_LOOKUP_F_HAS_SADDR;
2241
2242 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2243
2244 /* if table lookup failed, fall back to full lookup */
2245 if (rt == net->ipv6.ip6_null_entry) {
2246 ip6_rt_put(rt);
2247 rt = NULL;
2248 }
2249
2250 return rt;
2251}
2252
333c4301
DA
2253static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2254 struct netlink_ext_ack *extack)
1da177e4 2255{
5578689a 2256 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2257 struct rt6_info *rt = NULL;
2258 struct net_device *dev = NULL;
2259 struct inet6_dev *idev = NULL;
c71099ac 2260 struct fib6_table *table;
1da177e4 2261 int addr_type;
8c5b83f0 2262 int err = -EINVAL;
1da177e4 2263
557c44be 2264 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2265 if (cfg->fc_flags & RTF_PCPU) {
2266 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2267 goto out;
d5d531cb 2268 }
557c44be 2269
d5d531cb
DA
2270 if (cfg->fc_dst_len > 128) {
2271 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2272 goto out;
2273 }
2274 if (cfg->fc_src_len > 128) {
2275 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2276 goto out;
d5d531cb 2277 }
1da177e4 2278#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2279 if (cfg->fc_src_len) {
2280 NL_SET_ERR_MSG(extack,
2281 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2282 goto out;
d5d531cb 2283 }
1da177e4 2284#endif
86872cb5 2285 if (cfg->fc_ifindex) {
1da177e4 2286 err = -ENODEV;
5578689a 2287 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2288 if (!dev)
2289 goto out;
2290 idev = in6_dev_get(dev);
2291 if (!idev)
2292 goto out;
2293 }
2294
86872cb5
TG
2295 if (cfg->fc_metric == 0)
2296 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2297
d71314b4 2298 err = -ENOBUFS;
38308473
DM
2299 if (cfg->fc_nlinfo.nlh &&
2300 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2301 table = fib6_get_table(net, cfg->fc_table);
38308473 2302 if (!table) {
f3213831 2303 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2304 table = fib6_new_table(net, cfg->fc_table);
2305 }
2306 } else {
2307 table = fib6_new_table(net, cfg->fc_table);
2308 }
38308473
DM
2309
2310 if (!table)
c71099ac 2311 goto out;
c71099ac 2312
ad706862
MKL
2313 rt = ip6_dst_alloc(net, NULL,
2314 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2315
38308473 2316 if (!rt) {
1da177e4
LT
2317 err = -ENOMEM;
2318 goto out;
2319 }
2320
1716a961
G
2321 if (cfg->fc_flags & RTF_EXPIRES)
2322 rt6_set_expires(rt, jiffies +
2323 clock_t_to_jiffies(cfg->fc_expires));
2324 else
2325 rt6_clean_expires(rt);
1da177e4 2326
86872cb5
TG
2327 if (cfg->fc_protocol == RTPROT_UNSPEC)
2328 cfg->fc_protocol = RTPROT_BOOT;
2329 rt->rt6i_protocol = cfg->fc_protocol;
2330
2331 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2332
2333 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2334 rt->dst.input = ip6_mc_input;
ab79ad14
2335 else if (cfg->fc_flags & RTF_LOCAL)
2336 rt->dst.input = ip6_input;
1da177e4 2337 else
d8d1f30b 2338 rt->dst.input = ip6_forward;
1da177e4 2339
d8d1f30b 2340 rt->dst.output = ip6_output;
1da177e4 2341
19e42e45
RP
2342 if (cfg->fc_encap) {
2343 struct lwtunnel_state *lwtstate;
2344
30357d7d 2345 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2346 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2347 &lwtstate, extack);
19e42e45
RP
2348 if (err)
2349 goto out;
61adedf3
JB
2350 rt->dst.lwtstate = lwtstate_get(lwtstate);
2351 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2352 rt->dst.lwtstate->orig_output = rt->dst.output;
2353 rt->dst.output = lwtunnel_output;
25368623 2354 }
61adedf3
JB
2355 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2356 rt->dst.lwtstate->orig_input = rt->dst.input;
2357 rt->dst.input = lwtunnel_input;
25368623 2358 }
19e42e45
RP
2359 }
2360
86872cb5
TG
2361 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2362 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2363 if (rt->rt6i_dst.plen == 128)
e5fd387a 2364 rt->dst.flags |= DST_HOST;
e5fd387a 2365
1da177e4 2366#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2367 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2368 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2369#endif
2370
86872cb5 2371 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
2372
2373 /* We cannot add true routes via loopback here,
2374 they would result in kernel looping; promote them to reject routes
2375 */
86872cb5 2376 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2377 (dev && (dev->flags & IFF_LOOPBACK) &&
2378 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2379 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2380 /* hold loopback dev/idev if we haven't done so. */
5578689a 2381 if (dev != net->loopback_dev) {
1da177e4
LT
2382 if (dev) {
2383 dev_put(dev);
2384 in6_dev_put(idev);
2385 }
5578689a 2386 dev = net->loopback_dev;
1da177e4
LT
2387 dev_hold(dev);
2388 idev = in6_dev_get(dev);
2389 if (!idev) {
2390 err = -ENODEV;
2391 goto out;
2392 }
2393 }
1da177e4 2394 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2395 switch (cfg->fc_type) {
2396 case RTN_BLACKHOLE:
2397 rt->dst.error = -EINVAL;
ede2059d 2398 rt->dst.output = dst_discard_out;
7150aede 2399 rt->dst.input = dst_discard;
ef2c7d7b
ND
2400 break;
2401 case RTN_PROHIBIT:
2402 rt->dst.error = -EACCES;
7150aede
K
2403 rt->dst.output = ip6_pkt_prohibit_out;
2404 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2405 break;
b4949ab2 2406 case RTN_THROW:
0315e382 2407 case RTN_UNREACHABLE:
ef2c7d7b 2408 default:
7150aede 2409 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2410 : (cfg->fc_type == RTN_UNREACHABLE)
2411 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2412 rt->dst.output = ip6_pkt_discard_out;
2413 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2414 break;
2415 }
1da177e4
LT
2416 goto install_route;
2417 }
2418
86872cb5 2419 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2420 const struct in6_addr *gw_addr;
1da177e4
LT
2421 int gwa_type;
2422
86872cb5 2423 gw_addr = &cfg->fc_gateway;
330567b7 2424 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2425
2426 /* if gw_addr is local we will fail to detect this in case
2427 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2428 * will return already-added prefix route via interface that
2429 * prefix route was assigned to, which might be non-loopback.
2430 */
2431 err = -EINVAL;
330567b7
FW
2432 if (ipv6_chk_addr_and_flags(net, gw_addr,
2433 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2434 dev : NULL, 0, 0)) {
2435 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2436 goto out;
d5d531cb 2437 }
4e3fd7a0 2438 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2439
2440 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2441 struct rt6_info *grt = NULL;
1da177e4
LT
2442
2443 /* IPv6 strictly inhibits using not link-local
2444 addresses as nexthop address.
2445 Otherwise, router will not able to send redirects.
2446 It is very good, but in some (rare!) circumstances
2447 (SIT, PtP, NBMA NOARP links) it is handy to allow
2448 some exceptions. --ANK
96d5822c
EN
2449 We allow IPv4-mapped nexthops to support RFC4798-type
2450 addressing
1da177e4 2451 */
96d5822c 2452 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2453 IPV6_ADDR_MAPPED))) {
2454 NL_SET_ERR_MSG(extack,
2455 "Invalid gateway address");
1da177e4 2456 goto out;
d5d531cb 2457 }
1da177e4 2458
a435a07f 2459 if (cfg->fc_table) {
8c14586f
DA
2460 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2461
a435a07f
VB
2462 if (grt) {
2463 if (grt->rt6i_flags & RTF_GATEWAY ||
2464 (dev && dev != grt->dst.dev)) {
2465 ip6_rt_put(grt);
2466 grt = NULL;
2467 }
2468 }
2469 }
2470
8c14586f
DA
2471 if (!grt)
2472 grt = rt6_lookup(net, gw_addr, NULL,
2473 cfg->fc_ifindex, 1);
1da177e4
LT
2474
2475 err = -EHOSTUNREACH;
38308473 2476 if (!grt)
1da177e4
LT
2477 goto out;
2478 if (dev) {
d1918542 2479 if (dev != grt->dst.dev) {
94e187c0 2480 ip6_rt_put(grt);
1da177e4
LT
2481 goto out;
2482 }
2483 } else {
d1918542 2484 dev = grt->dst.dev;
1da177e4
LT
2485 idev = grt->rt6i_idev;
2486 dev_hold(dev);
2487 in6_dev_hold(grt->rt6i_idev);
2488 }
38308473 2489 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2490 err = 0;
94e187c0 2491 ip6_rt_put(grt);
1da177e4
LT
2492
2493 if (err)
2494 goto out;
2495 }
2496 err = -EINVAL;
d5d531cb
DA
2497 if (!dev) {
2498 NL_SET_ERR_MSG(extack, "Egress device not specified");
2499 goto out;
2500 } else if (dev->flags & IFF_LOOPBACK) {
2501 NL_SET_ERR_MSG(extack,
2502 "Egress device can not be loopback device for this route");
1da177e4 2503 goto out;
d5d531cb 2504 }
1da177e4
LT
2505 }
2506
2507 err = -ENODEV;
38308473 2508 if (!dev)
1da177e4
LT
2509 goto out;
2510
c3968a85
DW
2511 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2512 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2513 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2514 err = -EINVAL;
2515 goto out;
2516 }
4e3fd7a0 2517 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2518 rt->rt6i_prefsrc.plen = 128;
2519 } else
2520 rt->rt6i_prefsrc.plen = 0;
2521
86872cb5 2522 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2523
2524install_route:
d8d1f30b 2525 rt->dst.dev = dev;
1da177e4 2526 rt->rt6i_idev = idev;
c71099ac 2527 rt->rt6i_table = table;
63152fc0 2528
c346dca1 2529 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2530
8c5b83f0 2531 return rt;
6b9ea5a6
RP
2532out:
2533 if (dev)
2534 dev_put(dev);
2535 if (idev)
2536 in6_dev_put(idev);
587fea74
WW
2537 if (rt)
2538 dst_release_immediate(&rt->dst);
6b9ea5a6 2539
8c5b83f0 2540 return ERR_PTR(err);
6b9ea5a6
RP
2541}
2542
333c4301
DA
2543int ip6_route_add(struct fib6_config *cfg,
2544 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2545{
2546 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2547 struct rt6_info *rt;
6b9ea5a6
RP
2548 int err;
2549
333c4301 2550 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2551 if (IS_ERR(rt)) {
2552 err = PTR_ERR(rt);
2553 rt = NULL;
6b9ea5a6 2554 goto out;
8c5b83f0 2555 }
6b9ea5a6 2556
e715b6d3
FW
2557 err = ip6_convert_metrics(&mxc, cfg);
2558 if (err)
2559 goto out;
1da177e4 2560
333c4301 2561 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2562
2563 kfree(mxc.mx);
6b9ea5a6 2564
e715b6d3 2565 return err;
1da177e4 2566out:
587fea74
WW
2567 if (rt)
2568 dst_release_immediate(&rt->dst);
6b9ea5a6 2569
1da177e4
LT
2570 return err;
2571}
2572
86872cb5 2573static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2574{
2575 int err;
c71099ac 2576 struct fib6_table *table;
d1918542 2577 struct net *net = dev_net(rt->dst.dev);
1da177e4 2578
a4c2fd7f 2579 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2580 err = -ENOENT;
2581 goto out;
2582 }
6c813a72 2583
c71099ac
TG
2584 table = rt->rt6i_table;
2585 write_lock_bh(&table->tb6_lock);
86872cb5 2586 err = fib6_del(rt, info);
c71099ac 2587 write_unlock_bh(&table->tb6_lock);
1da177e4 2588
6825a26c 2589out:
94e187c0 2590 ip6_rt_put(rt);
1da177e4
LT
2591 return err;
2592}
2593
e0a1ad73
TG
2594int ip6_del_rt(struct rt6_info *rt)
2595{
4d1169c1 2596 struct nl_info info = {
d1918542 2597 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2598 };
528c4ceb 2599 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2600}
2601
0ae81335
DA
2602static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2603{
2604 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2605 struct net *net = info->nl_net;
16a16cd3 2606 struct sk_buff *skb = NULL;
0ae81335 2607 struct fib6_table *table;
e3330039 2608 int err = -ENOENT;
0ae81335 2609
e3330039
WC
2610 if (rt == net->ipv6.ip6_null_entry)
2611 goto out_put;
0ae81335
DA
2612 table = rt->rt6i_table;
2613 write_lock_bh(&table->tb6_lock);
2614
2615 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2616 struct rt6_info *sibling, *next_sibling;
2617
16a16cd3
DA
2618 /* prefer to send a single notification with all hops */
2619 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2620 if (skb) {
2621 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2622
e3330039 2623 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2624 NULL, NULL, 0, RTM_DELROUTE,
2625 info->portid, seq, 0) < 0) {
2626 kfree_skb(skb);
2627 skb = NULL;
2628 } else
2629 info->skip_notify = 1;
2630 }
2631
0ae81335
DA
2632 list_for_each_entry_safe(sibling, next_sibling,
2633 &rt->rt6i_siblings,
2634 rt6i_siblings) {
2635 err = fib6_del(sibling, info);
2636 if (err)
e3330039 2637 goto out_unlock;
0ae81335
DA
2638 }
2639 }
2640
2641 err = fib6_del(rt, info);
e3330039 2642out_unlock:
0ae81335 2643 write_unlock_bh(&table->tb6_lock);
e3330039 2644out_put:
0ae81335 2645 ip6_rt_put(rt);
16a16cd3
DA
2646
2647 if (skb) {
e3330039 2648 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2649 info->nlh, gfp_any());
2650 }
0ae81335
DA
2651 return err;
2652}
2653
333c4301
DA
2654static int ip6_route_del(struct fib6_config *cfg,
2655 struct netlink_ext_ack *extack)
1da177e4 2656{
c71099ac 2657 struct fib6_table *table;
1da177e4
LT
2658 struct fib6_node *fn;
2659 struct rt6_info *rt;
2660 int err = -ESRCH;
2661
5578689a 2662 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2663 if (!table) {
2664 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2665 return err;
d5d531cb 2666 }
c71099ac
TG
2667
2668 read_lock_bh(&table->tb6_lock);
1da177e4 2669
c71099ac 2670 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2671 &cfg->fc_dst, cfg->fc_dst_len,
2672 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2673
1da177e4 2674 if (fn) {
d8d1f30b 2675 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2676 if ((rt->rt6i_flags & RTF_CACHE) &&
2677 !(cfg->fc_flags & RTF_CACHE))
2678 continue;
86872cb5 2679 if (cfg->fc_ifindex &&
d1918542
DM
2680 (!rt->dst.dev ||
2681 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2682 continue;
86872cb5
TG
2683 if (cfg->fc_flags & RTF_GATEWAY &&
2684 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2685 continue;
86872cb5 2686 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2687 continue;
c2ed1880
M
2688 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2689 continue;
d8d1f30b 2690 dst_hold(&rt->dst);
c71099ac 2691 read_unlock_bh(&table->tb6_lock);
1da177e4 2692
0ae81335
DA
2693 /* if gateway was specified only delete the one hop */
2694 if (cfg->fc_flags & RTF_GATEWAY)
2695 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2696
2697 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2698 }
2699 }
c71099ac 2700 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2701
2702 return err;
2703}
2704
6700c270 2705static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2706{
a6279458 2707 struct netevent_redirect netevent;
e8599ff4 2708 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2709 struct ndisc_options ndopts;
2710 struct inet6_dev *in6_dev;
2711 struct neighbour *neigh;
71bcdba0 2712 struct rd_msg *msg;
6e157b6a
DM
2713 int optlen, on_link;
2714 u8 *lladdr;
e8599ff4 2715
29a3cad5 2716 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2717 optlen -= sizeof(*msg);
e8599ff4
DM
2718
2719 if (optlen < 0) {
6e157b6a 2720 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2721 return;
2722 }
2723
71bcdba0 2724 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2725
71bcdba0 2726 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2727 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2728 return;
2729 }
2730
6e157b6a 2731 on_link = 0;
71bcdba0 2732 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2733 on_link = 1;
71bcdba0 2734 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2735 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2736 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2737 return;
2738 }
2739
2740 in6_dev = __in6_dev_get(skb->dev);
2741 if (!in6_dev)
2742 return;
2743 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2744 return;
2745
2746 /* RFC2461 8.1:
2747 * The IP source address of the Redirect MUST be the same as the current
2748 * first-hop router for the specified ICMP Destination Address.
2749 */
2750
f997c55c 2751 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2752 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2753 return;
2754 }
6e157b6a
DM
2755
2756 lladdr = NULL;
e8599ff4
DM
2757 if (ndopts.nd_opts_tgt_lladdr) {
2758 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2759 skb->dev);
2760 if (!lladdr) {
2761 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2762 return;
2763 }
2764 }
2765
6e157b6a 2766 rt = (struct rt6_info *) dst;
ec13ad1d 2767 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2768 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2769 return;
6e157b6a 2770 }
e8599ff4 2771
6e157b6a
DM
2772 /* Redirect received -> path was valid.
2773 * Look, redirects are sent only in response to data packets,
2774 * so that this nexthop apparently is reachable. --ANK
2775 */
0dec879f 2776 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2777
71bcdba0 2778 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2779 if (!neigh)
2780 return;
a6279458 2781
1da177e4
LT
2782 /*
2783 * We have finally decided to accept it.
2784 */
2785
f997c55c 2786 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2787 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2788 NEIGH_UPDATE_F_OVERRIDE|
2789 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2790 NEIGH_UPDATE_F_ISROUTER)),
2791 NDISC_REDIRECT, &ndopts);
1da177e4 2792
83a09abd 2793 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2794 if (!nrt)
1da177e4
LT
2795 goto out;
2796
2797 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2798 if (on_link)
2799 nrt->rt6i_flags &= ~RTF_GATEWAY;
2800
b91d5329 2801 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 2802 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2803
40e22e8f 2804 if (ip6_ins_rt(nrt))
1cfb71ee 2805 goto out_release;
1da177e4 2806
d8d1f30b
CG
2807 netevent.old = &rt->dst;
2808 netevent.new = &nrt->dst;
71bcdba0 2809 netevent.daddr = &msg->dest;
60592833 2810 netevent.neigh = neigh;
8d71740c
TT
2811 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2812
38308473 2813 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2814 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2815 ip6_del_rt(rt);
1da177e4
LT
2816 }
2817
1cfb71ee
WW
2818out_release:
2819 /* Release the reference taken in
2820 * ip6_rt_cache_alloc()
2821 */
2822 dst_release(&nrt->dst);
2823
1da177e4 2824out:
e8599ff4 2825 neigh_release(neigh);
6e157b6a
DM
2826}
2827
1da177e4
LT
2828/*
2829 * Misc support functions
2830 */
2831
4b32b5ad
MKL
2832static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2833{
2834 BUG_ON(from->dst.from);
2835
2836 rt->rt6i_flags &= ~RTF_EXPIRES;
2837 dst_hold(&from->dst);
2838 rt->dst.from = &from->dst;
2839 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2840}
2841
83a09abd
MKL
2842static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2843{
2844 rt->dst.input = ort->dst.input;
2845 rt->dst.output = ort->dst.output;
2846 rt->rt6i_dst = ort->rt6i_dst;
2847 rt->dst.error = ort->dst.error;
2848 rt->rt6i_idev = ort->rt6i_idev;
2849 if (rt->rt6i_idev)
2850 in6_dev_hold(rt->rt6i_idev);
2851 rt->dst.lastuse = jiffies;
2852 rt->rt6i_gateway = ort->rt6i_gateway;
2853 rt->rt6i_flags = ort->rt6i_flags;
2854 rt6_set_from(rt, ort);
2855 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2856#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2857 rt->rt6i_src = ort->rt6i_src;
1da177e4 2858#endif
83a09abd
MKL
2859 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2860 rt->rt6i_table = ort->rt6i_table;
61adedf3 2861 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2862}
2863
70ceb4f5 2864#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2865static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2866 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2867 const struct in6_addr *gwaddr,
2868 struct net_device *dev)
70ceb4f5 2869{
830218c1
DA
2870 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2871 int ifindex = dev->ifindex;
70ceb4f5
YH
2872 struct fib6_node *fn;
2873 struct rt6_info *rt = NULL;
c71099ac
TG
2874 struct fib6_table *table;
2875
830218c1 2876 table = fib6_get_table(net, tb_id);
38308473 2877 if (!table)
c71099ac 2878 return NULL;
70ceb4f5 2879
5744dd9b 2880 read_lock_bh(&table->tb6_lock);
67ba4152 2881 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2882 if (!fn)
2883 goto out;
2884
d8d1f30b 2885 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2886 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2887 continue;
2888 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2889 continue;
2890 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2891 continue;
d8d1f30b 2892 dst_hold(&rt->dst);
70ceb4f5
YH
2893 break;
2894 }
2895out:
5744dd9b 2896 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2897 return rt;
2898}
2899
efa2cea0 2900static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2901 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2902 const struct in6_addr *gwaddr,
2903 struct net_device *dev,
95c96174 2904 unsigned int pref)
70ceb4f5 2905{
86872cb5 2906 struct fib6_config cfg = {
238fc7ea 2907 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2908 .fc_ifindex = dev->ifindex,
86872cb5
TG
2909 .fc_dst_len = prefixlen,
2910 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2911 RTF_UP | RTF_PREF(pref),
b91d5329 2912 .fc_protocol = RTPROT_RA,
15e47304 2913 .fc_nlinfo.portid = 0,
efa2cea0
DL
2914 .fc_nlinfo.nlh = NULL,
2915 .fc_nlinfo.nl_net = net,
86872cb5
TG
2916 };
2917
830218c1 2918 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2919 cfg.fc_dst = *prefix;
2920 cfg.fc_gateway = *gwaddr;
70ceb4f5 2921
e317da96
YH
2922 /* We should treat it as a default route if prefix length is 0. */
2923 if (!prefixlen)
86872cb5 2924 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2925
333c4301 2926 ip6_route_add(&cfg, NULL);
70ceb4f5 2927
830218c1 2928 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2929}
2930#endif
2931
b71d1d42 2932struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2933{
830218c1 2934 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2935 struct rt6_info *rt;
c71099ac 2936 struct fib6_table *table;
1da177e4 2937
830218c1 2938 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2939 if (!table)
c71099ac 2940 return NULL;
1da177e4 2941
5744dd9b 2942 read_lock_bh(&table->tb6_lock);
67ba4152 2943 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2944 if (dev == rt->dst.dev &&
045927ff 2945 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2946 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2947 break;
2948 }
2949 if (rt)
d8d1f30b 2950 dst_hold(&rt->dst);
5744dd9b 2951 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2952 return rt;
2953}
2954
b71d1d42 2955struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2956 struct net_device *dev,
2957 unsigned int pref)
1da177e4 2958{
86872cb5 2959 struct fib6_config cfg = {
ca254490 2960 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2961 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2962 .fc_ifindex = dev->ifindex,
2963 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2964 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 2965 .fc_protocol = RTPROT_RA,
15e47304 2966 .fc_nlinfo.portid = 0,
5578689a 2967 .fc_nlinfo.nlh = NULL,
c346dca1 2968 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2969 };
1da177e4 2970
4e3fd7a0 2971 cfg.fc_gateway = *gwaddr;
1da177e4 2972
333c4301 2973 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
2974 struct fib6_table *table;
2975
2976 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2977 if (table)
2978 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2979 }
1da177e4 2980
1da177e4
LT
2981 return rt6_get_dflt_router(gwaddr, dev);
2982}
2983
830218c1 2984static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2985{
2986 struct rt6_info *rt;
2987
2988restart:
c71099ac 2989 read_lock_bh(&table->tb6_lock);
d8d1f30b 2990 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2991 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2992 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2993 dst_hold(&rt->dst);
c71099ac 2994 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2995 ip6_del_rt(rt);
1da177e4
LT
2996 goto restart;
2997 }
2998 }
c71099ac 2999 read_unlock_bh(&table->tb6_lock);
830218c1
DA
3000
3001 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3002}
3003
3004void rt6_purge_dflt_routers(struct net *net)
3005{
3006 struct fib6_table *table;
3007 struct hlist_head *head;
3008 unsigned int h;
3009
3010 rcu_read_lock();
3011
3012 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3013 head = &net->ipv6.fib_table_hash[h];
3014 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3015 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3016 __rt6_purge_dflt_routers(table);
3017 }
3018 }
3019
3020 rcu_read_unlock();
1da177e4
LT
3021}
3022
5578689a
DL
3023static void rtmsg_to_fib6_config(struct net *net,
3024 struct in6_rtmsg *rtmsg,
86872cb5
TG
3025 struct fib6_config *cfg)
3026{
3027 memset(cfg, 0, sizeof(*cfg));
3028
ca254490
DA
3029 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3030 : RT6_TABLE_MAIN;
86872cb5
TG
3031 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3032 cfg->fc_metric = rtmsg->rtmsg_metric;
3033 cfg->fc_expires = rtmsg->rtmsg_info;
3034 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3035 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3036 cfg->fc_flags = rtmsg->rtmsg_flags;
3037
5578689a 3038 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3039
4e3fd7a0
AD
3040 cfg->fc_dst = rtmsg->rtmsg_dst;
3041 cfg->fc_src = rtmsg->rtmsg_src;
3042 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3043}
3044
5578689a 3045int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3046{
86872cb5 3047 struct fib6_config cfg;
1da177e4
LT
3048 struct in6_rtmsg rtmsg;
3049 int err;
3050
67ba4152 3051 switch (cmd) {
1da177e4
LT
3052 case SIOCADDRT: /* Add a route */
3053 case SIOCDELRT: /* Delete a route */
af31f412 3054 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3055 return -EPERM;
3056 err = copy_from_user(&rtmsg, arg,
3057 sizeof(struct in6_rtmsg));
3058 if (err)
3059 return -EFAULT;
86872cb5 3060
5578689a 3061 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3062
1da177e4
LT
3063 rtnl_lock();
3064 switch (cmd) {
3065 case SIOCADDRT:
333c4301 3066 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3067 break;
3068 case SIOCDELRT:
333c4301 3069 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3070 break;
3071 default:
3072 err = -EINVAL;
3073 }
3074 rtnl_unlock();
3075
3076 return err;
3ff50b79 3077 }
1da177e4
LT
3078
3079 return -EINVAL;
3080}
3081
3082/*
3083 * Drop the packet on the floor
3084 */
3085
d5fdd6ba 3086static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3087{
612f09e8 3088 int type;
adf30907 3089 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3090 switch (ipstats_mib_noroutes) {
3091 case IPSTATS_MIB_INNOROUTES:
0660e03f 3092 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3093 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3094 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3095 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3096 break;
3097 }
3098 /* FALLTHROUGH */
3099 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3100 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3101 ipstats_mib_noroutes);
612f09e8
YH
3102 break;
3103 }
3ffe533c 3104 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3105 kfree_skb(skb);
3106 return 0;
3107}
3108
9ce8ade0
TG
3109static int ip6_pkt_discard(struct sk_buff *skb)
3110{
612f09e8 3111 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3112}
3113
ede2059d 3114static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3115{
adf30907 3116 skb->dev = skb_dst(skb)->dev;
612f09e8 3117 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3118}
3119
9ce8ade0
TG
3120static int ip6_pkt_prohibit(struct sk_buff *skb)
3121{
612f09e8 3122 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3123}
3124
ede2059d 3125static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3126{
adf30907 3127 skb->dev = skb_dst(skb)->dev;
612f09e8 3128 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3129}
3130
1da177e4
LT
3131/*
3132 * Allocate a dst for local (unicast / anycast) address.
3133 */
3134
3135struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3136 const struct in6_addr *addr,
8f031519 3137 bool anycast)
1da177e4 3138{
ca254490 3139 u32 tb_id;
c346dca1 3140 struct net *net = dev_net(idev->dev);
4832c30d 3141 struct net_device *dev = idev->dev;
5f02ce24
DA
3142 struct rt6_info *rt;
3143
5f02ce24 3144 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3145 if (!rt)
1da177e4
LT
3146 return ERR_PTR(-ENOMEM);
3147
1da177e4
LT
3148 in6_dev_hold(idev);
3149
11d53b49 3150 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3151 rt->dst.input = ip6_input;
3152 rt->dst.output = ip6_output;
1da177e4 3153 rt->rt6i_idev = idev;
1da177e4 3154
94b5e0f9 3155 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3156 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3157 if (anycast)
3158 rt->rt6i_flags |= RTF_ANYCAST;
3159 else
1da177e4 3160 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3161
550bab42 3162 rt->rt6i_gateway = *addr;
4e3fd7a0 3163 rt->rt6i_dst.addr = *addr;
1da177e4 3164 rt->rt6i_dst.plen = 128;
ca254490
DA
3165 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3166 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3167
1da177e4
LT
3168 return rt;
3169}
3170
c3968a85
DW
3171/* remove deleted ip from prefsrc entries */
3172struct arg_dev_net_ip {
3173 struct net_device *dev;
3174 struct net *net;
3175 struct in6_addr *addr;
3176};
3177
3178static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3179{
3180 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3181 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3182 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3183
d1918542 3184 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3185 rt != net->ipv6.ip6_null_entry &&
3186 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3187 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3188 /* remove prefsrc entry */
3189 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3190 /* need to update cache as well */
3191 rt6_exceptions_remove_prefsrc(rt);
3192 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3193 }
3194 return 0;
3195}
3196
3197void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3198{
3199 struct net *net = dev_net(ifp->idev->dev);
3200 struct arg_dev_net_ip adni = {
3201 .dev = ifp->idev->dev,
3202 .net = net,
3203 .addr = &ifp->addr,
3204 };
0c3584d5 3205 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3206}
3207
be7a010d
DJ
3208#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3209#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
3210
3211/* Remove routers and update dst entries when gateway turn into host. */
3212static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3213{
3214 struct in6_addr *gateway = (struct in6_addr *)arg;
3215
3216 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
3217 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
3218 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
3219 return -1;
3220 }
3221 return 0;
3222}
3223
3224void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3225{
3226 fib6_clean_all(net, fib6_clean_tohost, gateway);
3227}
3228
8ed67789
DL
3229struct arg_dev_net {
3230 struct net_device *dev;
3231 struct net *net;
3232};
3233
a1a22c12 3234/* called with write lock held for table with rt */
1da177e4
LT
3235static int fib6_ifdown(struct rt6_info *rt, void *arg)
3236{
bc3ef660 3237 const struct arg_dev_net *adn = arg;
3238 const struct net_device *dev = adn->dev;
8ed67789 3239
d1918542 3240 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
3241 rt != adn->net->ipv6.ip6_null_entry &&
3242 (rt->rt6i_nsiblings == 0 ||
8397ed36 3243 (dev && netdev_unregistering(dev)) ||
a1a22c12 3244 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 3245 return -1;
c159d30c 3246
1da177e4
LT
3247 return 0;
3248}
3249
f3db4851 3250void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 3251{
8ed67789
DL
3252 struct arg_dev_net adn = {
3253 .dev = dev,
3254 .net = net,
3255 };
3256
0c3584d5 3257 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
3258 if (dev)
3259 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
3260}
3261
95c96174 3262struct rt6_mtu_change_arg {
1da177e4 3263 struct net_device *dev;
95c96174 3264 unsigned int mtu;
1da177e4
LT
3265};
3266
3267static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3268{
3269 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3270 struct inet6_dev *idev;
3271
3272 /* In IPv6 pmtu discovery is not optional,
3273 so that RTAX_MTU lock cannot disable it.
3274 We still use this lock to block changes
3275 caused by addrconf/ndisc.
3276 */
3277
3278 idev = __in6_dev_get(arg->dev);
38308473 3279 if (!idev)
1da177e4
LT
3280 return 0;
3281
3282 /* For administrative MTU increase, there is no way to discover
3283 IPv6 PMTU increase, so PMTU increase should be updated here.
3284 Since RFC 1981 doesn't include administrative MTU increase
3285 update PMTU increase is a MUST. (i.e. jumbo frame)
3286 */
3287 /*
3288 If new MTU is less than route PMTU, this new MTU will be the
3289 lowest MTU in the path, update the route PMTU to reflect PMTU
3290 decreases; if new MTU is greater than route PMTU, and the
3291 old MTU is the lowest MTU in the path, update the route PMTU
3292 to reflect the increase. In this case if the other nodes' MTU
3293 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3294 PMTU discovery.
1da177e4 3295 */
d1918542 3296 if (rt->dst.dev == arg->dev &&
fb56be83 3297 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
3298 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
3299 if (rt->rt6i_flags & RTF_CACHE) {
3300 /* For RTF_CACHE with rt6i_pmtu == 0
3301 * (i.e. a redirected route),
3302 * the metrics of its rt->dst.from has already
3303 * been updated.
3304 */
3305 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
3306 rt->rt6i_pmtu = arg->mtu;
3307 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
3308 (dst_mtu(&rt->dst) < arg->mtu &&
3309 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
3310 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3311 }
566cfd8f 3312 }
1da177e4
LT
3313 return 0;
3314}
3315
95c96174 3316void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3317{
c71099ac
TG
3318 struct rt6_mtu_change_arg arg = {
3319 .dev = dev,
3320 .mtu = mtu,
3321 };
1da177e4 3322
0c3584d5 3323 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3324}
3325
ef7c79ed 3326static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3327 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3328 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3329 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3330 [RTA_PRIORITY] = { .type = NLA_U32 },
3331 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3332 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3333 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3334 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3335 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3336 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3337 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3338 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3339};
3340
3341static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3342 struct fib6_config *cfg,
3343 struct netlink_ext_ack *extack)
1da177e4 3344{
86872cb5
TG
3345 struct rtmsg *rtm;
3346 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3347 unsigned int pref;
86872cb5 3348 int err;
1da177e4 3349
fceb6435
JB
3350 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3351 NULL);
86872cb5
TG
3352 if (err < 0)
3353 goto errout;
1da177e4 3354
86872cb5
TG
3355 err = -EINVAL;
3356 rtm = nlmsg_data(nlh);
3357 memset(cfg, 0, sizeof(*cfg));
3358
3359 cfg->fc_table = rtm->rtm_table;
3360 cfg->fc_dst_len = rtm->rtm_dst_len;
3361 cfg->fc_src_len = rtm->rtm_src_len;
3362 cfg->fc_flags = RTF_UP;
3363 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3364 cfg->fc_type = rtm->rtm_type;
86872cb5 3365
ef2c7d7b
ND
3366 if (rtm->rtm_type == RTN_UNREACHABLE ||
3367 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3368 rtm->rtm_type == RTN_PROHIBIT ||
3369 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3370 cfg->fc_flags |= RTF_REJECT;
3371
ab79ad14
3372 if (rtm->rtm_type == RTN_LOCAL)
3373 cfg->fc_flags |= RTF_LOCAL;
3374
1f56a01f
MKL
3375 if (rtm->rtm_flags & RTM_F_CLONED)
3376 cfg->fc_flags |= RTF_CACHE;
3377
15e47304 3378 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3379 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3380 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3381
3382 if (tb[RTA_GATEWAY]) {
67b61f6c 3383 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3384 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3385 }
86872cb5
TG
3386
3387 if (tb[RTA_DST]) {
3388 int plen = (rtm->rtm_dst_len + 7) >> 3;
3389
3390 if (nla_len(tb[RTA_DST]) < plen)
3391 goto errout;
3392
3393 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3394 }
86872cb5
TG
3395
3396 if (tb[RTA_SRC]) {
3397 int plen = (rtm->rtm_src_len + 7) >> 3;
3398
3399 if (nla_len(tb[RTA_SRC]) < plen)
3400 goto errout;
3401
3402 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3403 }
86872cb5 3404
c3968a85 3405 if (tb[RTA_PREFSRC])
67b61f6c 3406 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3407
86872cb5
TG
3408 if (tb[RTA_OIF])
3409 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3410
3411 if (tb[RTA_PRIORITY])
3412 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3413
3414 if (tb[RTA_METRICS]) {
3415 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3416 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3417 }
86872cb5
TG
3418
3419 if (tb[RTA_TABLE])
3420 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3421
51ebd318
ND
3422 if (tb[RTA_MULTIPATH]) {
3423 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3424 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3425
3426 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3427 cfg->fc_mp_len, extack);
9ed59592
DA
3428 if (err < 0)
3429 goto errout;
51ebd318
ND
3430 }
3431
c78ba6d6
LR
3432 if (tb[RTA_PREF]) {
3433 pref = nla_get_u8(tb[RTA_PREF]);
3434 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3435 pref != ICMPV6_ROUTER_PREF_HIGH)
3436 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3437 cfg->fc_flags |= RTF_PREF(pref);
3438 }
3439
19e42e45
RP
3440 if (tb[RTA_ENCAP])
3441 cfg->fc_encap = tb[RTA_ENCAP];
3442
9ed59592 3443 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3444 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3445
c255bd68 3446 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3447 if (err < 0)
3448 goto errout;
3449 }
3450
32bc201e
XL
3451 if (tb[RTA_EXPIRES]) {
3452 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3453
3454 if (addrconf_finite_timeout(timeout)) {
3455 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3456 cfg->fc_flags |= RTF_EXPIRES;
3457 }
3458 }
3459
86872cb5
TG
3460 err = 0;
3461errout:
3462 return err;
1da177e4
LT
3463}
3464
6b9ea5a6
RP
3465struct rt6_nh {
3466 struct rt6_info *rt6_info;
3467 struct fib6_config r_cfg;
3468 struct mx6_config mxc;
3469 struct list_head next;
3470};
3471
3472static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3473{
3474 struct rt6_nh *nh;
3475
3476 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3477 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3478 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3479 nh->r_cfg.fc_ifindex);
3480 }
3481}
3482
3483static int ip6_route_info_append(struct list_head *rt6_nh_list,
3484 struct rt6_info *rt, struct fib6_config *r_cfg)
3485{
3486 struct rt6_nh *nh;
6b9ea5a6
RP
3487 int err = -EEXIST;
3488
3489 list_for_each_entry(nh, rt6_nh_list, next) {
3490 /* check if rt6_info already exists */
f06b7549 3491 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3492 return err;
3493 }
3494
3495 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3496 if (!nh)
3497 return -ENOMEM;
3498 nh->rt6_info = rt;
3499 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3500 if (err) {
3501 kfree(nh);
3502 return err;
3503 }
3504 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3505 list_add_tail(&nh->next, rt6_nh_list);
3506
3507 return 0;
3508}
3509
3b1137fe
DA
3510static void ip6_route_mpath_notify(struct rt6_info *rt,
3511 struct rt6_info *rt_last,
3512 struct nl_info *info,
3513 __u16 nlflags)
3514{
3515 /* if this is an APPEND route, then rt points to the first route
3516 * inserted and rt_last points to last route inserted. Userspace
3517 * wants a consistent dump of the route which starts at the first
3518 * nexthop. Since sibling routes are always added at the end of
3519 * the list, find the first sibling of the last route appended
3520 */
3521 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3522 rt = list_first_entry(&rt_last->rt6i_siblings,
3523 struct rt6_info,
3524 rt6i_siblings);
3525 }
3526
3527 if (rt)
3528 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3529}
3530
333c4301
DA
3531static int ip6_route_multipath_add(struct fib6_config *cfg,
3532 struct netlink_ext_ack *extack)
51ebd318 3533{
3b1137fe
DA
3534 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3535 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3536 struct fib6_config r_cfg;
3537 struct rtnexthop *rtnh;
6b9ea5a6
RP
3538 struct rt6_info *rt;
3539 struct rt6_nh *err_nh;
3540 struct rt6_nh *nh, *nh_safe;
3b1137fe 3541 __u16 nlflags;
51ebd318
ND
3542 int remaining;
3543 int attrlen;
6b9ea5a6
RP
3544 int err = 1;
3545 int nhn = 0;
3546 int replace = (cfg->fc_nlinfo.nlh &&
3547 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3548 LIST_HEAD(rt6_nh_list);
51ebd318 3549
3b1137fe
DA
3550 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3551 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3552 nlflags |= NLM_F_APPEND;
3553
35f1b4e9 3554 remaining = cfg->fc_mp_len;
51ebd318 3555 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3556
6b9ea5a6
RP
3557 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3558 * rt6_info structs per nexthop
3559 */
51ebd318
ND
3560 while (rtnh_ok(rtnh, remaining)) {
3561 memcpy(&r_cfg, cfg, sizeof(*cfg));
3562 if (rtnh->rtnh_ifindex)
3563 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3564
3565 attrlen = rtnh_attrlen(rtnh);
3566 if (attrlen > 0) {
3567 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3568
3569 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3570 if (nla) {
67b61f6c 3571 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3572 r_cfg.fc_flags |= RTF_GATEWAY;
3573 }
19e42e45
RP
3574 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3575 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3576 if (nla)
3577 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3578 }
6b9ea5a6 3579
333c4301 3580 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3581 if (IS_ERR(rt)) {
3582 err = PTR_ERR(rt);
3583 rt = NULL;
6b9ea5a6 3584 goto cleanup;
8c5b83f0 3585 }
6b9ea5a6
RP
3586
3587 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3588 if (err) {
587fea74 3589 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3590 goto cleanup;
3591 }
3592
3593 rtnh = rtnh_next(rtnh, &remaining);
3594 }
3595
3b1137fe
DA
3596 /* for add and replace send one notification with all nexthops.
3597 * Skip the notification in fib6_add_rt2node and send one with
3598 * the full route when done
3599 */
3600 info->skip_notify = 1;
3601
6b9ea5a6
RP
3602 err_nh = NULL;
3603 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3604 rt_last = nh->rt6_info;
333c4301 3605 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3606 /* save reference to first route for notification */
3607 if (!rt_notif && !err)
3608 rt_notif = nh->rt6_info;
3609
6b9ea5a6
RP
3610 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3611 nh->rt6_info = NULL;
3612 if (err) {
3613 if (replace && nhn)
3614 ip6_print_replace_route_err(&rt6_nh_list);
3615 err_nh = nh;
3616 goto add_errout;
51ebd318 3617 }
6b9ea5a6 3618
1a72418b 3619 /* Because each route is added like a single route we remove
27596472
MK
3620 * these flags after the first nexthop: if there is a collision,
3621 * we have already failed to add the first nexthop:
3622 * fib6_add_rt2node() has rejected it; when replacing, old
3623 * nexthops have been replaced by first new, the rest should
3624 * be added to it.
1a72418b 3625 */
27596472
MK
3626 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3627 NLM_F_REPLACE);
6b9ea5a6
RP
3628 nhn++;
3629 }
3630
3b1137fe
DA
3631 /* success ... tell user about new route */
3632 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3633 goto cleanup;
3634
3635add_errout:
3b1137fe
DA
3636 /* send notification for routes that were added so that
3637 * the delete notifications sent by ip6_route_del are
3638 * coherent
3639 */
3640 if (rt_notif)
3641 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3642
6b9ea5a6
RP
3643 /* Delete routes that were already added */
3644 list_for_each_entry(nh, &rt6_nh_list, next) {
3645 if (err_nh == nh)
3646 break;
333c4301 3647 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3648 }
3649
3650cleanup:
3651 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3652 if (nh->rt6_info)
3653 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3654 kfree(nh->mxc.mx);
6b9ea5a6
RP
3655 list_del(&nh->next);
3656 kfree(nh);
3657 }
3658
3659 return err;
3660}
3661
333c4301
DA
3662static int ip6_route_multipath_del(struct fib6_config *cfg,
3663 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3664{
3665 struct fib6_config r_cfg;
3666 struct rtnexthop *rtnh;
3667 int remaining;
3668 int attrlen;
3669 int err = 1, last_err = 0;
3670
3671 remaining = cfg->fc_mp_len;
3672 rtnh = (struct rtnexthop *)cfg->fc_mp;
3673
3674 /* Parse a Multipath Entry */
3675 while (rtnh_ok(rtnh, remaining)) {
3676 memcpy(&r_cfg, cfg, sizeof(*cfg));
3677 if (rtnh->rtnh_ifindex)
3678 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3679
3680 attrlen = rtnh_attrlen(rtnh);
3681 if (attrlen > 0) {
3682 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3683
3684 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3685 if (nla) {
3686 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3687 r_cfg.fc_flags |= RTF_GATEWAY;
3688 }
3689 }
333c4301 3690 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3691 if (err)
3692 last_err = err;
3693
51ebd318
ND
3694 rtnh = rtnh_next(rtnh, &remaining);
3695 }
3696
3697 return last_err;
3698}
3699
c21ef3e3
DA
3700static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3701 struct netlink_ext_ack *extack)
1da177e4 3702{
86872cb5
TG
3703 struct fib6_config cfg;
3704 int err;
1da177e4 3705
333c4301 3706 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3707 if (err < 0)
3708 return err;
3709
51ebd318 3710 if (cfg.fc_mp)
333c4301 3711 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3712 else {
3713 cfg.fc_delete_all_nh = 1;
333c4301 3714 return ip6_route_del(&cfg, extack);
0ae81335 3715 }
1da177e4
LT
3716}
3717
c21ef3e3
DA
3718static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3719 struct netlink_ext_ack *extack)
1da177e4 3720{
86872cb5
TG
3721 struct fib6_config cfg;
3722 int err;
1da177e4 3723
333c4301 3724 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3725 if (err < 0)
3726 return err;
3727
51ebd318 3728 if (cfg.fc_mp)
333c4301 3729 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3730 else
333c4301 3731 return ip6_route_add(&cfg, extack);
1da177e4
LT
3732}
3733
beb1afac 3734static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3735{
beb1afac
DA
3736 int nexthop_len = 0;
3737
3738 if (rt->rt6i_nsiblings) {
3739 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3740 + NLA_ALIGN(sizeof(struct rtnexthop))
3741 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3742 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3743
3744 nexthop_len *= rt->rt6i_nsiblings;
3745 }
3746
339bf98f
TG
3747 return NLMSG_ALIGN(sizeof(struct rtmsg))
3748 + nla_total_size(16) /* RTA_SRC */
3749 + nla_total_size(16) /* RTA_DST */
3750 + nla_total_size(16) /* RTA_GATEWAY */
3751 + nla_total_size(16) /* RTA_PREFSRC */
3752 + nla_total_size(4) /* RTA_TABLE */
3753 + nla_total_size(4) /* RTA_IIF */
3754 + nla_total_size(4) /* RTA_OIF */
3755 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3756 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3757 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3758 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3759 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3760 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3761 + nexthop_len;
3762}
3763
3764static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3765 unsigned int *flags, bool skip_oif)
beb1afac
DA
3766{
3767 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3768 *flags |= RTNH_F_LINKDOWN;
3769 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3770 *flags |= RTNH_F_DEAD;
3771 }
3772
3773 if (rt->rt6i_flags & RTF_GATEWAY) {
3774 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3775 goto nla_put_failure;
3776 }
3777
fe400799 3778 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3779 *flags |= RTNH_F_OFFLOAD;
3780
5be083ce
DA
3781 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3782 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3783 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3784 goto nla_put_failure;
3785
3786 if (rt->dst.lwtstate &&
3787 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3788 goto nla_put_failure;
3789
3790 return 0;
3791
3792nla_put_failure:
3793 return -EMSGSIZE;
3794}
3795
5be083ce 3796/* add multipath next hop */
beb1afac
DA
3797static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3798{
3799 struct rtnexthop *rtnh;
3800 unsigned int flags = 0;
3801
3802 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3803 if (!rtnh)
3804 goto nla_put_failure;
3805
3806 rtnh->rtnh_hops = 0;
3807 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3808
5be083ce 3809 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3810 goto nla_put_failure;
3811
3812 rtnh->rtnh_flags = flags;
3813
3814 /* length of rtnetlink header + attributes */
3815 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3816
3817 return 0;
3818
3819nla_put_failure:
3820 return -EMSGSIZE;
339bf98f
TG
3821}
3822
191cd582
BH
3823static int rt6_fill_node(struct net *net,
3824 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3825 struct in6_addr *dst, struct in6_addr *src,
15e47304 3826 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3827 unsigned int flags)
1da177e4 3828{
4b32b5ad 3829 u32 metrics[RTAX_MAX];
1da177e4 3830 struct rtmsg *rtm;
2d7202bf 3831 struct nlmsghdr *nlh;
e3703b3d 3832 long expires;
9e762a4a 3833 u32 table;
1da177e4 3834
15e47304 3835 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3836 if (!nlh)
26932566 3837 return -EMSGSIZE;
2d7202bf
TG
3838
3839 rtm = nlmsg_data(nlh);
1da177e4
LT
3840 rtm->rtm_family = AF_INET6;
3841 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3842 rtm->rtm_src_len = rt->rt6i_src.plen;
3843 rtm->rtm_tos = 0;
c71099ac 3844 if (rt->rt6i_table)
9e762a4a 3845 table = rt->rt6i_table->tb6_id;
c71099ac 3846 else
9e762a4a
PM
3847 table = RT6_TABLE_UNSPEC;
3848 rtm->rtm_table = table;
c78679e8
DM
3849 if (nla_put_u32(skb, RTA_TABLE, table))
3850 goto nla_put_failure;
ef2c7d7b
ND
3851 if (rt->rt6i_flags & RTF_REJECT) {
3852 switch (rt->dst.error) {
3853 case -EINVAL:
3854 rtm->rtm_type = RTN_BLACKHOLE;
3855 break;
3856 case -EACCES:
3857 rtm->rtm_type = RTN_PROHIBIT;
3858 break;
b4949ab2
ND
3859 case -EAGAIN:
3860 rtm->rtm_type = RTN_THROW;
3861 break;
ef2c7d7b
ND
3862 default:
3863 rtm->rtm_type = RTN_UNREACHABLE;
3864 break;
3865 }
3866 }
38308473 3867 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3868 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
3869 else if (rt->rt6i_flags & RTF_ANYCAST)
3870 rtm->rtm_type = RTN_ANYCAST;
d1918542 3871 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3872 rtm->rtm_type = RTN_LOCAL;
3873 else
3874 rtm->rtm_type = RTN_UNICAST;
3875 rtm->rtm_flags = 0;
3876 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3877 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 3878
38308473 3879 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3880 rtm->rtm_flags |= RTM_F_CLONED;
3881
3882 if (dst) {
930345ea 3883 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3884 goto nla_put_failure;
1ab1457c 3885 rtm->rtm_dst_len = 128;
1da177e4 3886 } else if (rtm->rtm_dst_len)
930345ea 3887 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3888 goto nla_put_failure;
1da177e4
LT
3889#ifdef CONFIG_IPV6_SUBTREES
3890 if (src) {
930345ea 3891 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3892 goto nla_put_failure;
1ab1457c 3893 rtm->rtm_src_len = 128;
c78679e8 3894 } else if (rtm->rtm_src_len &&
930345ea 3895 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3896 goto nla_put_failure;
1da177e4 3897#endif
7bc570c8
YH
3898 if (iif) {
3899#ifdef CONFIG_IPV6_MROUTE
3900 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3901 int err = ip6mr_get_route(net, skb, rtm, portid);
3902
3903 if (err == 0)
3904 return 0;
3905 if (err < 0)
3906 goto nla_put_failure;
7bc570c8
YH
3907 } else
3908#endif
c78679e8
DM
3909 if (nla_put_u32(skb, RTA_IIF, iif))
3910 goto nla_put_failure;
7bc570c8 3911 } else if (dst) {
1da177e4 3912 struct in6_addr saddr_buf;
c78679e8 3913 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3914 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3915 goto nla_put_failure;
1da177e4 3916 }
2d7202bf 3917
c3968a85
DW
3918 if (rt->rt6i_prefsrc.plen) {
3919 struct in6_addr saddr_buf;
4e3fd7a0 3920 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3921 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3922 goto nla_put_failure;
c3968a85
DW
3923 }
3924
4b32b5ad
MKL
3925 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3926 if (rt->rt6i_pmtu)
3927 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3928 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3929 goto nla_put_failure;
3930
c78679e8
DM
3931 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3932 goto nla_put_failure;
8253947e 3933
beb1afac
DA
3934 /* For multipath routes, walk the siblings list and add
3935 * each as a nexthop within RTA_MULTIPATH.
3936 */
3937 if (rt->rt6i_nsiblings) {
3938 struct rt6_info *sibling, *next_sibling;
3939 struct nlattr *mp;
3940
3941 mp = nla_nest_start(skb, RTA_MULTIPATH);
3942 if (!mp)
3943 goto nla_put_failure;
3944
3945 if (rt6_add_nexthop(skb, rt) < 0)
3946 goto nla_put_failure;
3947
3948 list_for_each_entry_safe(sibling, next_sibling,
3949 &rt->rt6i_siblings, rt6i_siblings) {
3950 if (rt6_add_nexthop(skb, sibling) < 0)
3951 goto nla_put_failure;
3952 }
3953
3954 nla_nest_end(skb, mp);
3955 } else {
5be083ce 3956 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
3957 goto nla_put_failure;
3958 }
3959
8253947e 3960 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3961
87a50699 3962 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3963 goto nla_put_failure;
2d7202bf 3964
c78ba6d6
LR
3965 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3966 goto nla_put_failure;
3967
19e42e45 3968
053c095a
JB
3969 nlmsg_end(skb, nlh);
3970 return 0;
2d7202bf
TG
3971
3972nla_put_failure:
26932566
PM
3973 nlmsg_cancel(skb, nlh);
3974 return -EMSGSIZE;
1da177e4
LT
3975}
3976
1b43af54 3977int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3978{
3979 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
3980 struct net *net = arg->net;
3981
3982 if (rt == net->ipv6.ip6_null_entry)
3983 return 0;
1da177e4 3984
2d7202bf
TG
3985 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3986 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
3987
3988 /* user wants prefix routes only */
3989 if (rtm->rtm_flags & RTM_F_PREFIX &&
3990 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3991 /* success since this is not a prefix route */
3992 return 1;
3993 }
3994 }
1da177e4 3995
1f17e2f2 3996 return rt6_fill_node(net,
191cd582 3997 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3998 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 3999 NLM_F_MULTI);
1da177e4
LT
4000}
4001
c21ef3e3
DA
4002static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4003 struct netlink_ext_ack *extack)
1da177e4 4004{
3b1e0a65 4005 struct net *net = sock_net(in_skb->sk);
ab364a6f 4006 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4007 int err, iif = 0, oif = 0;
4008 struct dst_entry *dst;
ab364a6f 4009 struct rt6_info *rt;
1da177e4 4010 struct sk_buff *skb;
ab364a6f 4011 struct rtmsg *rtm;
4c9483b2 4012 struct flowi6 fl6;
18c3a61c 4013 bool fibmatch;
1da177e4 4014
fceb6435 4015 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4016 extack);
ab364a6f
TG
4017 if (err < 0)
4018 goto errout;
1da177e4 4019
ab364a6f 4020 err = -EINVAL;
4c9483b2 4021 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4022 rtm = nlmsg_data(nlh);
4023 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4024 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4025
ab364a6f
TG
4026 if (tb[RTA_SRC]) {
4027 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4028 goto errout;
4029
4e3fd7a0 4030 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4031 }
4032
4033 if (tb[RTA_DST]) {
4034 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4035 goto errout;
4036
4e3fd7a0 4037 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4038 }
4039
4040 if (tb[RTA_IIF])
4041 iif = nla_get_u32(tb[RTA_IIF]);
4042
4043 if (tb[RTA_OIF])
72331bc0 4044 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4045
2e47b291
LC
4046 if (tb[RTA_MARK])
4047 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4048
622ec2c9
LC
4049 if (tb[RTA_UID])
4050 fl6.flowi6_uid = make_kuid(current_user_ns(),
4051 nla_get_u32(tb[RTA_UID]));
4052 else
4053 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4054
1da177e4
LT
4055 if (iif) {
4056 struct net_device *dev;
72331bc0
SL
4057 int flags = 0;
4058
121622db
FW
4059 rcu_read_lock();
4060
4061 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4062 if (!dev) {
121622db 4063 rcu_read_unlock();
1da177e4 4064 err = -ENODEV;
ab364a6f 4065 goto errout;
1da177e4 4066 }
72331bc0
SL
4067
4068 fl6.flowi6_iif = iif;
4069
4070 if (!ipv6_addr_any(&fl6.saddr))
4071 flags |= RT6_LOOKUP_F_HAS_SADDR;
4072
18c3a61c
RP
4073 if (!fibmatch)
4074 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
4075 else
4076 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
4077
4078 rcu_read_unlock();
72331bc0
SL
4079 } else {
4080 fl6.flowi6_oif = oif;
4081
18c3a61c
RP
4082 if (!fibmatch)
4083 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
4084 else
4085 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
4086 }
4087
18c3a61c
RP
4088
4089 rt = container_of(dst, struct rt6_info, dst);
4090 if (rt->dst.error) {
4091 err = rt->dst.error;
4092 ip6_rt_put(rt);
4093 goto errout;
1da177e4
LT
4094 }
4095
9d6acb3b
WC
4096 if (rt == net->ipv6.ip6_null_entry) {
4097 err = rt->dst.error;
4098 ip6_rt_put(rt);
4099 goto errout;
4100 }
4101
ab364a6f 4102 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4103 if (!skb) {
94e187c0 4104 ip6_rt_put(rt);
ab364a6f
TG
4105 err = -ENOBUFS;
4106 goto errout;
4107 }
1da177e4 4108
d8d1f30b 4109 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4110 if (fibmatch)
4111 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4112 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4113 nlh->nlmsg_seq, 0);
4114 else
4115 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4116 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4117 nlh->nlmsg_seq, 0);
1da177e4 4118 if (err < 0) {
ab364a6f
TG
4119 kfree_skb(skb);
4120 goto errout;
1da177e4
LT
4121 }
4122
15e47304 4123 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4124errout:
1da177e4 4125 return err;
1da177e4
LT
4126}
4127
37a1d361
RP
4128void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4129 unsigned int nlm_flags)
1da177e4
LT
4130{
4131 struct sk_buff *skb;
5578689a 4132 struct net *net = info->nl_net;
528c4ceb
DL
4133 u32 seq;
4134 int err;
4135
4136 err = -ENOBUFS;
38308473 4137 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4138
19e42e45 4139 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4140 if (!skb)
21713ebc
TG
4141 goto errout;
4142
191cd582 4143 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4144 event, info->portid, seq, nlm_flags);
26932566
PM
4145 if (err < 0) {
4146 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4147 WARN_ON(err == -EMSGSIZE);
4148 kfree_skb(skb);
4149 goto errout;
4150 }
15e47304 4151 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4152 info->nlh, gfp_any());
4153 return;
21713ebc
TG
4154errout:
4155 if (err < 0)
5578689a 4156 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4157}
4158
8ed67789 4159static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4160 unsigned long event, void *ptr)
8ed67789 4161{
351638e7 4162 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4163 struct net *net = dev_net(dev);
8ed67789 4164
242d3a49
WC
4165 if (!(dev->flags & IFF_LOOPBACK))
4166 return NOTIFY_OK;
4167
4168 if (event == NETDEV_REGISTER) {
d8d1f30b 4169 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4170 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4171#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4172 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4173 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4174 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4175 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4176#endif
76da0704
WC
4177 } else if (event == NETDEV_UNREGISTER &&
4178 dev->reg_state != NETREG_UNREGISTERED) {
4179 /* NETDEV_UNREGISTER could be fired for multiple times by
4180 * netdev_wait_allrefs(). Make sure we only call this once.
4181 */
12d94a80 4182 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4183#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4184 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4185 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4186#endif
4187 }
4188
4189 return NOTIFY_OK;
4190}
4191
1da177e4
LT
4192/*
4193 * /proc
4194 */
4195
4196#ifdef CONFIG_PROC_FS
4197
33120b30
AD
4198static const struct file_operations ipv6_route_proc_fops = {
4199 .owner = THIS_MODULE,
4200 .open = ipv6_route_open,
4201 .read = seq_read,
4202 .llseek = seq_lseek,
8d2ca1d7 4203 .release = seq_release_net,
33120b30
AD
4204};
4205
1da177e4
LT
4206static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4207{
69ddb805 4208 struct net *net = (struct net *)seq->private;
1da177e4 4209 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4210 net->ipv6.rt6_stats->fib_nodes,
4211 net->ipv6.rt6_stats->fib_route_nodes,
4212 net->ipv6.rt6_stats->fib_rt_alloc,
4213 net->ipv6.rt6_stats->fib_rt_entries,
4214 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4215 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4216 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4217
4218 return 0;
4219}
4220
4221static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4222{
de05c557 4223 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4224}
4225
9a32144e 4226static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4227 .owner = THIS_MODULE,
4228 .open = rt6_stats_seq_open,
4229 .read = seq_read,
4230 .llseek = seq_lseek,
b6fcbdb4 4231 .release = single_release_net,
1da177e4
LT
4232};
4233#endif /* CONFIG_PROC_FS */
4234
4235#ifdef CONFIG_SYSCTL
4236
1da177e4 4237static
fe2c6338 4238int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4239 void __user *buffer, size_t *lenp, loff_t *ppos)
4240{
c486da34
LAG
4241 struct net *net;
4242 int delay;
4243 if (!write)
1da177e4 4244 return -EINVAL;
c486da34
LAG
4245
4246 net = (struct net *)ctl->extra1;
4247 delay = net->ipv6.sysctl.flush_delay;
4248 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4249 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4250 return 0;
1da177e4
LT
4251}
4252
fe2c6338 4253struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4254 {
1da177e4 4255 .procname = "flush",
4990509f 4256 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4257 .maxlen = sizeof(int),
89c8b3a1 4258 .mode = 0200,
6d9f239a 4259 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4260 },
4261 {
1da177e4 4262 .procname = "gc_thresh",
9a7ec3a9 4263 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4264 .maxlen = sizeof(int),
4265 .mode = 0644,
6d9f239a 4266 .proc_handler = proc_dointvec,
1da177e4
LT
4267 },
4268 {
1da177e4 4269 .procname = "max_size",
4990509f 4270 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4271 .maxlen = sizeof(int),
4272 .mode = 0644,
6d9f239a 4273 .proc_handler = proc_dointvec,
1da177e4
LT
4274 },
4275 {
1da177e4 4276 .procname = "gc_min_interval",
4990509f 4277 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4278 .maxlen = sizeof(int),
4279 .mode = 0644,
6d9f239a 4280 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4281 },
4282 {
1da177e4 4283 .procname = "gc_timeout",
4990509f 4284 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4285 .maxlen = sizeof(int),
4286 .mode = 0644,
6d9f239a 4287 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4288 },
4289 {
1da177e4 4290 .procname = "gc_interval",
4990509f 4291 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4292 .maxlen = sizeof(int),
4293 .mode = 0644,
6d9f239a 4294 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4295 },
4296 {
1da177e4 4297 .procname = "gc_elasticity",
4990509f 4298 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4299 .maxlen = sizeof(int),
4300 .mode = 0644,
f3d3f616 4301 .proc_handler = proc_dointvec,
1da177e4
LT
4302 },
4303 {
1da177e4 4304 .procname = "mtu_expires",
4990509f 4305 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4306 .maxlen = sizeof(int),
4307 .mode = 0644,
6d9f239a 4308 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4309 },
4310 {
1da177e4 4311 .procname = "min_adv_mss",
4990509f 4312 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4313 .maxlen = sizeof(int),
4314 .mode = 0644,
f3d3f616 4315 .proc_handler = proc_dointvec,
1da177e4
LT
4316 },
4317 {
1da177e4 4318 .procname = "gc_min_interval_ms",
4990509f 4319 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4320 .maxlen = sizeof(int),
4321 .mode = 0644,
6d9f239a 4322 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4323 },
f8572d8f 4324 { }
1da177e4
LT
4325};
4326
2c8c1e72 4327struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4328{
4329 struct ctl_table *table;
4330
4331 table = kmemdup(ipv6_route_table_template,
4332 sizeof(ipv6_route_table_template),
4333 GFP_KERNEL);
5ee09105
YH
4334
4335 if (table) {
4336 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4337 table[0].extra1 = net;
86393e52 4338 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4339 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4340 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4341 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4342 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4343 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4344 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4345 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4346 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4347
4348 /* Don't export sysctls to unprivileged users */
4349 if (net->user_ns != &init_user_ns)
4350 table[0].procname = NULL;
5ee09105
YH
4351 }
4352
760f2d01
DL
4353 return table;
4354}
1da177e4
LT
4355#endif
4356
2c8c1e72 4357static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4358{
633d424b 4359 int ret = -ENOMEM;
8ed67789 4360
86393e52
AD
4361 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4362 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4363
fc66f95c
ED
4364 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4365 goto out_ip6_dst_ops;
4366
8ed67789
DL
4367 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4368 sizeof(*net->ipv6.ip6_null_entry),
4369 GFP_KERNEL);
4370 if (!net->ipv6.ip6_null_entry)
fc66f95c 4371 goto out_ip6_dst_entries;
d8d1f30b 4372 net->ipv6.ip6_null_entry->dst.path =
8ed67789 4373 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 4374 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4375 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4376 ip6_template_metrics, true);
8ed67789
DL
4377
4378#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4379 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4380 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4381 sizeof(*net->ipv6.ip6_prohibit_entry),
4382 GFP_KERNEL);
68fffc67
PZ
4383 if (!net->ipv6.ip6_prohibit_entry)
4384 goto out_ip6_null_entry;
d8d1f30b 4385 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4386 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4387 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4388 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4389 ip6_template_metrics, true);
8ed67789
DL
4390
4391 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4392 sizeof(*net->ipv6.ip6_blk_hole_entry),
4393 GFP_KERNEL);
68fffc67
PZ
4394 if (!net->ipv6.ip6_blk_hole_entry)
4395 goto out_ip6_prohibit_entry;
d8d1f30b 4396 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4397 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4398 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4399 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4400 ip6_template_metrics, true);
8ed67789
DL
4401#endif
4402
b339a47c
PZ
4403 net->ipv6.sysctl.flush_delay = 0;
4404 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4405 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4406 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4407 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4408 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4409 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4410 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4411
6891a346
BT
4412 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4413
8ed67789
DL
4414 ret = 0;
4415out:
4416 return ret;
f2fc6a54 4417
68fffc67
PZ
4418#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4419out_ip6_prohibit_entry:
4420 kfree(net->ipv6.ip6_prohibit_entry);
4421out_ip6_null_entry:
4422 kfree(net->ipv6.ip6_null_entry);
4423#endif
fc66f95c
ED
4424out_ip6_dst_entries:
4425 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4426out_ip6_dst_ops:
f2fc6a54 4427 goto out;
cdb18761
DL
4428}
4429
2c8c1e72 4430static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4431{
8ed67789
DL
4432 kfree(net->ipv6.ip6_null_entry);
4433#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4434 kfree(net->ipv6.ip6_prohibit_entry);
4435 kfree(net->ipv6.ip6_blk_hole_entry);
4436#endif
41bb78b4 4437 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4438}
4439
d189634e
TG
4440static int __net_init ip6_route_net_init_late(struct net *net)
4441{
4442#ifdef CONFIG_PROC_FS
d4beaa66
G
4443 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4444 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4445#endif
4446 return 0;
4447}
4448
4449static void __net_exit ip6_route_net_exit_late(struct net *net)
4450{
4451#ifdef CONFIG_PROC_FS
ece31ffd
G
4452 remove_proc_entry("ipv6_route", net->proc_net);
4453 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4454#endif
4455}
4456
cdb18761
DL
4457static struct pernet_operations ip6_route_net_ops = {
4458 .init = ip6_route_net_init,
4459 .exit = ip6_route_net_exit,
4460};
4461
c3426b47
DM
4462static int __net_init ipv6_inetpeer_init(struct net *net)
4463{
4464 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4465
4466 if (!bp)
4467 return -ENOMEM;
4468 inet_peer_base_init(bp);
4469 net->ipv6.peers = bp;
4470 return 0;
4471}
4472
4473static void __net_exit ipv6_inetpeer_exit(struct net *net)
4474{
4475 struct inet_peer_base *bp = net->ipv6.peers;
4476
4477 net->ipv6.peers = NULL;
56a6b248 4478 inetpeer_invalidate_tree(bp);
c3426b47
DM
4479 kfree(bp);
4480}
4481
2b823f72 4482static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4483 .init = ipv6_inetpeer_init,
4484 .exit = ipv6_inetpeer_exit,
4485};
4486
d189634e
TG
4487static struct pernet_operations ip6_route_net_late_ops = {
4488 .init = ip6_route_net_init_late,
4489 .exit = ip6_route_net_exit_late,
4490};
4491
8ed67789
DL
4492static struct notifier_block ip6_route_dev_notifier = {
4493 .notifier_call = ip6_route_dev_notify,
242d3a49 4494 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4495};
4496
2f460933
WC
4497void __init ip6_route_init_special_entries(void)
4498{
4499 /* Registering of the loopback is done before this portion of code,
4500 * the loopback reference in rt6_info will not be taken, do it
4501 * manually for init_net */
4502 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4503 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4504 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4505 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4506 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4507 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4508 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4509 #endif
4510}
4511
433d49c3 4512int __init ip6_route_init(void)
1da177e4 4513{
433d49c3 4514 int ret;
8d0b94af 4515 int cpu;
433d49c3 4516
9a7ec3a9
DL
4517 ret = -ENOMEM;
4518 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4519 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4520 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4521 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4522 goto out;
14e50e57 4523
fc66f95c 4524 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4525 if (ret)
bdb3289f 4526 goto out_kmem_cache;
bdb3289f 4527
c3426b47
DM
4528 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4529 if (ret)
e8803b6c 4530 goto out_dst_entries;
2a0c451a 4531
7e52b33b
DM
4532 ret = register_pernet_subsys(&ip6_route_net_ops);
4533 if (ret)
4534 goto out_register_inetpeer;
c3426b47 4535
5dc121e9
AE
4536 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4537
e8803b6c 4538 ret = fib6_init();
433d49c3 4539 if (ret)
8ed67789 4540 goto out_register_subsys;
433d49c3 4541
433d49c3
DL
4542 ret = xfrm6_init();
4543 if (ret)
e8803b6c 4544 goto out_fib6_init;
c35b7e72 4545
433d49c3
DL
4546 ret = fib6_rules_init();
4547 if (ret)
4548 goto xfrm6_init;
7e5449c2 4549
d189634e
TG
4550 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4551 if (ret)
4552 goto fib6_rules_init;
4553
433d49c3 4554 ret = -ENOBUFS;
b97bac64
FW
4555 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4556 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4557 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4558 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4559 goto out_register_late_subsys;
c127ea2c 4560
8ed67789 4561 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4562 if (ret)
d189634e 4563 goto out_register_late_subsys;
8ed67789 4564
8d0b94af
MKL
4565 for_each_possible_cpu(cpu) {
4566 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4567
4568 INIT_LIST_HEAD(&ul->head);
4569 spin_lock_init(&ul->lock);
4570 }
4571
433d49c3
DL
4572out:
4573 return ret;
4574
d189634e
TG
4575out_register_late_subsys:
4576 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4577fib6_rules_init:
433d49c3
DL
4578 fib6_rules_cleanup();
4579xfrm6_init:
433d49c3 4580 xfrm6_fini();
2a0c451a
TG
4581out_fib6_init:
4582 fib6_gc_cleanup();
8ed67789
DL
4583out_register_subsys:
4584 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4585out_register_inetpeer:
4586 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4587out_dst_entries:
4588 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4589out_kmem_cache:
f2fc6a54 4590 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4591 goto out;
1da177e4
LT
4592}
4593
4594void ip6_route_cleanup(void)
4595{
8ed67789 4596 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4597 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4598 fib6_rules_cleanup();
1da177e4 4599 xfrm6_fini();
1da177e4 4600 fib6_gc_cleanup();
c3426b47 4601 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4602 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4603 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4604 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4605}