ipv6: prepare rt6_clean_tohost() for exception table
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Outcome of the neighbour reachability check used while scoring routes.
 * Negative values are failures; rt6_score_route() returns them unchanged.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is NUD_FAILED (router-pref builds) */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
d52d3997
MKL
185static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
186{
187 return dst_metrics_write_ptr(rt->dst.from);
188}
189
06582540
DM
190static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
191{
4b32b5ad 192 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 193
d52d3997
MKL
194 if (rt->rt6i_flags & RTF_PCPU)
195 return rt6_pcpu_cow_metrics(rt);
196 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
197 return NULL;
198 else
3b471175 199 return dst_cow_metrics_generic(dst, old);
06582540
DM
200}
201
f894cbf8
DM
202static inline const void *choose_neigh_daddr(struct rt6_info *rt,
203 struct sk_buff *skb,
204 const void *daddr)
39232973
DM
205{
206 struct in6_addr *p = &rt->rt6i_gateway;
207
a7563f34 208 if (!ipv6_addr_any(p))
39232973 209 return (const void *) p;
f894cbf8
DM
210 else if (skb)
211 return &ipv6_hdr(skb)->daddr;
39232973
DM
212 return daddr;
213}
214
f894cbf8
DM
215static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
d3aaeb38 218{
39232973
DM
219 struct rt6_info *rt = (struct rt6_info *) dst;
220 struct neighbour *n;
221
f894cbf8 222 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 223 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
224 if (n)
225 return n;
226 return neigh_create(&nd_tbl, daddr, dst->dev);
227}
228
63fca65d
JA
229static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
230{
231 struct net_device *dev = dst->dev;
232 struct rt6_info *rt = (struct rt6_info *)dst;
233
234 daddr = choose_neigh_daddr(rt, NULL, daddr);
235 if (!daddr)
236 return;
237 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
238 return;
239 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
240 return;
241 __ipv6_confirm_neigh(dev, daddr);
242}
243
9a7ec3a9 244static struct dst_ops ip6_dst_ops_template = {
1da177e4 245 .family = AF_INET6,
1da177e4
LT
246 .gc = ip6_dst_gc,
247 .gc_thresh = 1024,
248 .check = ip6_dst_check,
0dbaee3b 249 .default_advmss = ip6_default_advmss,
ebb762f2 250 .mtu = ip6_mtu,
06582540 251 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
252 .destroy = ip6_dst_destroy,
253 .ifdown = ip6_dst_ifdown,
254 .negative_advice = ip6_negative_advice,
255 .link_failure = ip6_link_failure,
256 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 257 .redirect = rt6_do_redirect,
9f8955cc 258 .local_out = __ip6_local_out,
d3aaeb38 259 .neigh_lookup = ip6_neigh_lookup,
63fca65d 260 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
261};
262
ebb762f2 263static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 264{
618f9bc7
SK
265 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
266
267 return mtu ? : dst->dev->mtu;
ec831ea7
RD
268}
269
6700c270
DM
270static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
271 struct sk_buff *skb, u32 mtu)
14e50e57
DM
272{
273}
274
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
279
14e50e57
DM
280static struct dst_ops ip6_dst_blackhole_ops = {
281 .family = AF_INET6,
14e50e57
DM
282 .destroy = ip6_dst_destroy,
283 .check = ip6_dst_check,
ebb762f2 284 .mtu = ip6_blackhole_mtu,
214f45c9 285 .default_advmss = ip6_default_advmss,
14e50e57 286 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 287 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 288 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 289 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
290};
291
62fa8a84 292static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 293 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
294};
295
fb0af4c7 296static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
297 .dst = {
298 .__refcnt = ATOMIC_INIT(1),
299 .__use = 1,
2c20cbd7 300 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 301 .error = -ENETUNREACH,
d8d1f30b
CG
302 .input = ip6_pkt_discard,
303 .output = ip6_pkt_discard_out,
1da177e4
LT
304 },
305 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 306 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
307 .rt6i_metric = ~(u32) 0,
308 .rt6i_ref = ATOMIC_INIT(1),
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
326};
327
fb0af4c7 328static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
329 .dst = {
330 .__refcnt = ATOMIC_INIT(1),
331 .__use = 1,
2c20cbd7 332 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 333 .error = -EINVAL,
d8d1f30b 334 .input = dst_discard,
ede2059d 335 .output = dst_discard_out,
101367c2
TG
336 },
337 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 338 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
339 .rt6i_metric = ~(u32) 0,
340 .rt6i_ref = ATOMIC_INIT(1),
341};
342
343#endif
344
ebfa45f0
MKL
345static void rt6_info_init(struct rt6_info *rt)
346{
347 struct dst_entry *dst = &rt->dst;
348
349 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
350 INIT_LIST_HEAD(&rt->rt6i_siblings);
351 INIT_LIST_HEAD(&rt->rt6i_uncached);
352}
353
1da177e4 354/* allocate dst with ip6_dst_ops */
d52d3997
MKL
355static struct rt6_info *__ip6_dst_alloc(struct net *net,
356 struct net_device *dev,
ad706862 357 int flags)
1da177e4 358{
97bab73f 359 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 360 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 361
ebfa45f0
MKL
362 if (rt)
363 rt6_info_init(rt);
8104891b 364
cf911662 365 return rt;
1da177e4
LT
366}
367
9ab179d8
DA
368struct rt6_info *ip6_dst_alloc(struct net *net,
369 struct net_device *dev,
370 int flags)
d52d3997 371{
ad706862 372 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
373
374 if (rt) {
375 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
376 if (rt->rt6i_pcpu) {
377 int cpu;
378
379 for_each_possible_cpu(cpu) {
380 struct rt6_info **p;
381
382 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
383 /* no one shares rt */
384 *p = NULL;
385 }
386 } else {
587fea74 387 dst_release_immediate(&rt->dst);
d52d3997
MKL
388 return NULL;
389 }
390 }
391
392 return rt;
393}
9ab179d8 394EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 395
1da177e4
LT
396static void ip6_dst_destroy(struct dst_entry *dst)
397{
398 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 399 struct rt6_exception_bucket *bucket;
ecd98837 400 struct dst_entry *from = dst->from;
8d0b94af 401 struct inet6_dev *idev;
1da177e4 402
4b32b5ad 403 dst_destroy_metrics_generic(dst);
87775312 404 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
405 rt6_uncached_list_del(rt);
406
407 idev = rt->rt6i_idev;
38308473 408 if (idev) {
1da177e4
LT
409 rt->rt6i_idev = NULL;
410 in6_dev_put(idev);
1ab1457c 411 }
35732d01
WW
412 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
413 if (bucket) {
414 rt->rt6i_exception_bucket = NULL;
415 kfree(bucket);
416 }
1716a961 417
ecd98837
YH
418 dst->from = NULL;
419 dst_release(from);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
1716a961 452 } else if (rt->dst.from) {
1e2ea8ad
XL
453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
454 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
51ebd318 459static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
460 struct flowi6 *fl6, int oif,
461 int strict)
51ebd318
ND
462{
463 struct rt6_info *sibling, *next_sibling;
464 int route_choosen;
465
b673d6cc
JS
466 /* We might have already computed the hash for ICMPv6 errors. In such
467 * case it will always be non-zero. Otherwise now is the time to do it.
468 */
469 if (!fl6->mp_hash)
470 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
471
472 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
473 /* Don't change the route, if route_choosen == 0
474 * (siblings does not include ourself)
475 */
476 if (route_choosen)
477 list_for_each_entry_safe(sibling, next_sibling,
478 &match->rt6i_siblings, rt6i_siblings) {
479 route_choosen--;
480 if (route_choosen == 0) {
52bd4c0c
ND
481 if (rt6_score_route(sibling, oif, strict) < 0)
482 break;
51ebd318
ND
483 match = sibling;
484 break;
485 }
486 }
487 return match;
488}
489
1da177e4 490/*
c71099ac 491 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
492 */
493
8ed67789
DL
494static inline struct rt6_info *rt6_device_match(struct net *net,
495 struct rt6_info *rt,
b71d1d42 496 const struct in6_addr *saddr,
1da177e4 497 int oif,
d420895e 498 int flags)
1da177e4
LT
499{
500 struct rt6_info *local = NULL;
501 struct rt6_info *sprt;
502
dd3abc4e
YH
503 if (!oif && ipv6_addr_any(saddr))
504 goto out;
505
d8d1f30b 506 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 507 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
508
509 if (oif) {
1da177e4
LT
510 if (dev->ifindex == oif)
511 return sprt;
512 if (dev->flags & IFF_LOOPBACK) {
38308473 513 if (!sprt->rt6i_idev ||
1da177e4 514 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 515 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 516 continue;
17fb0b2b
DA
517 if (local &&
518 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
519 continue;
520 }
521 local = sprt;
522 }
dd3abc4e
YH
523 } else {
524 if (ipv6_chk_addr(net, saddr, dev,
525 flags & RT6_LOOKUP_F_IFACE))
526 return sprt;
1da177e4 527 }
dd3abc4e 528 }
1da177e4 529
dd3abc4e 530 if (oif) {
1da177e4
LT
531 if (local)
532 return local;
533
d420895e 534 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 535 return net->ipv6.ip6_null_entry;
1da177e4 536 }
dd3abc4e 537out:
1da177e4
LT
538 return rt;
539}
540
27097255 541#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
542struct __rt6_probe_work {
543 struct work_struct work;
544 struct in6_addr target;
545 struct net_device *dev;
546};
547
548static void rt6_probe_deferred(struct work_struct *w)
549{
550 struct in6_addr mcaddr;
551 struct __rt6_probe_work *work =
552 container_of(w, struct __rt6_probe_work, work);
553
554 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 555 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 556 dev_put(work->dev);
662f5533 557 kfree(work);
c2f17e82
HFS
558}
559
27097255
YH
560static void rt6_probe(struct rt6_info *rt)
561{
990edb42 562 struct __rt6_probe_work *work;
f2c31e32 563 struct neighbour *neigh;
27097255
YH
564 /*
565 * Okay, this does not seem to be appropriate
566 * for now, however, we need to check if it
567 * is really so; aka Router Reachability Probing.
568 *
569 * Router Reachability Probe MUST be rate-limited
570 * to no more than one per minute.
571 */
2152caea 572 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 573 return;
2152caea
YH
574 rcu_read_lock_bh();
575 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
576 if (neigh) {
8d6c31bf
MKL
577 if (neigh->nud_state & NUD_VALID)
578 goto out;
579
990edb42 580 work = NULL;
2152caea 581 write_lock(&neigh->lock);
990edb42
MKL
582 if (!(neigh->nud_state & NUD_VALID) &&
583 time_after(jiffies,
584 neigh->updated +
585 rt->rt6i_idev->cnf.rtr_probe_interval)) {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 if (work)
588 __neigh_set_probe_once(neigh);
c2f17e82 589 }
2152caea 590 write_unlock(&neigh->lock);
990edb42
MKL
591 } else {
592 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 593 }
990edb42
MKL
594
595 if (work) {
596 INIT_WORK(&work->work, rt6_probe_deferred);
597 work->target = rt->rt6i_gateway;
598 dev_hold(rt->dst.dev);
599 work->dev = rt->dst.dev;
600 schedule_work(&work->work);
601 }
602
8d6c31bf 603out:
2152caea 604 rcu_read_unlock_bh();
27097255
YH
605}
606#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no probing: intentionally empty. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
610#endif
611
1da177e4 612/*
554cfb7e 613 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 614 */
b6f99a21 615static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 616{
d1918542 617 struct net_device *dev = rt->dst.dev;
161980f4 618 if (!oif || dev->ifindex == oif)
554cfb7e 619 return 2;
161980f4
DM
620 if ((dev->flags & IFF_LOOPBACK) &&
621 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
622 return 1;
623 return 0;
554cfb7e 624}
1da177e4 625
afc154e9 626static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 627{
f2c31e32 628 struct neighbour *neigh;
afc154e9 629 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 630
4d0c5911
YH
631 if (rt->rt6i_flags & RTF_NONEXTHOP ||
632 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 633 return RT6_NUD_SUCCEED;
145a3621
YH
634
635 rcu_read_lock_bh();
636 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
637 if (neigh) {
638 read_lock(&neigh->lock);
554cfb7e 639 if (neigh->nud_state & NUD_VALID)
afc154e9 640 ret = RT6_NUD_SUCCEED;
398bcbeb 641#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 642 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 643 ret = RT6_NUD_SUCCEED;
7e980569
JB
644 else
645 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 646#endif
145a3621 647 read_unlock(&neigh->lock);
afc154e9
HFS
648 } else {
649 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 650 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 651 }
145a3621
YH
652 rcu_read_unlock_bh();
653
a5a81f0b 654 return ret;
1da177e4
LT
655}
656
554cfb7e
YH
657static int rt6_score_route(struct rt6_info *rt, int oif,
658 int strict)
1da177e4 659{
a5a81f0b 660 int m;
1ab1457c 661
4d0c5911 662 m = rt6_check_dev(rt, oif);
77d16f45 663 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 664 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
665#ifdef CONFIG_IPV6_ROUTER_PREF
666 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
667#endif
afc154e9
HFS
668 if (strict & RT6_LOOKUP_F_REACHABLE) {
669 int n = rt6_check_neigh(rt);
670 if (n < 0)
671 return n;
672 }
554cfb7e
YH
673 return m;
674}
675
f11e6659 676static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
677 int *mpri, struct rt6_info *match,
678 bool *do_rr)
554cfb7e 679{
f11e6659 680 int m;
afc154e9 681 bool match_do_rr = false;
35103d11
AG
682 struct inet6_dev *idev = rt->rt6i_idev;
683 struct net_device *dev = rt->dst.dev;
684
685 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
686 idev->cnf.ignore_routes_with_linkdown &&
687 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 688 goto out;
f11e6659
DM
689
690 if (rt6_check_expired(rt))
691 goto out;
692
693 m = rt6_score_route(rt, oif, strict);
7e980569 694 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
695 match_do_rr = true;
696 m = 0; /* lowest valid score */
7e980569 697 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 698 goto out;
afc154e9
HFS
699 }
700
701 if (strict & RT6_LOOKUP_F_REACHABLE)
702 rt6_probe(rt);
f11e6659 703
7e980569 704 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 705 if (m > *mpri) {
afc154e9 706 *do_rr = match_do_rr;
f11e6659
DM
707 *mpri = m;
708 match = rt;
f11e6659 709 }
f11e6659
DM
710out:
711 return match;
712}
713
714static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
715 struct rt6_info *rr_head,
afc154e9
HFS
716 u32 metric, int oif, int strict,
717 bool *do_rr)
f11e6659 718{
9fbdcfaf 719 struct rt6_info *rt, *match, *cont;
554cfb7e 720 int mpri = -1;
1da177e4 721
f11e6659 722 match = NULL;
9fbdcfaf
SK
723 cont = NULL;
724 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
731 }
732
733 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
745 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
f11e6659
DM
751static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
752{
753 struct rt6_info *match, *rt0;
8ed67789 754 struct net *net;
afc154e9 755 bool do_rr = false;
1da177e4 756
f11e6659
DM
757 rt0 = fn->rr_ptr;
758 if (!rt0)
759 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 760
afc154e9
HFS
761 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
d8d1f30b 765 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
f11e6659
DM
768 if (!next || next->rt6i_metric != rt0->rt6i_metric)
769 next = fn->leaf;
770
771 if (next != rt0)
772 fn->rr_ptr = next;
1da177e4 773 }
1da177e4 774
d1918542 775 net = dev_net(rt0->dst.dev);
a02cec21 776 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
777}
778
8b9df265
MKL
779static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
780{
781 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
782}
783
70ceb4f5
YH
784#ifdef CONFIG_IPV6_ROUTE_INFO
785int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 786 const struct in6_addr *gwaddr)
70ceb4f5 787{
c346dca1 788 struct net *net = dev_net(dev);
70ceb4f5
YH
789 struct route_info *rinfo = (struct route_info *) opt;
790 struct in6_addr prefix_buf, *prefix;
791 unsigned int pref;
4bed72e4 792 unsigned long lifetime;
70ceb4f5
YH
793 struct rt6_info *rt;
794
795 if (len < sizeof(struct route_info)) {
796 return -EINVAL;
797 }
798
799 /* Sanity check for prefix_len and length */
800 if (rinfo->length > 3) {
801 return -EINVAL;
802 } else if (rinfo->prefix_len > 128) {
803 return -EINVAL;
804 } else if (rinfo->prefix_len > 64) {
805 if (rinfo->length < 2) {
806 return -EINVAL;
807 }
808 } else if (rinfo->prefix_len > 0) {
809 if (rinfo->length < 1) {
810 return -EINVAL;
811 }
812 }
813
814 pref = rinfo->route_pref;
815 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 816 return -EINVAL;
70ceb4f5 817
4bed72e4 818 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
819
820 if (rinfo->length == 3)
821 prefix = (struct in6_addr *)rinfo->prefix;
822 else {
823 /* this function is safe */
824 ipv6_addr_prefix(&prefix_buf,
825 (struct in6_addr *)rinfo->prefix,
826 rinfo->prefix_len);
827 prefix = &prefix_buf;
828 }
829
f104a567
DJ
830 if (rinfo->prefix_len == 0)
831 rt = rt6_get_dflt_router(gwaddr, dev);
832 else
833 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 834 gwaddr, dev);
70ceb4f5
YH
835
836 if (rt && !lifetime) {
e0a1ad73 837 ip6_del_rt(rt);
70ceb4f5
YH
838 rt = NULL;
839 }
840
841 if (!rt && lifetime)
830218c1
DA
842 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
843 dev, pref);
70ceb4f5
YH
844 else if (rt)
845 rt->rt6i_flags = RTF_ROUTEINFO |
846 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
847
848 if (rt) {
1716a961
G
849 if (!addrconf_finite_timeout(lifetime))
850 rt6_clean_expires(rt);
851 else
852 rt6_set_expires(rt, jiffies + HZ * lifetime);
853
94e187c0 854 ip6_rt_put(rt);
70ceb4f5
YH
855 }
856 return 0;
857}
858#endif
859
a3c00e46
MKL
860static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
861 struct in6_addr *saddr)
862{
863 struct fib6_node *pn;
864 while (1) {
865 if (fn->fn_flags & RTN_TL_ROOT)
866 return NULL;
867 pn = fn->parent;
868 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
869 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
870 else
871 fn = pn;
872 if (fn->fn_flags & RTN_RTINFO)
873 return fn;
874 }
875}
c71099ac 876
8ed67789
DL
877static struct rt6_info *ip6_pol_route_lookup(struct net *net,
878 struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
1da177e4
LT
880{
881 struct fib6_node *fn;
882 struct rt6_info *rt;
883
c71099ac 884 read_lock_bh(&table->tb6_lock);
4c9483b2 885 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
886restart:
887 rt = fn->leaf;
4c9483b2 888 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 889 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 890 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
891 if (rt == net->ipv6.ip6_null_entry) {
892 fn = fib6_backtrack(fn, &fl6->saddr);
893 if (fn)
894 goto restart;
895 }
d8d1f30b 896 dst_use(&rt->dst, jiffies);
c71099ac 897 read_unlock_bh(&table->tb6_lock);
b811580d
DA
898
899 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
900
c71099ac
TG
901 return rt;
902
903}
904
67ba4152 905struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
906 int flags)
907{
908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
909}
910EXPORT_SYMBOL_GPL(ip6_route_lookup);
911
9acd9f3a
YH
912struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
913 const struct in6_addr *saddr, int oif, int strict)
c71099ac 914{
4c9483b2
DM
915 struct flowi6 fl6 = {
916 .flowi6_oif = oif,
917 .daddr = *daddr,
c71099ac
TG
918 };
919 struct dst_entry *dst;
77d16f45 920 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 921
adaa70bb 922 if (saddr) {
4c9483b2 923 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
924 flags |= RT6_LOOKUP_F_HAS_SADDR;
925 }
926
4c9483b2 927 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
928 if (dst->error == 0)
929 return (struct rt6_info *) dst;
930
931 dst_release(dst);
932
1da177e4
LT
933 return NULL;
934}
7159039a
YH
935EXPORT_SYMBOL(rt6_lookup);
936
c71099ac 937/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
938 * It takes new route entry, the addition fails by any reason the
939 * route is released.
940 * Caller must hold dst before calling it.
1da177e4
LT
941 */
942
e5fd387a 943static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
944 struct mx6_config *mxc,
945 struct netlink_ext_ack *extack)
1da177e4
LT
946{
947 int err;
c71099ac 948 struct fib6_table *table;
1da177e4 949
c71099ac
TG
950 table = rt->rt6i_table;
951 write_lock_bh(&table->tb6_lock);
333c4301 952 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 953 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
954
955 return err;
956}
957
40e22e8f
TG
958int ip6_ins_rt(struct rt6_info *rt)
959{
e715b6d3
FW
960 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
961 struct mx6_config mxc = { .mx = NULL, };
962
1cfb71ee
WW
963 /* Hold dst to account for the reference from the fib6 tree */
964 dst_hold(&rt->dst);
333c4301 965 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
966}
967
4832c30d
DA
968/* called with rcu_lock held */
969static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
970{
971 struct net_device *dev = rt->dst.dev;
972
973 if (rt->rt6i_flags & RTF_LOCAL) {
974 /* for copies of local routes, dst->dev needs to be the
975 * device if it is a master device, the master device if
976 * device is enslaved, and the loopback as the default
977 */
978 if (netif_is_l3_slave(dev) &&
979 !rt6_need_strict(&rt->rt6i_dst.addr))
980 dev = l3mdev_master_dev_rcu(dev);
981 else if (!netif_is_l3_master(dev))
982 dev = dev_net(dev)->loopback_dev;
983 /* last case is netif_is_l3_master(dev) is true in which
984 * case we want dev returned to be dev
985 */
986 }
987
988 return dev;
989}
990
8b9df265
MKL
991static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
992 const struct in6_addr *daddr,
993 const struct in6_addr *saddr)
1da177e4 994{
4832c30d 995 struct net_device *dev;
1da177e4
LT
996 struct rt6_info *rt;
997
998 /*
999 * Clone the route.
1000 */
1001
d52d3997 1002 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 1003 ort = (struct rt6_info *)ort->dst.from;
1da177e4 1004
4832c30d
DA
1005 rcu_read_lock();
1006 dev = ip6_rt_get_dev_rcu(ort);
1007 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1008 rcu_read_unlock();
83a09abd
MKL
1009 if (!rt)
1010 return NULL;
1011
1012 ip6_rt_copy_init(rt, ort);
1013 rt->rt6i_flags |= RTF_CACHE;
1014 rt->rt6i_metric = 0;
1015 rt->dst.flags |= DST_HOST;
1016 rt->rt6i_dst.addr = *daddr;
1017 rt->rt6i_dst.plen = 128;
1da177e4 1018
83a09abd
MKL
1019 if (!rt6_is_gw_or_nonexthop(ort)) {
1020 if (ort->rt6i_dst.plen != 128 &&
1021 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1022 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1023#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1024 if (rt->rt6i_src.plen && saddr) {
1025 rt->rt6i_src.addr = *saddr;
1026 rt->rt6i_src.plen = 128;
8b9df265 1027 }
83a09abd 1028#endif
95a9a5ba 1029 }
1da177e4 1030
95a9a5ba
YH
1031 return rt;
1032}
1da177e4 1033
d52d3997
MKL
1034static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1035{
4832c30d 1036 struct net_device *dev;
d52d3997
MKL
1037 struct rt6_info *pcpu_rt;
1038
4832c30d
DA
1039 rcu_read_lock();
1040 dev = ip6_rt_get_dev_rcu(rt);
1041 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1042 rcu_read_unlock();
d52d3997
MKL
1043 if (!pcpu_rt)
1044 return NULL;
1045 ip6_rt_copy_init(pcpu_rt, rt);
1046 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1047 pcpu_rt->rt6i_flags |= RTF_PCPU;
1048 return pcpu_rt;
1049}
1050
1051/* It should be called with read_lock_bh(&tb6_lock) acquired */
1052static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1053{
a73e4195 1054 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1055
1056 p = this_cpu_ptr(rt->rt6i_pcpu);
1057 pcpu_rt = *p;
1058
a73e4195
MKL
1059 if (pcpu_rt) {
1060 dst_hold(&pcpu_rt->dst);
1061 rt6_dst_from_metrics_check(pcpu_rt);
1062 }
1063 return pcpu_rt;
1064}
1065
1066static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1067{
9c7370a1 1068 struct fib6_table *table = rt->rt6i_table;
a73e4195 1069 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1070
1071 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1072 if (!pcpu_rt) {
1073 struct net *net = dev_net(rt->dst.dev);
1074
9c7370a1
MKL
1075 dst_hold(&net->ipv6.ip6_null_entry->dst);
1076 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1077 }
1078
9c7370a1
MKL
1079 read_lock_bh(&table->tb6_lock);
1080 if (rt->rt6i_pcpu) {
1081 p = this_cpu_ptr(rt->rt6i_pcpu);
1082 prev = cmpxchg(p, NULL, pcpu_rt);
1083 if (prev) {
1084 /* If someone did it before us, return prev instead */
587fea74 1085 dst_release_immediate(&pcpu_rt->dst);
9c7370a1
MKL
1086 pcpu_rt = prev;
1087 }
1088 } else {
1089 /* rt has been removed from the fib6 tree
1090 * before we have a chance to acquire the read_lock.
1091 * In this case, don't brother to create a pcpu rt
1092 * since rt is going away anyway. The next
1093 * dst_check() will trigger a re-lookup.
1094 */
587fea74 1095 dst_release_immediate(&pcpu_rt->dst);
9c7370a1 1096 pcpu_rt = rt;
d52d3997 1097 }
d52d3997
MKL
1098 dst_hold(&pcpu_rt->dst);
1099 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1100 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1101 return pcpu_rt;
1102}
1103
35732d01
WW
/* Exception hash table implementation.
 *
 * rt6_exception_lock is taken (with BHs disabled) by the functions
 * below that insert into, remove from or flush an exception table.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1107
1108/* Remove rt6_ex from hash table and free the memory
1109 * Caller must hold rt6_exception_lock
1110 */
1111static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1112 struct rt6_exception *rt6_ex)
1113{
1114 if (!bucket || !rt6_ex)
1115 return;
1116 rt6_ex->rt6i->rt6i_node = NULL;
1117 hlist_del_rcu(&rt6_ex->hlist);
1118 rt6_release(rt6_ex->rt6i);
1119 kfree_rcu(rt6_ex, rcu);
1120 WARN_ON_ONCE(!bucket->depth);
1121 bucket->depth--;
1122}
1123
1124/* Remove oldest rt6_ex in bucket and free the memory
1125 * Caller must hold rt6_exception_lock
1126 */
1127static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1128{
1129 struct rt6_exception *rt6_ex, *oldest = NULL;
1130
1131 if (!bucket)
1132 return;
1133
1134 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1135 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1136 oldest = rt6_ex;
1137 }
1138 rt6_remove_exception(bucket, oldest);
1139}
1140
1141static u32 rt6_exception_hash(const struct in6_addr *dst,
1142 const struct in6_addr *src)
1143{
1144 static u32 seed __read_mostly;
1145 u32 val;
1146
1147 net_get_random_once(&seed, sizeof(seed));
1148 val = jhash(dst, sizeof(*dst), seed);
1149
1150#ifdef CONFIG_IPV6_SUBTREES
1151 if (src)
1152 val = jhash(src, sizeof(*src), val);
1153#endif
1154 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1155}
1156
1157/* Helper function to find the cached rt in the hash table
1158 * and update bucket pointer to point to the bucket for this
1159 * (daddr, saddr) pair
1160 * Caller must hold rt6_exception_lock
1161 */
1162static struct rt6_exception *
1163__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1164 const struct in6_addr *daddr,
1165 const struct in6_addr *saddr)
1166{
1167 struct rt6_exception *rt6_ex;
1168 u32 hval;
1169
1170 if (!(*bucket) || !daddr)
1171 return NULL;
1172
1173 hval = rt6_exception_hash(daddr, saddr);
1174 *bucket += hval;
1175
1176 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1177 struct rt6_info *rt6 = rt6_ex->rt6i;
1178 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1179
1180#ifdef CONFIG_IPV6_SUBTREES
1181 if (matched && saddr)
1182 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1183#endif
1184 if (matched)
1185 return rt6_ex;
1186 }
1187 return NULL;
1188}
1189
1190/* Helper function to find the cached rt in the hash table
1191 * and update bucket pointer to point to the bucket for this
1192 * (daddr, saddr) pair
1193 * Caller must hold rcu_read_lock()
1194 */
1195static struct rt6_exception *
1196__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1197 const struct in6_addr *daddr,
1198 const struct in6_addr *saddr)
1199{
1200 struct rt6_exception *rt6_ex;
1201 u32 hval;
1202
1203 WARN_ON_ONCE(!rcu_read_lock_held());
1204
1205 if (!(*bucket) || !daddr)
1206 return NULL;
1207
1208 hval = rt6_exception_hash(daddr, saddr);
1209 *bucket += hval;
1210
1211 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1212 struct rt6_info *rt6 = rt6_ex->rt6i;
1213 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1214
1215#ifdef CONFIG_IPV6_SUBTREES
1216 if (matched && saddr)
1217 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1218#endif
1219 if (matched)
1220 return rt6_ex;
1221 }
1222 return NULL;
1223}
1224
1225static int rt6_insert_exception(struct rt6_info *nrt,
1226 struct rt6_info *ort)
1227{
1228 struct rt6_exception_bucket *bucket;
1229 struct in6_addr *src_key = NULL;
1230 struct rt6_exception *rt6_ex;
1231 int err = 0;
1232
1233 /* ort can't be a cache or pcpu route */
1234 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1235 ort = (struct rt6_info *)ort->dst.from;
1236 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1237
1238 spin_lock_bh(&rt6_exception_lock);
1239
1240 if (ort->exception_bucket_flushed) {
1241 err = -EINVAL;
1242 goto out;
1243 }
1244
1245 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1246 lockdep_is_held(&rt6_exception_lock));
1247 if (!bucket) {
1248 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1249 GFP_ATOMIC);
1250 if (!bucket) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1255 }
1256
1257#ifdef CONFIG_IPV6_SUBTREES
1258 /* rt6i_src.plen != 0 indicates ort is in subtree
1259 * and exception table is indexed by a hash of
1260 * both rt6i_dst and rt6i_src.
1261 * Otherwise, the exception table is indexed by
1262 * a hash of only rt6i_dst.
1263 */
1264 if (ort->rt6i_src.plen)
1265 src_key = &nrt->rt6i_src.addr;
1266#endif
60006a48
WW
1267
1268 /* Update rt6i_prefsrc as it could be changed
1269 * in rt6_remove_prefsrc()
1270 */
1271 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1272 /* rt6_mtu_change() might lower mtu on ort.
1273 * Only insert this exception route if its mtu
1274 * is less than ort's mtu value.
1275 */
1276 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1277 err = -EINVAL;
1278 goto out;
1279 }
60006a48 1280
35732d01
WW
1281 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1282 src_key);
1283 if (rt6_ex)
1284 rt6_remove_exception(bucket, rt6_ex);
1285
1286 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1287 if (!rt6_ex) {
1288 err = -ENOMEM;
1289 goto out;
1290 }
1291 rt6_ex->rt6i = nrt;
1292 rt6_ex->stamp = jiffies;
1293 atomic_inc(&nrt->rt6i_ref);
1294 nrt->rt6i_node = ort->rt6i_node;
1295 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1296 bucket->depth++;
1297
1298 if (bucket->depth > FIB6_MAX_DEPTH)
1299 rt6_exception_remove_oldest(bucket);
1300
1301out:
1302 spin_unlock_bh(&rt6_exception_lock);
1303
1304 /* Update fn->fn_sernum to invalidate all cached dst */
1305 if (!err)
1306 fib6_update_sernum(ort);
1307
1308 return err;
1309}
1310
1311void rt6_flush_exceptions(struct rt6_info *rt)
1312{
1313 struct rt6_exception_bucket *bucket;
1314 struct rt6_exception *rt6_ex;
1315 struct hlist_node *tmp;
1316 int i;
1317
1318 spin_lock_bh(&rt6_exception_lock);
1319 /* Prevent rt6_insert_exception() to recreate the bucket list */
1320 rt->exception_bucket_flushed = 1;
1321
1322 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1323 lockdep_is_held(&rt6_exception_lock));
1324 if (!bucket)
1325 goto out;
1326
1327 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1328 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1329 rt6_remove_exception(bucket, rt6_ex);
1330 WARN_ON_ONCE(bucket->depth);
1331 bucket++;
1332 }
1333
1334out:
1335 spin_unlock_bh(&rt6_exception_lock);
1336}
1337
1338/* Find cached rt in the hash table inside passed in rt
1339 * Caller has to hold rcu_read_lock()
1340 */
1341static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1342 struct in6_addr *daddr,
1343 struct in6_addr *saddr)
1344{
1345 struct rt6_exception_bucket *bucket;
1346 struct in6_addr *src_key = NULL;
1347 struct rt6_exception *rt6_ex;
1348 struct rt6_info *res = NULL;
1349
1350 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1351
1352#ifdef CONFIG_IPV6_SUBTREES
1353 /* rt6i_src.plen != 0 indicates rt is in subtree
1354 * and exception table is indexed by a hash of
1355 * both rt6i_dst and rt6i_src.
1356 * Otherwise, the exception table is indexed by
1357 * a hash of only rt6i_dst.
1358 */
1359 if (rt->rt6i_src.plen)
1360 src_key = saddr;
1361#endif
1362 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1363
1364 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1365 res = rt6_ex->rt6i;
1366
1367 return res;
1368}
1369
1370/* Remove the passed in cached rt from the hash table that contains it */
1371int rt6_remove_exception_rt(struct rt6_info *rt)
1372{
1373 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1374 struct rt6_exception_bucket *bucket;
1375 struct in6_addr *src_key = NULL;
1376 struct rt6_exception *rt6_ex;
1377 int err;
1378
1379 if (!from ||
1380 !(rt->rt6i_flags | RTF_CACHE))
1381 return -EINVAL;
1382
1383 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1384 return -ENOENT;
1385
1386 spin_lock_bh(&rt6_exception_lock);
1387 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1388 lockdep_is_held(&rt6_exception_lock));
1389#ifdef CONFIG_IPV6_SUBTREES
1390 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1391 * and exception table is indexed by a hash of
1392 * both rt6i_dst and rt6i_src.
1393 * Otherwise, the exception table is indexed by
1394 * a hash of only rt6i_dst.
1395 */
1396 if (from->rt6i_src.plen)
1397 src_key = &rt->rt6i_src.addr;
1398#endif
1399 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1400 &rt->rt6i_dst.addr,
1401 src_key);
1402 if (rt6_ex) {
1403 rt6_remove_exception(bucket, rt6_ex);
1404 err = 0;
1405 } else {
1406 err = -ENOENT;
1407 }
1408
1409 spin_unlock_bh(&rt6_exception_lock);
1410 return err;
1411}
1412
1413/* Find rt6_ex which contains the passed in rt cache and
1414 * refresh its stamp
1415 */
1416static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1417{
1418 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1419 struct rt6_exception_bucket *bucket;
1420 struct in6_addr *src_key = NULL;
1421 struct rt6_exception *rt6_ex;
1422
1423 if (!from ||
1424 !(rt->rt6i_flags | RTF_CACHE))
1425 return;
1426
1427 rcu_read_lock();
1428 bucket = rcu_dereference(from->rt6i_exception_bucket);
1429
1430#ifdef CONFIG_IPV6_SUBTREES
1431 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1432 * and exception table is indexed by a hash of
1433 * both rt6i_dst and rt6i_src.
1434 * Otherwise, the exception table is indexed by
1435 * a hash of only rt6i_dst.
1436 */
1437 if (from->rt6i_src.plen)
1438 src_key = &rt->rt6i_src.addr;
1439#endif
1440 rt6_ex = __rt6_find_exception_rcu(&bucket,
1441 &rt->rt6i_dst.addr,
1442 src_key);
1443 if (rt6_ex)
1444 rt6_ex->stamp = jiffies;
1445
1446 rcu_read_unlock();
1447}
1448
60006a48
WW
1449static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1450{
1451 struct rt6_exception_bucket *bucket;
1452 struct rt6_exception *rt6_ex;
1453 int i;
1454
1455 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1456 lockdep_is_held(&rt6_exception_lock));
1457
1458 if (bucket) {
1459 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1460 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1461 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1462 }
1463 bucket++;
1464 }
1465 }
1466}
1467
f5bbe7ee
WW
1468static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1469{
1470 struct rt6_exception_bucket *bucket;
1471 struct rt6_exception *rt6_ex;
1472 int i;
1473
1474 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1475 lockdep_is_held(&rt6_exception_lock));
1476
1477 if (bucket) {
1478 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1479 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1480 struct rt6_info *entry = rt6_ex->rt6i;
1481 /* For RTF_CACHE with rt6i_pmtu == 0
1482 * (i.e. a redirected route),
1483 * the metrics of its rt->dst.from has already
1484 * been updated.
1485 */
1486 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1487 entry->rt6i_pmtu = mtu;
1488 }
1489 bucket++;
1490 }
1491 }
1492}
1493
b16cb459
WW
1494#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1495
1496static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1497 struct in6_addr *gateway)
1498{
1499 struct rt6_exception_bucket *bucket;
1500 struct rt6_exception *rt6_ex;
1501 struct hlist_node *tmp;
1502 int i;
1503
1504 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1505 return;
1506
1507 spin_lock_bh(&rt6_exception_lock);
1508 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1509 lockdep_is_held(&rt6_exception_lock));
1510
1511 if (bucket) {
1512 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1513 hlist_for_each_entry_safe(rt6_ex, tmp,
1514 &bucket->chain, hlist) {
1515 struct rt6_info *entry = rt6_ex->rt6i;
1516
1517 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1518 RTF_CACHE_GATEWAY &&
1519 ipv6_addr_equal(gateway,
1520 &entry->rt6i_gateway)) {
1521 rt6_remove_exception(bucket, rt6_ex);
1522 }
1523 }
1524 bucket++;
1525 }
1526 }
1527
1528 spin_unlock_bh(&rt6_exception_lock);
1529}
1530
9ff74384
DA
1531struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1532 int oif, struct flowi6 *fl6, int flags)
1da177e4 1533{
367efcb9 1534 struct fib6_node *fn, *saved_fn;
45e4fd26 1535 struct rt6_info *rt;
c71099ac 1536 int strict = 0;
1da177e4 1537
77d16f45 1538 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1539 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1540 if (net->ipv6.devconf_all->forwarding == 0)
1541 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1542
c71099ac 1543 read_lock_bh(&table->tb6_lock);
1da177e4 1544
4c9483b2 1545 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1546 saved_fn = fn;
1da177e4 1547
ca254490
DA
1548 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1549 oif = 0;
1550
a3c00e46 1551redo_rt6_select:
367efcb9 1552 rt = rt6_select(fn, oif, strict);
52bd4c0c 1553 if (rt->rt6i_nsiblings)
367efcb9 1554 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1555 if (rt == net->ipv6.ip6_null_entry) {
1556 fn = fib6_backtrack(fn, &fl6->saddr);
1557 if (fn)
1558 goto redo_rt6_select;
367efcb9
MKL
1559 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1560 /* also consider unreachable route */
1561 strict &= ~RT6_LOOKUP_F_REACHABLE;
1562 fn = saved_fn;
1563 goto redo_rt6_select;
367efcb9 1564 }
a3c00e46
MKL
1565 }
1566
fb9de91e 1567
3da59bd9 1568 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1569 dst_use(&rt->dst, jiffies);
1570 read_unlock_bh(&table->tb6_lock);
1571
1572 rt6_dst_from_metrics_check(rt);
b811580d
DA
1573
1574 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1575 return rt;
3da59bd9
MKL
1576 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1577 !(rt->rt6i_flags & RTF_GATEWAY))) {
1578 /* Create a RTF_CACHE clone which will not be
1579 * owned by the fib6 tree. It is for the special case where
1580 * the daddr in the skb during the neighbor look-up is different
1581 * from the fl6->daddr used to look-up route here.
1582 */
1583
1584 struct rt6_info *uncached_rt;
1585
d52d3997
MKL
1586 dst_use(&rt->dst, jiffies);
1587 read_unlock_bh(&table->tb6_lock);
1588
3da59bd9
MKL
1589 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1590 dst_release(&rt->dst);
c71099ac 1591
1cfb71ee
WW
1592 if (uncached_rt) {
1593 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1594 * No need for another dst_hold()
1595 */
8d0b94af 1596 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1597 } else {
3da59bd9 1598 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1599 dst_hold(&uncached_rt->dst);
1600 }
b811580d
DA
1601
1602 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1603 return uncached_rt;
3da59bd9 1604
d52d3997
MKL
1605 } else {
1606 /* Get a percpu copy */
1607
1608 struct rt6_info *pcpu_rt;
1609
1610 rt->dst.lastuse = jiffies;
1611 rt->dst.__use++;
1612 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1613
9c7370a1
MKL
1614 if (pcpu_rt) {
1615 read_unlock_bh(&table->tb6_lock);
1616 } else {
1617 /* We have to do the read_unlock first
1618 * because rt6_make_pcpu_route() may trigger
1619 * ip6_dst_gc() which will take the write_lock.
1620 */
1621 dst_hold(&rt->dst);
1622 read_unlock_bh(&table->tb6_lock);
a73e4195 1623 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1624 dst_release(&rt->dst);
1625 }
d52d3997 1626
b811580d 1627 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1628 return pcpu_rt;
9c7370a1 1629
d52d3997 1630 }
1da177e4 1631}
9ff74384 1632EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1633
8ed67789 1634static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1635 struct flowi6 *fl6, int flags)
4acad72d 1636{
4c9483b2 1637 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1638}
1639
d409b847
MB
1640struct dst_entry *ip6_route_input_lookup(struct net *net,
1641 struct net_device *dev,
1642 struct flowi6 *fl6, int flags)
72331bc0
SL
1643{
1644 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1645 flags |= RT6_LOOKUP_F_IFACE;
1646
1647 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1648}
d409b847 1649EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1650
23aebdac
JS
1651static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1652 struct flow_keys *keys)
1653{
1654 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1655 const struct ipv6hdr *key_iph = outer_iph;
1656 const struct ipv6hdr *inner_iph;
1657 const struct icmp6hdr *icmph;
1658 struct ipv6hdr _inner_iph;
1659
1660 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1661 goto out;
1662
1663 icmph = icmp6_hdr(skb);
1664 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1665 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1666 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1667 icmph->icmp6_type != ICMPV6_PARAMPROB)
1668 goto out;
1669
1670 inner_iph = skb_header_pointer(skb,
1671 skb_transport_offset(skb) + sizeof(*icmph),
1672 sizeof(_inner_iph), &_inner_iph);
1673 if (!inner_iph)
1674 goto out;
1675
1676 key_iph = inner_iph;
1677out:
1678 memset(keys, 0, sizeof(*keys));
1679 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1680 keys->addrs.v6addrs.src = key_iph->saddr;
1681 keys->addrs.v6addrs.dst = key_iph->daddr;
1682 keys->tags.flow_label = ip6_flowinfo(key_iph);
1683 keys->basic.ip_proto = key_iph->nexthdr;
1684}
1685
1686/* if skb is set it will be used and fl6 can be NULL */
1687u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1688{
1689 struct flow_keys hash_keys;
1690
1691 if (skb) {
1692 ip6_multipath_l3_keys(skb, &hash_keys);
1693 return flow_hash_from_keys(&hash_keys);
1694 }
1695
1696 return get_hash_from_flowi6(fl6);
1697}
1698
c71099ac
TG
1699void ip6_route_input(struct sk_buff *skb)
1700{
b71d1d42 1701 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1702 struct net *net = dev_net(skb->dev);
adaa70bb 1703 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1704 struct ip_tunnel_info *tun_info;
4c9483b2 1705 struct flowi6 fl6 = {
e0d56fdd 1706 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1707 .daddr = iph->daddr,
1708 .saddr = iph->saddr,
6502ca52 1709 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1710 .flowi6_mark = skb->mark,
1711 .flowi6_proto = iph->nexthdr,
c71099ac 1712 };
adaa70bb 1713
904af04d 1714 tun_info = skb_tunnel_info(skb);
46fa062a 1715 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1716 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1717 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1718 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1719 skb_dst_drop(skb);
72331bc0 1720 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1721}
1722
8ed67789 1723static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1724 struct flowi6 *fl6, int flags)
1da177e4 1725{
4c9483b2 1726 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1727}
1728
6f21c96a
PA
1729struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1730 struct flowi6 *fl6, int flags)
c71099ac 1731{
d46a9d67 1732 bool any_src;
c71099ac 1733
4c1feac5
DA
1734 if (rt6_need_strict(&fl6->daddr)) {
1735 struct dst_entry *dst;
1736
1737 dst = l3mdev_link_scope_lookup(net, fl6);
1738 if (dst)
1739 return dst;
1740 }
ca254490 1741
1fb9489b 1742 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1743
d46a9d67 1744 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1745 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1746 (fl6->flowi6_oif && any_src))
77d16f45 1747 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1748
d46a9d67 1749 if (!any_src)
adaa70bb 1750 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1751 else if (sk)
1752 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1753
4c9483b2 1754 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1755}
6f21c96a 1756EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1757
2774c131 1758struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1759{
5c1e6aa3 1760 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1761 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1762 struct dst_entry *new = NULL;
1763
1dbe3252 1764 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
b2a9c0ed 1765 DST_OBSOLETE_NONE, 0);
14e50e57 1766 if (rt) {
0a1f5962 1767 rt6_info_init(rt);
8104891b 1768
0a1f5962 1769 new = &rt->dst;
14e50e57 1770 new->__use = 1;
352e512c 1771 new->input = dst_discard;
ede2059d 1772 new->output = dst_discard_out;
14e50e57 1773
0a1f5962 1774 dst_copy_metrics(new, &ort->dst);
14e50e57 1775
1dbe3252 1776 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1777 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1778 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1779 rt->rt6i_metric = 0;
1780
1781 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1782#ifdef CONFIG_IPV6_SUBTREES
1783 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1784#endif
14e50e57
DM
1785 }
1786
69ead7af
DM
1787 dst_release(dst_orig);
1788 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1789}
14e50e57 1790
1da177e4
LT
1791/*
1792 * Destination cache support functions
1793 */
1794
4b32b5ad
MKL
1795static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1796{
1797 if (rt->dst.from &&
1798 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1799 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1800}
1801
3da59bd9
MKL
1802static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1803{
36143645 1804 u32 rt_cookie = 0;
c5cff856
WW
1805
1806 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1807 return NULL;
1808
1809 if (rt6_check_expired(rt))
1810 return NULL;
1811
1812 return &rt->dst;
1813}
1814
1815static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1816{
5973fb1e
MKL
1817 if (!__rt6_check_expired(rt) &&
1818 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1819 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1820 return &rt->dst;
1821 else
1822 return NULL;
1823}
1824
1da177e4
LT
1825static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1826{
1827 struct rt6_info *rt;
1828
1829 rt = (struct rt6_info *) dst;
1830
6f3118b5
ND
1831 /* All IPV6 dsts are created with ->obsolete set to the value
1832 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1833 * into this function always.
1834 */
e3bc10bd 1835
4b32b5ad
MKL
1836 rt6_dst_from_metrics_check(rt);
1837
02bcf4e0 1838 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1839 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1840 return rt6_dst_from_check(rt, cookie);
1841 else
1842 return rt6_check(rt, cookie);
1da177e4
LT
1843}
1844
1845static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1846{
1847 struct rt6_info *rt = (struct rt6_info *) dst;
1848
1849 if (rt) {
54c1a859
YH
1850 if (rt->rt6i_flags & RTF_CACHE) {
1851 if (rt6_check_expired(rt)) {
1852 ip6_del_rt(rt);
1853 dst = NULL;
1854 }
1855 } else {
1da177e4 1856 dst_release(dst);
54c1a859
YH
1857 dst = NULL;
1858 }
1da177e4 1859 }
54c1a859 1860 return dst;
1da177e4
LT
1861}
1862
1863static void ip6_link_failure(struct sk_buff *skb)
1864{
1865 struct rt6_info *rt;
1866
3ffe533c 1867 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1868
adf30907 1869 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1870 if (rt) {
1eb4f758 1871 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1872 if (dst_hold_safe(&rt->dst))
1873 ip6_del_rt(rt);
c5cff856
WW
1874 } else {
1875 struct fib6_node *fn;
1876
1877 rcu_read_lock();
1878 fn = rcu_dereference(rt->rt6i_node);
1879 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1880 fn->fn_sernum = -1;
1881 rcu_read_unlock();
1eb4f758 1882 }
1da177e4
LT
1883 }
1884}
1885
45e4fd26
MKL
1886static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1887{
1888 struct net *net = dev_net(rt->dst.dev);
1889
1890 rt->rt6i_flags |= RTF_MODIFIED;
1891 rt->rt6i_pmtu = mtu;
1892 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1893}
1894
0d3f6d29
MKL
1895static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1896{
1897 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
1898 (rt->rt6i_flags & RTF_PCPU ||
1899 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
1900}
1901
45e4fd26
MKL
1902static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1903 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1904{
0dec879f 1905 const struct in6_addr *daddr, *saddr;
67ba4152 1906 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1907
45e4fd26
MKL
1908 if (rt6->rt6i_flags & RTF_LOCAL)
1909 return;
81aded24 1910
19bda36c
XL
1911 if (dst_metric_locked(dst, RTAX_MTU))
1912 return;
1913
0dec879f
JA
1914 if (iph) {
1915 daddr = &iph->daddr;
1916 saddr = &iph->saddr;
1917 } else if (sk) {
1918 daddr = &sk->sk_v6_daddr;
1919 saddr = &inet6_sk(sk)->saddr;
1920 } else {
1921 daddr = NULL;
1922 saddr = NULL;
1923 }
1924 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1925 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1926 if (mtu >= dst_mtu(dst))
1927 return;
9d289715 1928
0d3f6d29 1929 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1930 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1931 } else if (daddr) {
45e4fd26
MKL
1932 struct rt6_info *nrt6;
1933
45e4fd26
MKL
1934 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1935 if (nrt6) {
1936 rt6_do_update_pmtu(nrt6, mtu);
1937
1938 /* ip6_ins_rt(nrt6) will bump the
1939 * rt6->rt6i_node->fn_sernum
1940 * which will fail the next rt6_check() and
1941 * invalidate the sk->sk_dst_cache.
1942 */
1943 ip6_ins_rt(nrt6);
1cfb71ee
WW
1944 /* Release the reference taken in
1945 * ip6_rt_cache_alloc()
1946 */
1947 dst_release(&nrt6->dst);
45e4fd26 1948 }
1da177e4
LT
1949 }
1950}
1951
45e4fd26
MKL
1952static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1953 struct sk_buff *skb, u32 mtu)
1954{
1955 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1956}
1957
42ae66c8 1958void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1959 int oif, u32 mark, kuid_t uid)
81aded24
DM
1960{
1961 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1962 struct dst_entry *dst;
1963 struct flowi6 fl6;
1964
1965 memset(&fl6, 0, sizeof(fl6));
1966 fl6.flowi6_oif = oif;
1b3c61dc 1967 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1968 fl6.daddr = iph->daddr;
1969 fl6.saddr = iph->saddr;
6502ca52 1970 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1971 fl6.flowi6_uid = uid;
81aded24
DM
1972
1973 dst = ip6_route_output(net, NULL, &fl6);
1974 if (!dst->error)
45e4fd26 1975 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1976 dst_release(dst);
1977}
1978EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1979
1980void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1981{
33c162a9
MKL
1982 struct dst_entry *dst;
1983
81aded24 1984 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1985 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1986
1987 dst = __sk_dst_get(sk);
1988 if (!dst || !dst->obsolete ||
1989 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1990 return;
1991
1992 bh_lock_sock(sk);
1993 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1994 ip6_datagram_dst_update(sk, false);
1995 bh_unlock_sock(sk);
81aded24
DM
1996}
1997EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1998
b55b76b2
DJ
1999/* Handle redirects */
2000struct ip6rd_flowi {
2001 struct flowi6 fl6;
2002 struct in6_addr gateway;
2003};
2004
2005static struct rt6_info *__ip6_route_redirect(struct net *net,
2006 struct fib6_table *table,
2007 struct flowi6 *fl6,
2008 int flags)
2009{
2010 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2011 struct rt6_info *rt;
2012 struct fib6_node *fn;
2013
2014 /* Get the "current" route for this destination and
67c408cf 2015 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2016 *
2017 * RFC 4861 specifies that redirects should only be
2018 * accepted if they come from the nexthop to the target.
2019 * Due to the way the routes are chosen, this notion
2020 * is a bit fuzzy and one might need to check all possible
2021 * routes.
2022 */
2023
2024 read_lock_bh(&table->tb6_lock);
2025 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2026restart:
2027 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2028 if (rt6_check_expired(rt))
2029 continue;
2030 if (rt->dst.error)
2031 break;
2032 if (!(rt->rt6i_flags & RTF_GATEWAY))
2033 continue;
2034 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2035 continue;
2036 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
2037 continue;
2038 break;
2039 }
2040
2041 if (!rt)
2042 rt = net->ipv6.ip6_null_entry;
2043 else if (rt->dst.error) {
2044 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2045 goto out;
2046 }
2047
2048 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2049 fn = fib6_backtrack(fn, &fl6->saddr);
2050 if (fn)
2051 goto restart;
b55b76b2 2052 }
a3c00e46 2053
b0a1ba59 2054out:
b55b76b2
DJ
2055 dst_hold(&rt->dst);
2056
2057 read_unlock_bh(&table->tb6_lock);
2058
b811580d 2059 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
2060 return rt;
2061};
2062
2063static struct dst_entry *ip6_route_redirect(struct net *net,
2064 const struct flowi6 *fl6,
2065 const struct in6_addr *gateway)
2066{
2067 int flags = RT6_LOOKUP_F_HAS_SADDR;
2068 struct ip6rd_flowi rdfl;
2069
2070 rdfl.fl6 = *fl6;
2071 rdfl.gateway = *gateway;
2072
2073 return fib6_rule_lookup(net, &rdfl.fl6,
2074 flags, __ip6_route_redirect);
2075}
2076
e2d118a1
LC
2077void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2078 kuid_t uid)
3a5ad2ee
DM
2079{
2080 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2081 struct dst_entry *dst;
2082 struct flowi6 fl6;
2083
2084 memset(&fl6, 0, sizeof(fl6));
e374c618 2085 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2086 fl6.flowi6_oif = oif;
2087 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2088 fl6.daddr = iph->daddr;
2089 fl6.saddr = iph->saddr;
6502ca52 2090 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2091 fl6.flowi6_uid = uid;
3a5ad2ee 2092
b55b76b2
DJ
2093 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2094 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2095 dst_release(dst);
2096}
2097EXPORT_SYMBOL_GPL(ip6_redirect);
2098
c92a59ec
DJ
2099void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2100 u32 mark)
2101{
2102 const struct ipv6hdr *iph = ipv6_hdr(skb);
2103 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2104 struct dst_entry *dst;
2105 struct flowi6 fl6;
2106
2107 memset(&fl6, 0, sizeof(fl6));
e374c618 2108 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2109 fl6.flowi6_oif = oif;
2110 fl6.flowi6_mark = mark;
c92a59ec
DJ
2111 fl6.daddr = msg->dest;
2112 fl6.saddr = iph->daddr;
e2d118a1 2113 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2114
b55b76b2
DJ
2115 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2116 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2117 dst_release(dst);
2118}
2119
3a5ad2ee
DM
2120void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2121{
e2d118a1
LC
2122 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2123 sk->sk_uid);
3a5ad2ee
DM
2124}
2125EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2126
0dbaee3b 2127static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2128{
0dbaee3b
DM
2129 struct net_device *dev = dst->dev;
2130 unsigned int mtu = dst_mtu(dst);
2131 struct net *net = dev_net(dev);
2132
1da177e4
LT
2133 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2134
5578689a
DL
2135 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2136 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2137
2138 /*
1ab1457c
YH
2139 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2140 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2141 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2142 * rely only on pmtu discovery"
2143 */
2144 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2145 mtu = IPV6_MAXPLEN;
2146 return mtu;
2147}
2148
ebb762f2 2149static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2150{
4b32b5ad
MKL
2151 const struct rt6_info *rt = (const struct rt6_info *)dst;
2152 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2153 struct inet6_dev *idev;
618f9bc7 2154
4b32b5ad
MKL
2155 if (mtu)
2156 goto out;
2157
2158 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2159 if (mtu)
30f78d8e 2160 goto out;
618f9bc7
SK
2161
2162 mtu = IPV6_MIN_MTU;
d33e4553
DM
2163
2164 rcu_read_lock();
2165 idev = __in6_dev_get(dst->dev);
2166 if (idev)
2167 mtu = idev->cnf.mtu6;
2168 rcu_read_unlock();
2169
30f78d8e 2170out:
14972cbd
RP
2171 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2172
2173 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2174}
2175
3b00944c 2176struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2177 struct flowi6 *fl6)
1da177e4 2178{
87a11578 2179 struct dst_entry *dst;
1da177e4
LT
2180 struct rt6_info *rt;
2181 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2182 struct net *net = dev_net(dev);
1da177e4 2183
38308473 2184 if (unlikely(!idev))
122bdf67 2185 return ERR_PTR(-ENODEV);
1da177e4 2186
ad706862 2187 rt = ip6_dst_alloc(net, dev, 0);
38308473 2188 if (unlikely(!rt)) {
1da177e4 2189 in6_dev_put(idev);
87a11578 2190 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2191 goto out;
2192 }
2193
8e2ec639
YZ
2194 rt->dst.flags |= DST_HOST;
2195 rt->dst.output = ip6_output;
550bab42 2196 rt->rt6i_gateway = fl6->daddr;
87a11578 2197 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2198 rt->rt6i_dst.plen = 128;
2199 rt->rt6i_idev = idev;
14edd87d 2200 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2201
587fea74
WW
2202 /* Add this dst into uncached_list so that rt6_ifdown() can
2203 * do proper release of the net_device
2204 */
2205 rt6_uncached_list_add(rt);
1da177e4 2206
87a11578
DM
2207 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2208
1da177e4 2209out:
87a11578 2210 return dst;
1da177e4
LT
2211}
2212
569d3645 2213static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2214{
86393e52 2215 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2216 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2217 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2218 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2219 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2220 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2221 int entries;
7019b78e 2222
fc66f95c 2223 entries = dst_entries_get_fast(ops);
49a18d86 2224 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2225 entries <= rt_max_size)
1da177e4
LT
2226 goto out;
2227
6891a346 2228 net->ipv6.ip6_rt_gc_expire++;
14956643 2229 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2230 entries = dst_entries_get_slow(ops);
2231 if (entries < ops->gc_thresh)
7019b78e 2232 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2233out:
7019b78e 2234 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2235 return entries > rt_max_size;
1da177e4
LT
2236}
2237
e715b6d3
FW
2238static int ip6_convert_metrics(struct mx6_config *mxc,
2239 const struct fib6_config *cfg)
2240{
c3a8d947 2241 bool ecn_ca = false;
e715b6d3
FW
2242 struct nlattr *nla;
2243 int remaining;
2244 u32 *mp;
2245
63159f29 2246 if (!cfg->fc_mx)
e715b6d3
FW
2247 return 0;
2248
2249 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2250 if (unlikely(!mp))
2251 return -ENOMEM;
2252
2253 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2254 int type = nla_type(nla);
1bb14807 2255 u32 val;
e715b6d3 2256
1bb14807
DB
2257 if (!type)
2258 continue;
2259 if (unlikely(type > RTAX_MAX))
2260 goto err;
ea697639 2261
1bb14807
DB
2262 if (type == RTAX_CC_ALGO) {
2263 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2264
1bb14807 2265 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 2266 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
2267 if (val == TCP_CA_UNSPEC)
2268 goto err;
2269 } else {
2270 val = nla_get_u32(nla);
e715b6d3 2271 }
626abd59
PA
2272 if (type == RTAX_HOPLIMIT && val > 255)
2273 val = 255;
b8d3e416
DB
2274 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2275 goto err;
1bb14807
DB
2276
2277 mp[type - 1] = val;
2278 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2279 }
2280
c3a8d947
DB
2281 if (ecn_ca) {
2282 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2283 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2284 }
e715b6d3 2285
c3a8d947 2286 mxc->mx = mp;
e715b6d3
FW
2287 return 0;
2288 err:
2289 kfree(mp);
2290 return -EINVAL;
2291}
1da177e4 2292
8c14586f
DA
2293static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2294 struct fib6_config *cfg,
2295 const struct in6_addr *gw_addr)
2296{
2297 struct flowi6 fl6 = {
2298 .flowi6_oif = cfg->fc_ifindex,
2299 .daddr = *gw_addr,
2300 .saddr = cfg->fc_prefsrc,
2301 };
2302 struct fib6_table *table;
2303 struct rt6_info *rt;
d5d32e4b 2304 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2305
2306 table = fib6_get_table(net, cfg->fc_table);
2307 if (!table)
2308 return NULL;
2309
2310 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2311 flags |= RT6_LOOKUP_F_HAS_SADDR;
2312
2313 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2314
2315 /* if table lookup failed, fall back to full lookup */
2316 if (rt == net->ipv6.ip6_null_entry) {
2317 ip6_rt_put(rt);
2318 rt = NULL;
2319 }
2320
2321 return rt;
2322}
2323
333c4301
DA
2324static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2325 struct netlink_ext_ack *extack)
1da177e4 2326{
5578689a 2327 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2328 struct rt6_info *rt = NULL;
2329 struct net_device *dev = NULL;
2330 struct inet6_dev *idev = NULL;
c71099ac 2331 struct fib6_table *table;
1da177e4 2332 int addr_type;
8c5b83f0 2333 int err = -EINVAL;
1da177e4 2334
557c44be 2335 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2336 if (cfg->fc_flags & RTF_PCPU) {
2337 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2338 goto out;
d5d531cb 2339 }
557c44be 2340
d5d531cb
DA
2341 if (cfg->fc_dst_len > 128) {
2342 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2343 goto out;
2344 }
2345 if (cfg->fc_src_len > 128) {
2346 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2347 goto out;
d5d531cb 2348 }
1da177e4 2349#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2350 if (cfg->fc_src_len) {
2351 NL_SET_ERR_MSG(extack,
2352 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2353 goto out;
d5d531cb 2354 }
1da177e4 2355#endif
86872cb5 2356 if (cfg->fc_ifindex) {
1da177e4 2357 err = -ENODEV;
5578689a 2358 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2359 if (!dev)
2360 goto out;
2361 idev = in6_dev_get(dev);
2362 if (!idev)
2363 goto out;
2364 }
2365
86872cb5
TG
2366 if (cfg->fc_metric == 0)
2367 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2368
d71314b4 2369 err = -ENOBUFS;
38308473
DM
2370 if (cfg->fc_nlinfo.nlh &&
2371 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2372 table = fib6_get_table(net, cfg->fc_table);
38308473 2373 if (!table) {
f3213831 2374 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2375 table = fib6_new_table(net, cfg->fc_table);
2376 }
2377 } else {
2378 table = fib6_new_table(net, cfg->fc_table);
2379 }
38308473
DM
2380
2381 if (!table)
c71099ac 2382 goto out;
c71099ac 2383
ad706862
MKL
2384 rt = ip6_dst_alloc(net, NULL,
2385 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2386
38308473 2387 if (!rt) {
1da177e4
LT
2388 err = -ENOMEM;
2389 goto out;
2390 }
2391
1716a961
G
2392 if (cfg->fc_flags & RTF_EXPIRES)
2393 rt6_set_expires(rt, jiffies +
2394 clock_t_to_jiffies(cfg->fc_expires));
2395 else
2396 rt6_clean_expires(rt);
1da177e4 2397
86872cb5
TG
2398 if (cfg->fc_protocol == RTPROT_UNSPEC)
2399 cfg->fc_protocol = RTPROT_BOOT;
2400 rt->rt6i_protocol = cfg->fc_protocol;
2401
2402 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2403
2404 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2405 rt->dst.input = ip6_mc_input;
ab79ad14
2406 else if (cfg->fc_flags & RTF_LOCAL)
2407 rt->dst.input = ip6_input;
1da177e4 2408 else
d8d1f30b 2409 rt->dst.input = ip6_forward;
1da177e4 2410
d8d1f30b 2411 rt->dst.output = ip6_output;
1da177e4 2412
19e42e45
RP
2413 if (cfg->fc_encap) {
2414 struct lwtunnel_state *lwtstate;
2415
30357d7d 2416 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2417 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2418 &lwtstate, extack);
19e42e45
RP
2419 if (err)
2420 goto out;
61adedf3
JB
2421 rt->dst.lwtstate = lwtstate_get(lwtstate);
2422 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2423 rt->dst.lwtstate->orig_output = rt->dst.output;
2424 rt->dst.output = lwtunnel_output;
25368623 2425 }
61adedf3
JB
2426 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2427 rt->dst.lwtstate->orig_input = rt->dst.input;
2428 rt->dst.input = lwtunnel_input;
25368623 2429 }
19e42e45
RP
2430 }
2431
86872cb5
TG
2432 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2433 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2434 if (rt->rt6i_dst.plen == 128)
e5fd387a 2435 rt->dst.flags |= DST_HOST;
e5fd387a 2436
1da177e4 2437#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2438 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2439 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2440#endif
2441
86872cb5 2442 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
2443
2444 /* We cannot add true routes via loopback here,
2445 they would result in kernel looping; promote them to reject routes
2446 */
86872cb5 2447 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2448 (dev && (dev->flags & IFF_LOOPBACK) &&
2449 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2450 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2451 /* hold loopback dev/idev if we haven't done so. */
5578689a 2452 if (dev != net->loopback_dev) {
1da177e4
LT
2453 if (dev) {
2454 dev_put(dev);
2455 in6_dev_put(idev);
2456 }
5578689a 2457 dev = net->loopback_dev;
1da177e4
LT
2458 dev_hold(dev);
2459 idev = in6_dev_get(dev);
2460 if (!idev) {
2461 err = -ENODEV;
2462 goto out;
2463 }
2464 }
1da177e4 2465 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2466 switch (cfg->fc_type) {
2467 case RTN_BLACKHOLE:
2468 rt->dst.error = -EINVAL;
ede2059d 2469 rt->dst.output = dst_discard_out;
7150aede 2470 rt->dst.input = dst_discard;
ef2c7d7b
ND
2471 break;
2472 case RTN_PROHIBIT:
2473 rt->dst.error = -EACCES;
7150aede
K
2474 rt->dst.output = ip6_pkt_prohibit_out;
2475 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2476 break;
b4949ab2 2477 case RTN_THROW:
0315e382 2478 case RTN_UNREACHABLE:
ef2c7d7b 2479 default:
7150aede 2480 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2481 : (cfg->fc_type == RTN_UNREACHABLE)
2482 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2483 rt->dst.output = ip6_pkt_discard_out;
2484 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2485 break;
2486 }
1da177e4
LT
2487 goto install_route;
2488 }
2489
86872cb5 2490 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2491 const struct in6_addr *gw_addr;
1da177e4
LT
2492 int gwa_type;
2493
86872cb5 2494 gw_addr = &cfg->fc_gateway;
330567b7 2495 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2496
2497 /* if gw_addr is local we will fail to detect this in case
2498 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2499 * will return already-added prefix route via interface that
2500 * prefix route was assigned to, which might be non-loopback.
2501 */
2502 err = -EINVAL;
330567b7
FW
2503 if (ipv6_chk_addr_and_flags(net, gw_addr,
2504 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2505 dev : NULL, 0, 0)) {
2506 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2507 goto out;
d5d531cb 2508 }
4e3fd7a0 2509 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2510
2511 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2512 struct rt6_info *grt = NULL;
1da177e4
LT
2513
2514 /* IPv6 strictly inhibits using not link-local
2515 addresses as nexthop address.
2516 Otherwise, router will not able to send redirects.
2517 It is very good, but in some (rare!) circumstances
2518 (SIT, PtP, NBMA NOARP links) it is handy to allow
2519 some exceptions. --ANK
96d5822c
EN
2520 We allow IPv4-mapped nexthops to support RFC4798-type
2521 addressing
1da177e4 2522 */
96d5822c 2523 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2524 IPV6_ADDR_MAPPED))) {
2525 NL_SET_ERR_MSG(extack,
2526 "Invalid gateway address");
1da177e4 2527 goto out;
d5d531cb 2528 }
1da177e4 2529
a435a07f 2530 if (cfg->fc_table) {
8c14586f
DA
2531 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2532
a435a07f
VB
2533 if (grt) {
2534 if (grt->rt6i_flags & RTF_GATEWAY ||
2535 (dev && dev != grt->dst.dev)) {
2536 ip6_rt_put(grt);
2537 grt = NULL;
2538 }
2539 }
2540 }
2541
8c14586f
DA
2542 if (!grt)
2543 grt = rt6_lookup(net, gw_addr, NULL,
2544 cfg->fc_ifindex, 1);
1da177e4
LT
2545
2546 err = -EHOSTUNREACH;
38308473 2547 if (!grt)
1da177e4
LT
2548 goto out;
2549 if (dev) {
d1918542 2550 if (dev != grt->dst.dev) {
94e187c0 2551 ip6_rt_put(grt);
1da177e4
LT
2552 goto out;
2553 }
2554 } else {
d1918542 2555 dev = grt->dst.dev;
1da177e4
LT
2556 idev = grt->rt6i_idev;
2557 dev_hold(dev);
2558 in6_dev_hold(grt->rt6i_idev);
2559 }
38308473 2560 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2561 err = 0;
94e187c0 2562 ip6_rt_put(grt);
1da177e4
LT
2563
2564 if (err)
2565 goto out;
2566 }
2567 err = -EINVAL;
d5d531cb
DA
2568 if (!dev) {
2569 NL_SET_ERR_MSG(extack, "Egress device not specified");
2570 goto out;
2571 } else if (dev->flags & IFF_LOOPBACK) {
2572 NL_SET_ERR_MSG(extack,
2573 "Egress device can not be loopback device for this route");
1da177e4 2574 goto out;
d5d531cb 2575 }
1da177e4
LT
2576 }
2577
2578 err = -ENODEV;
38308473 2579 if (!dev)
1da177e4
LT
2580 goto out;
2581
c3968a85
DW
2582 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2583 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2584 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2585 err = -EINVAL;
2586 goto out;
2587 }
4e3fd7a0 2588 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2589 rt->rt6i_prefsrc.plen = 128;
2590 } else
2591 rt->rt6i_prefsrc.plen = 0;
2592
86872cb5 2593 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2594
2595install_route:
d8d1f30b 2596 rt->dst.dev = dev;
1da177e4 2597 rt->rt6i_idev = idev;
c71099ac 2598 rt->rt6i_table = table;
63152fc0 2599
c346dca1 2600 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2601
8c5b83f0 2602 return rt;
6b9ea5a6
RP
2603out:
2604 if (dev)
2605 dev_put(dev);
2606 if (idev)
2607 in6_dev_put(idev);
587fea74
WW
2608 if (rt)
2609 dst_release_immediate(&rt->dst);
6b9ea5a6 2610
8c5b83f0 2611 return ERR_PTR(err);
6b9ea5a6
RP
2612}
2613
333c4301
DA
2614int ip6_route_add(struct fib6_config *cfg,
2615 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2616{
2617 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2618 struct rt6_info *rt;
6b9ea5a6
RP
2619 int err;
2620
333c4301 2621 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2622 if (IS_ERR(rt)) {
2623 err = PTR_ERR(rt);
2624 rt = NULL;
6b9ea5a6 2625 goto out;
8c5b83f0 2626 }
6b9ea5a6 2627
e715b6d3
FW
2628 err = ip6_convert_metrics(&mxc, cfg);
2629 if (err)
2630 goto out;
1da177e4 2631
333c4301 2632 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2633
2634 kfree(mxc.mx);
6b9ea5a6 2635
e715b6d3 2636 return err;
1da177e4 2637out:
587fea74
WW
2638 if (rt)
2639 dst_release_immediate(&rt->dst);
6b9ea5a6 2640
1da177e4
LT
2641 return err;
2642}
2643
86872cb5 2644static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2645{
2646 int err;
c71099ac 2647 struct fib6_table *table;
d1918542 2648 struct net *net = dev_net(rt->dst.dev);
1da177e4 2649
a4c2fd7f 2650 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2651 err = -ENOENT;
2652 goto out;
2653 }
6c813a72 2654
c71099ac
TG
2655 table = rt->rt6i_table;
2656 write_lock_bh(&table->tb6_lock);
86872cb5 2657 err = fib6_del(rt, info);
c71099ac 2658 write_unlock_bh(&table->tb6_lock);
1da177e4 2659
6825a26c 2660out:
94e187c0 2661 ip6_rt_put(rt);
1da177e4
LT
2662 return err;
2663}
2664
e0a1ad73
TG
2665int ip6_del_rt(struct rt6_info *rt)
2666{
4d1169c1 2667 struct nl_info info = {
d1918542 2668 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2669 };
528c4ceb 2670 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2671}
2672
0ae81335
DA
2673static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2674{
2675 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2676 struct net *net = info->nl_net;
16a16cd3 2677 struct sk_buff *skb = NULL;
0ae81335 2678 struct fib6_table *table;
e3330039 2679 int err = -ENOENT;
0ae81335 2680
e3330039
WC
2681 if (rt == net->ipv6.ip6_null_entry)
2682 goto out_put;
0ae81335
DA
2683 table = rt->rt6i_table;
2684 write_lock_bh(&table->tb6_lock);
2685
2686 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2687 struct rt6_info *sibling, *next_sibling;
2688
16a16cd3
DA
2689 /* prefer to send a single notification with all hops */
2690 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2691 if (skb) {
2692 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2693
e3330039 2694 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2695 NULL, NULL, 0, RTM_DELROUTE,
2696 info->portid, seq, 0) < 0) {
2697 kfree_skb(skb);
2698 skb = NULL;
2699 } else
2700 info->skip_notify = 1;
2701 }
2702
0ae81335
DA
2703 list_for_each_entry_safe(sibling, next_sibling,
2704 &rt->rt6i_siblings,
2705 rt6i_siblings) {
2706 err = fib6_del(sibling, info);
2707 if (err)
e3330039 2708 goto out_unlock;
0ae81335
DA
2709 }
2710 }
2711
2712 err = fib6_del(rt, info);
e3330039 2713out_unlock:
0ae81335 2714 write_unlock_bh(&table->tb6_lock);
e3330039 2715out_put:
0ae81335 2716 ip6_rt_put(rt);
16a16cd3
DA
2717
2718 if (skb) {
e3330039 2719 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2720 info->nlh, gfp_any());
2721 }
0ae81335
DA
2722 return err;
2723}
2724
333c4301
DA
2725static int ip6_route_del(struct fib6_config *cfg,
2726 struct netlink_ext_ack *extack)
1da177e4 2727{
c71099ac 2728 struct fib6_table *table;
1da177e4
LT
2729 struct fib6_node *fn;
2730 struct rt6_info *rt;
2731 int err = -ESRCH;
2732
5578689a 2733 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2734 if (!table) {
2735 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2736 return err;
d5d531cb 2737 }
c71099ac
TG
2738
2739 read_lock_bh(&table->tb6_lock);
1da177e4 2740
c71099ac 2741 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2742 &cfg->fc_dst, cfg->fc_dst_len,
2743 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2744
1da177e4 2745 if (fn) {
d8d1f30b 2746 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2747 if ((rt->rt6i_flags & RTF_CACHE) &&
2748 !(cfg->fc_flags & RTF_CACHE))
2749 continue;
86872cb5 2750 if (cfg->fc_ifindex &&
d1918542
DM
2751 (!rt->dst.dev ||
2752 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2753 continue;
86872cb5
TG
2754 if (cfg->fc_flags & RTF_GATEWAY &&
2755 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2756 continue;
86872cb5 2757 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2758 continue;
c2ed1880
M
2759 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2760 continue;
d8d1f30b 2761 dst_hold(&rt->dst);
c71099ac 2762 read_unlock_bh(&table->tb6_lock);
1da177e4 2763
0ae81335
DA
2764 /* if gateway was specified only delete the one hop */
2765 if (cfg->fc_flags & RTF_GATEWAY)
2766 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2767
2768 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2769 }
2770 }
c71099ac 2771 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2772
2773 return err;
2774}
2775
6700c270 2776static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2777{
a6279458 2778 struct netevent_redirect netevent;
e8599ff4 2779 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2780 struct ndisc_options ndopts;
2781 struct inet6_dev *in6_dev;
2782 struct neighbour *neigh;
71bcdba0 2783 struct rd_msg *msg;
6e157b6a
DM
2784 int optlen, on_link;
2785 u8 *lladdr;
e8599ff4 2786
29a3cad5 2787 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2788 optlen -= sizeof(*msg);
e8599ff4
DM
2789
2790 if (optlen < 0) {
6e157b6a 2791 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2792 return;
2793 }
2794
71bcdba0 2795 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2796
71bcdba0 2797 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2798 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2799 return;
2800 }
2801
6e157b6a 2802 on_link = 0;
71bcdba0 2803 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2804 on_link = 1;
71bcdba0 2805 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2806 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2807 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2808 return;
2809 }
2810
2811 in6_dev = __in6_dev_get(skb->dev);
2812 if (!in6_dev)
2813 return;
2814 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2815 return;
2816
2817 /* RFC2461 8.1:
2818 * The IP source address of the Redirect MUST be the same as the current
2819 * first-hop router for the specified ICMP Destination Address.
2820 */
2821
f997c55c 2822 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2823 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2824 return;
2825 }
6e157b6a
DM
2826
2827 lladdr = NULL;
e8599ff4
DM
2828 if (ndopts.nd_opts_tgt_lladdr) {
2829 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2830 skb->dev);
2831 if (!lladdr) {
2832 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2833 return;
2834 }
2835 }
2836
6e157b6a 2837 rt = (struct rt6_info *) dst;
ec13ad1d 2838 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2839 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2840 return;
6e157b6a 2841 }
e8599ff4 2842
6e157b6a
DM
2843 /* Redirect received -> path was valid.
2844 * Look, redirects are sent only in response to data packets,
2845 * so that this nexthop apparently is reachable. --ANK
2846 */
0dec879f 2847 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2848
71bcdba0 2849 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2850 if (!neigh)
2851 return;
a6279458 2852
1da177e4
LT
2853 /*
2854 * We have finally decided to accept it.
2855 */
2856
f997c55c 2857 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2858 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2859 NEIGH_UPDATE_F_OVERRIDE|
2860 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2861 NEIGH_UPDATE_F_ISROUTER)),
2862 NDISC_REDIRECT, &ndopts);
1da177e4 2863
83a09abd 2864 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2865 if (!nrt)
1da177e4
LT
2866 goto out;
2867
2868 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2869 if (on_link)
2870 nrt->rt6i_flags &= ~RTF_GATEWAY;
2871
b91d5329 2872 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 2873 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2874
40e22e8f 2875 if (ip6_ins_rt(nrt))
1cfb71ee 2876 goto out_release;
1da177e4 2877
d8d1f30b
CG
2878 netevent.old = &rt->dst;
2879 netevent.new = &nrt->dst;
71bcdba0 2880 netevent.daddr = &msg->dest;
60592833 2881 netevent.neigh = neigh;
8d71740c
TT
2882 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2883
38308473 2884 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2885 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2886 ip6_del_rt(rt);
1da177e4
LT
2887 }
2888
1cfb71ee
WW
2889out_release:
2890 /* Release the reference taken in
2891 * ip6_rt_cache_alloc()
2892 */
2893 dst_release(&nrt->dst);
2894
1da177e4 2895out:
e8599ff4 2896 neigh_release(neigh);
6e157b6a
DM
2897}
2898
1da177e4
LT
2899/*
2900 * Misc support functions
2901 */
2902
4b32b5ad
MKL
2903static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2904{
2905 BUG_ON(from->dst.from);
2906
2907 rt->rt6i_flags &= ~RTF_EXPIRES;
2908 dst_hold(&from->dst);
2909 rt->dst.from = &from->dst;
2910 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2911}
2912
83a09abd
MKL
2913static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2914{
2915 rt->dst.input = ort->dst.input;
2916 rt->dst.output = ort->dst.output;
2917 rt->rt6i_dst = ort->rt6i_dst;
2918 rt->dst.error = ort->dst.error;
2919 rt->rt6i_idev = ort->rt6i_idev;
2920 if (rt->rt6i_idev)
2921 in6_dev_hold(rt->rt6i_idev);
2922 rt->dst.lastuse = jiffies;
2923 rt->rt6i_gateway = ort->rt6i_gateway;
2924 rt->rt6i_flags = ort->rt6i_flags;
2925 rt6_set_from(rt, ort);
2926 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2927#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2928 rt->rt6i_src = ort->rt6i_src;
1da177e4 2929#endif
83a09abd
MKL
2930 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2931 rt->rt6i_table = ort->rt6i_table;
61adedf3 2932 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2933}
2934
70ceb4f5 2935#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2936static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2937 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2938 const struct in6_addr *gwaddr,
2939 struct net_device *dev)
70ceb4f5 2940{
830218c1
DA
2941 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2942 int ifindex = dev->ifindex;
70ceb4f5
YH
2943 struct fib6_node *fn;
2944 struct rt6_info *rt = NULL;
c71099ac
TG
2945 struct fib6_table *table;
2946
830218c1 2947 table = fib6_get_table(net, tb_id);
38308473 2948 if (!table)
c71099ac 2949 return NULL;
70ceb4f5 2950
5744dd9b 2951 read_lock_bh(&table->tb6_lock);
67ba4152 2952 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2953 if (!fn)
2954 goto out;
2955
d8d1f30b 2956 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2957 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2958 continue;
2959 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2960 continue;
2961 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2962 continue;
d8d1f30b 2963 dst_hold(&rt->dst);
70ceb4f5
YH
2964 break;
2965 }
2966out:
5744dd9b 2967 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2968 return rt;
2969}
2970
efa2cea0 2971static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2972 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2973 const struct in6_addr *gwaddr,
2974 struct net_device *dev,
95c96174 2975 unsigned int pref)
70ceb4f5 2976{
86872cb5 2977 struct fib6_config cfg = {
238fc7ea 2978 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2979 .fc_ifindex = dev->ifindex,
86872cb5
TG
2980 .fc_dst_len = prefixlen,
2981 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2982 RTF_UP | RTF_PREF(pref),
b91d5329 2983 .fc_protocol = RTPROT_RA,
15e47304 2984 .fc_nlinfo.portid = 0,
efa2cea0
DL
2985 .fc_nlinfo.nlh = NULL,
2986 .fc_nlinfo.nl_net = net,
86872cb5
TG
2987 };
2988
830218c1 2989 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2990 cfg.fc_dst = *prefix;
2991 cfg.fc_gateway = *gwaddr;
70ceb4f5 2992
e317da96
YH
2993 /* We should treat it as a default route if prefix length is 0. */
2994 if (!prefixlen)
86872cb5 2995 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2996
333c4301 2997 ip6_route_add(&cfg, NULL);
70ceb4f5 2998
830218c1 2999 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3000}
3001#endif
3002
b71d1d42 3003struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3004{
830218c1 3005 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3006 struct rt6_info *rt;
c71099ac 3007 struct fib6_table *table;
1da177e4 3008
830218c1 3009 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3010 if (!table)
c71099ac 3011 return NULL;
1da177e4 3012
5744dd9b 3013 read_lock_bh(&table->tb6_lock);
67ba4152 3014 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 3015 if (dev == rt->dst.dev &&
045927ff 3016 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3017 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3018 break;
3019 }
3020 if (rt)
d8d1f30b 3021 dst_hold(&rt->dst);
5744dd9b 3022 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
3023 return rt;
3024}
3025
b71d1d42 3026struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3027 struct net_device *dev,
3028 unsigned int pref)
1da177e4 3029{
86872cb5 3030 struct fib6_config cfg = {
ca254490 3031 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3032 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3033 .fc_ifindex = dev->ifindex,
3034 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3035 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3036 .fc_protocol = RTPROT_RA,
15e47304 3037 .fc_nlinfo.portid = 0,
5578689a 3038 .fc_nlinfo.nlh = NULL,
c346dca1 3039 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3040 };
1da177e4 3041
4e3fd7a0 3042 cfg.fc_gateway = *gwaddr;
1da177e4 3043
333c4301 3044 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3045 struct fib6_table *table;
3046
3047 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3048 if (table)
3049 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3050 }
1da177e4 3051
1da177e4
LT
3052 return rt6_get_dflt_router(gwaddr, dev);
3053}
3054
830218c1 3055static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3056{
3057 struct rt6_info *rt;
3058
3059restart:
c71099ac 3060 read_lock_bh(&table->tb6_lock);
d8d1f30b 3061 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
3062 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3063 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 3064 dst_hold(&rt->dst);
c71099ac 3065 read_unlock_bh(&table->tb6_lock);
e0a1ad73 3066 ip6_del_rt(rt);
1da177e4
LT
3067 goto restart;
3068 }
3069 }
c71099ac 3070 read_unlock_bh(&table->tb6_lock);
830218c1
DA
3071
3072 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3073}
3074
3075void rt6_purge_dflt_routers(struct net *net)
3076{
3077 struct fib6_table *table;
3078 struct hlist_head *head;
3079 unsigned int h;
3080
3081 rcu_read_lock();
3082
3083 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3084 head = &net->ipv6.fib_table_hash[h];
3085 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3086 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3087 __rt6_purge_dflt_routers(table);
3088 }
3089 }
3090
3091 rcu_read_unlock();
1da177e4
LT
3092}
3093
5578689a
DL
3094static void rtmsg_to_fib6_config(struct net *net,
3095 struct in6_rtmsg *rtmsg,
86872cb5
TG
3096 struct fib6_config *cfg)
3097{
3098 memset(cfg, 0, sizeof(*cfg));
3099
ca254490
DA
3100 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3101 : RT6_TABLE_MAIN;
86872cb5
TG
3102 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3103 cfg->fc_metric = rtmsg->rtmsg_metric;
3104 cfg->fc_expires = rtmsg->rtmsg_info;
3105 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3106 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3107 cfg->fc_flags = rtmsg->rtmsg_flags;
3108
5578689a 3109 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3110
4e3fd7a0
AD
3111 cfg->fc_dst = rtmsg->rtmsg_dst;
3112 cfg->fc_src = rtmsg->rtmsg_src;
3113 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3114}
3115
5578689a 3116int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3117{
86872cb5 3118 struct fib6_config cfg;
1da177e4
LT
3119 struct in6_rtmsg rtmsg;
3120 int err;
3121
67ba4152 3122 switch (cmd) {
1da177e4
LT
3123 case SIOCADDRT: /* Add a route */
3124 case SIOCDELRT: /* Delete a route */
af31f412 3125 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3126 return -EPERM;
3127 err = copy_from_user(&rtmsg, arg,
3128 sizeof(struct in6_rtmsg));
3129 if (err)
3130 return -EFAULT;
86872cb5 3131
5578689a 3132 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3133
1da177e4
LT
3134 rtnl_lock();
3135 switch (cmd) {
3136 case SIOCADDRT:
333c4301 3137 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3138 break;
3139 case SIOCDELRT:
333c4301 3140 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3141 break;
3142 default:
3143 err = -EINVAL;
3144 }
3145 rtnl_unlock();
3146
3147 return err;
3ff50b79 3148 }
1da177e4
LT
3149
3150 return -EINVAL;
3151}
3152
3153/*
3154 * Drop the packet on the floor
3155 */
3156
d5fdd6ba 3157static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3158{
612f09e8 3159 int type;
adf30907 3160 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3161 switch (ipstats_mib_noroutes) {
3162 case IPSTATS_MIB_INNOROUTES:
0660e03f 3163 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3164 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3165 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3166 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3167 break;
3168 }
3169 /* FALLTHROUGH */
3170 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3171 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3172 ipstats_mib_noroutes);
612f09e8
YH
3173 break;
3174 }
3ffe533c 3175 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3176 kfree_skb(skb);
3177 return 0;
3178}
3179
9ce8ade0
TG
3180static int ip6_pkt_discard(struct sk_buff *skb)
3181{
612f09e8 3182 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3183}
3184
ede2059d 3185static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3186{
adf30907 3187 skb->dev = skb_dst(skb)->dev;
612f09e8 3188 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3189}
3190
9ce8ade0
TG
3191static int ip6_pkt_prohibit(struct sk_buff *skb)
3192{
612f09e8 3193 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3194}
3195
ede2059d 3196static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3197{
adf30907 3198 skb->dev = skb_dst(skb)->dev;
612f09e8 3199 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3200}
3201
1da177e4
LT
3202/*
3203 * Allocate a dst for local (unicast / anycast) address.
3204 */
3205
3206struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3207 const struct in6_addr *addr,
8f031519 3208 bool anycast)
1da177e4 3209{
ca254490 3210 u32 tb_id;
c346dca1 3211 struct net *net = dev_net(idev->dev);
4832c30d 3212 struct net_device *dev = idev->dev;
5f02ce24
DA
3213 struct rt6_info *rt;
3214
5f02ce24 3215 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3216 if (!rt)
1da177e4
LT
3217 return ERR_PTR(-ENOMEM);
3218
1da177e4
LT
3219 in6_dev_hold(idev);
3220
11d53b49 3221 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3222 rt->dst.input = ip6_input;
3223 rt->dst.output = ip6_output;
1da177e4 3224 rt->rt6i_idev = idev;
1da177e4 3225
94b5e0f9 3226 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3227 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3228 if (anycast)
3229 rt->rt6i_flags |= RTF_ANYCAST;
3230 else
1da177e4 3231 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3232
550bab42 3233 rt->rt6i_gateway = *addr;
4e3fd7a0 3234 rt->rt6i_dst.addr = *addr;
1da177e4 3235 rt->rt6i_dst.plen = 128;
ca254490
DA
3236 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3237 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3238
1da177e4
LT
3239 return rt;
3240}
3241
c3968a85
DW
3242/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address being removed */
};
3248
3249static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3250{
3251 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3252 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3253 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3254
d1918542 3255 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3256 rt != net->ipv6.ip6_null_entry &&
3257 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3258 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3259 /* remove prefsrc entry */
3260 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3261 /* need to update cache as well */
3262 rt6_exceptions_remove_prefsrc(rt);
3263 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3264 }
3265 return 0;
3266}
3267
3268void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3269{
3270 struct net *net = dev_net(ifp->idev->dev);
3271 struct arg_dev_net_ip adni = {
3272 .dev = ifp->idev->dev,
3273 .net = net,
3274 .addr = &ifp->addr,
3275 };
0c3584d5 3276 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3277}
3278
be7a010d 3279#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3280
3281/* Remove routers and update dst entries when gateway turn into host. */
3282static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3283{
3284 struct in6_addr *gateway = (struct in6_addr *)arg;
3285
b16cb459
WW
3286 /* RTF_CACHE_GATEWAY case will be removed once the exception
3287 * table is hooked up to store all cached routes.
3288 */
be7a010d
DJ
3289 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
3290 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
3291 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
3292 return -1;
3293 }
b16cb459
WW
3294
3295 /* Further clean up cached routes in exception table.
3296 * This is needed because cached route may have a different
3297 * gateway than its 'parent' in the case of an ip redirect.
3298 */
3299 rt6_exceptions_clean_tohost(rt, gateway);
3300
be7a010d
DJ
3301 return 0;
3302}
3303
3304void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3305{
3306 fib6_clean_all(net, fib6_clean_tohost, gateway);
3307}
3308
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
3313
a1a22c12 3314/* called with write lock held for table with rt */
1da177e4
LT
3315static int fib6_ifdown(struct rt6_info *rt, void *arg)
3316{
bc3ef660 3317 const struct arg_dev_net *adn = arg;
3318 const struct net_device *dev = adn->dev;
8ed67789 3319
d1918542 3320 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
3321 rt != adn->net->ipv6.ip6_null_entry &&
3322 (rt->rt6i_nsiblings == 0 ||
8397ed36 3323 (dev && netdev_unregistering(dev)) ||
a1a22c12 3324 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 3325 return -1;
c159d30c 3326
1da177e4
LT
3327 return 0;
3328}
3329
f3db4851 3330void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 3331{
8ed67789
DL
3332 struct arg_dev_net adn = {
3333 .dev = dev,
3334 .net = net,
3335 };
3336
0c3584d5 3337 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
3338 if (dev)
3339 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
3340}
3341
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3346
3347static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3348{
3349 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3350 struct inet6_dev *idev;
3351
3352 /* In IPv6 pmtu discovery is not optional,
3353 so that RTAX_MTU lock cannot disable it.
3354 We still use this lock to block changes
3355 caused by addrconf/ndisc.
3356 */
3357
3358 idev = __in6_dev_get(arg->dev);
38308473 3359 if (!idev)
1da177e4
LT
3360 return 0;
3361
3362 /* For administrative MTU increase, there is no way to discover
3363 IPv6 PMTU increase, so PMTU increase should be updated here.
3364 Since RFC 1981 doesn't include administrative MTU increase
3365 update PMTU increase is a MUST. (i.e. jumbo frame)
3366 */
3367 /*
3368 If new MTU is less than route PMTU, this new MTU will be the
3369 lowest MTU in the path, update the route PMTU to reflect PMTU
3370 decreases; if new MTU is greater than route PMTU, and the
3371 old MTU is the lowest MTU in the path, update the route PMTU
3372 to reflect the increase. In this case if the other nodes' MTU
3373 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3374 PMTU discovery.
1da177e4 3375 */
d1918542 3376 if (rt->dst.dev == arg->dev &&
fb56be83 3377 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad 3378 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee
WW
3379 spin_lock_bh(&rt6_exception_lock);
3380 /* This case will be removed once the exception table
3381 * is hooked up.
3382 */
4b32b5ad
MKL
3383 if (rt->rt6i_flags & RTF_CACHE) {
3384 /* For RTF_CACHE with rt6i_pmtu == 0
3385 * (i.e. a redirected route),
3386 * the metrics of its rt->dst.from has already
3387 * been updated.
3388 */
3389 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
3390 rt->rt6i_pmtu = arg->mtu;
3391 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
3392 (dst_mtu(&rt->dst) < arg->mtu &&
3393 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
3394 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3395 }
f5bbe7ee
WW
3396 rt6_exceptions_update_pmtu(rt, arg->mtu);
3397 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3398 }
1da177e4
LT
3399 return 0;
3400}
3401
95c96174 3402void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3403{
c71099ac
TG
3404 struct rt6_mtu_change_arg arg = {
3405 .dev = dev,
3406 .mtu = mtu,
3407 };
1da177e4 3408
0c3584d5 3409 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3410}
3411
ef7c79ed 3412static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3413 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3414 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3415 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3416 [RTA_PRIORITY] = { .type = NLA_U32 },
3417 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3418 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3419 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3420 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3421 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3422 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3423 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3424 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3425};
3426
3427static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3428 struct fib6_config *cfg,
3429 struct netlink_ext_ack *extack)
1da177e4 3430{
86872cb5
TG
3431 struct rtmsg *rtm;
3432 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3433 unsigned int pref;
86872cb5 3434 int err;
1da177e4 3435
fceb6435
JB
3436 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3437 NULL);
86872cb5
TG
3438 if (err < 0)
3439 goto errout;
1da177e4 3440
86872cb5
TG
3441 err = -EINVAL;
3442 rtm = nlmsg_data(nlh);
3443 memset(cfg, 0, sizeof(*cfg));
3444
3445 cfg->fc_table = rtm->rtm_table;
3446 cfg->fc_dst_len = rtm->rtm_dst_len;
3447 cfg->fc_src_len = rtm->rtm_src_len;
3448 cfg->fc_flags = RTF_UP;
3449 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3450 cfg->fc_type = rtm->rtm_type;
86872cb5 3451
ef2c7d7b
ND
3452 if (rtm->rtm_type == RTN_UNREACHABLE ||
3453 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3454 rtm->rtm_type == RTN_PROHIBIT ||
3455 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3456 cfg->fc_flags |= RTF_REJECT;
3457
ab79ad14
3458 if (rtm->rtm_type == RTN_LOCAL)
3459 cfg->fc_flags |= RTF_LOCAL;
3460
1f56a01f
MKL
3461 if (rtm->rtm_flags & RTM_F_CLONED)
3462 cfg->fc_flags |= RTF_CACHE;
3463
15e47304 3464 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3465 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3466 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3467
3468 if (tb[RTA_GATEWAY]) {
67b61f6c 3469 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3470 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3471 }
86872cb5
TG
3472
3473 if (tb[RTA_DST]) {
3474 int plen = (rtm->rtm_dst_len + 7) >> 3;
3475
3476 if (nla_len(tb[RTA_DST]) < plen)
3477 goto errout;
3478
3479 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3480 }
86872cb5
TG
3481
3482 if (tb[RTA_SRC]) {
3483 int plen = (rtm->rtm_src_len + 7) >> 3;
3484
3485 if (nla_len(tb[RTA_SRC]) < plen)
3486 goto errout;
3487
3488 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3489 }
86872cb5 3490
c3968a85 3491 if (tb[RTA_PREFSRC])
67b61f6c 3492 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3493
86872cb5
TG
3494 if (tb[RTA_OIF])
3495 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3496
3497 if (tb[RTA_PRIORITY])
3498 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3499
3500 if (tb[RTA_METRICS]) {
3501 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3502 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3503 }
86872cb5
TG
3504
3505 if (tb[RTA_TABLE])
3506 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3507
51ebd318
ND
3508 if (tb[RTA_MULTIPATH]) {
3509 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3510 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3511
3512 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3513 cfg->fc_mp_len, extack);
9ed59592
DA
3514 if (err < 0)
3515 goto errout;
51ebd318
ND
3516 }
3517
c78ba6d6
LR
3518 if (tb[RTA_PREF]) {
3519 pref = nla_get_u8(tb[RTA_PREF]);
3520 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3521 pref != ICMPV6_ROUTER_PREF_HIGH)
3522 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3523 cfg->fc_flags |= RTF_PREF(pref);
3524 }
3525
19e42e45
RP
3526 if (tb[RTA_ENCAP])
3527 cfg->fc_encap = tb[RTA_ENCAP];
3528
9ed59592 3529 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3530 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3531
c255bd68 3532 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3533 if (err < 0)
3534 goto errout;
3535 }
3536
32bc201e
XL
3537 if (tb[RTA_EXPIRES]) {
3538 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3539
3540 if (addrconf_finite_timeout(timeout)) {
3541 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3542 cfg->fc_flags |= RTF_EXPIRES;
3543 }
3544 }
3545
86872cb5
TG
3546 err = 0;
3547errout:
3548 return err;
1da177e4
LT
3549}
3550
6b9ea5a6
RP
3551struct rt6_nh {
3552 struct rt6_info *rt6_info;
3553 struct fib6_config r_cfg;
3554 struct mx6_config mxc;
3555 struct list_head next;
3556};
3557
3558static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3559{
3560 struct rt6_nh *nh;
3561
3562 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3563 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3564 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3565 nh->r_cfg.fc_ifindex);
3566 }
3567}
3568
3569static int ip6_route_info_append(struct list_head *rt6_nh_list,
3570 struct rt6_info *rt, struct fib6_config *r_cfg)
3571{
3572 struct rt6_nh *nh;
6b9ea5a6
RP
3573 int err = -EEXIST;
3574
3575 list_for_each_entry(nh, rt6_nh_list, next) {
3576 /* check if rt6_info already exists */
f06b7549 3577 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3578 return err;
3579 }
3580
3581 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3582 if (!nh)
3583 return -ENOMEM;
3584 nh->rt6_info = rt;
3585 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3586 if (err) {
3587 kfree(nh);
3588 return err;
3589 }
3590 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3591 list_add_tail(&nh->next, rt6_nh_list);
3592
3593 return 0;
3594}
3595
3b1137fe
DA
3596static void ip6_route_mpath_notify(struct rt6_info *rt,
3597 struct rt6_info *rt_last,
3598 struct nl_info *info,
3599 __u16 nlflags)
3600{
3601 /* if this is an APPEND route, then rt points to the first route
3602 * inserted and rt_last points to last route inserted. Userspace
3603 * wants a consistent dump of the route which starts at the first
3604 * nexthop. Since sibling routes are always added at the end of
3605 * the list, find the first sibling of the last route appended
3606 */
3607 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3608 rt = list_first_entry(&rt_last->rt6i_siblings,
3609 struct rt6_info,
3610 rt6i_siblings);
3611 }
3612
3613 if (rt)
3614 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3615}
3616
333c4301
DA
3617static int ip6_route_multipath_add(struct fib6_config *cfg,
3618 struct netlink_ext_ack *extack)
51ebd318 3619{
3b1137fe
DA
3620 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3621 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3622 struct fib6_config r_cfg;
3623 struct rtnexthop *rtnh;
6b9ea5a6
RP
3624 struct rt6_info *rt;
3625 struct rt6_nh *err_nh;
3626 struct rt6_nh *nh, *nh_safe;
3b1137fe 3627 __u16 nlflags;
51ebd318
ND
3628 int remaining;
3629 int attrlen;
6b9ea5a6
RP
3630 int err = 1;
3631 int nhn = 0;
3632 int replace = (cfg->fc_nlinfo.nlh &&
3633 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3634 LIST_HEAD(rt6_nh_list);
51ebd318 3635
3b1137fe
DA
3636 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3637 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3638 nlflags |= NLM_F_APPEND;
3639
35f1b4e9 3640 remaining = cfg->fc_mp_len;
51ebd318 3641 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3642
6b9ea5a6
RP
3643 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3644 * rt6_info structs per nexthop
3645 */
51ebd318
ND
3646 while (rtnh_ok(rtnh, remaining)) {
3647 memcpy(&r_cfg, cfg, sizeof(*cfg));
3648 if (rtnh->rtnh_ifindex)
3649 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3650
3651 attrlen = rtnh_attrlen(rtnh);
3652 if (attrlen > 0) {
3653 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3654
3655 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3656 if (nla) {
67b61f6c 3657 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3658 r_cfg.fc_flags |= RTF_GATEWAY;
3659 }
19e42e45
RP
3660 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3661 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3662 if (nla)
3663 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3664 }
6b9ea5a6 3665
333c4301 3666 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3667 if (IS_ERR(rt)) {
3668 err = PTR_ERR(rt);
3669 rt = NULL;
6b9ea5a6 3670 goto cleanup;
8c5b83f0 3671 }
6b9ea5a6
RP
3672
3673 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3674 if (err) {
587fea74 3675 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3676 goto cleanup;
3677 }
3678
3679 rtnh = rtnh_next(rtnh, &remaining);
3680 }
3681
3b1137fe
DA
3682 /* for add and replace send one notification with all nexthops.
3683 * Skip the notification in fib6_add_rt2node and send one with
3684 * the full route when done
3685 */
3686 info->skip_notify = 1;
3687
6b9ea5a6
RP
3688 err_nh = NULL;
3689 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3690 rt_last = nh->rt6_info;
333c4301 3691 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3692 /* save reference to first route for notification */
3693 if (!rt_notif && !err)
3694 rt_notif = nh->rt6_info;
3695
6b9ea5a6
RP
3696 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3697 nh->rt6_info = NULL;
3698 if (err) {
3699 if (replace && nhn)
3700 ip6_print_replace_route_err(&rt6_nh_list);
3701 err_nh = nh;
3702 goto add_errout;
51ebd318 3703 }
6b9ea5a6 3704
1a72418b 3705 /* Because each route is added like a single route we remove
27596472
MK
3706 * these flags after the first nexthop: if there is a collision,
3707 * we have already failed to add the first nexthop:
3708 * fib6_add_rt2node() has rejected it; when replacing, old
3709 * nexthops have been replaced by first new, the rest should
3710 * be added to it.
1a72418b 3711 */
27596472
MK
3712 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3713 NLM_F_REPLACE);
6b9ea5a6
RP
3714 nhn++;
3715 }
3716
3b1137fe
DA
3717 /* success ... tell user about new route */
3718 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3719 goto cleanup;
3720
3721add_errout:
3b1137fe
DA
3722 /* send notification for routes that were added so that
3723 * the delete notifications sent by ip6_route_del are
3724 * coherent
3725 */
3726 if (rt_notif)
3727 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3728
6b9ea5a6
RP
3729 /* Delete routes that were already added */
3730 list_for_each_entry(nh, &rt6_nh_list, next) {
3731 if (err_nh == nh)
3732 break;
333c4301 3733 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3734 }
3735
3736cleanup:
3737 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3738 if (nh->rt6_info)
3739 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3740 kfree(nh->mxc.mx);
6b9ea5a6
RP
3741 list_del(&nh->next);
3742 kfree(nh);
3743 }
3744
3745 return err;
3746}
3747
333c4301
DA
3748static int ip6_route_multipath_del(struct fib6_config *cfg,
3749 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3750{
3751 struct fib6_config r_cfg;
3752 struct rtnexthop *rtnh;
3753 int remaining;
3754 int attrlen;
3755 int err = 1, last_err = 0;
3756
3757 remaining = cfg->fc_mp_len;
3758 rtnh = (struct rtnexthop *)cfg->fc_mp;
3759
3760 /* Parse a Multipath Entry */
3761 while (rtnh_ok(rtnh, remaining)) {
3762 memcpy(&r_cfg, cfg, sizeof(*cfg));
3763 if (rtnh->rtnh_ifindex)
3764 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3765
3766 attrlen = rtnh_attrlen(rtnh);
3767 if (attrlen > 0) {
3768 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3769
3770 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3771 if (nla) {
3772 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3773 r_cfg.fc_flags |= RTF_GATEWAY;
3774 }
3775 }
333c4301 3776 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3777 if (err)
3778 last_err = err;
3779
51ebd318
ND
3780 rtnh = rtnh_next(rtnh, &remaining);
3781 }
3782
3783 return last_err;
3784}
3785
c21ef3e3
DA
3786static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3787 struct netlink_ext_ack *extack)
1da177e4 3788{
86872cb5
TG
3789 struct fib6_config cfg;
3790 int err;
1da177e4 3791
333c4301 3792 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3793 if (err < 0)
3794 return err;
3795
51ebd318 3796 if (cfg.fc_mp)
333c4301 3797 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3798 else {
3799 cfg.fc_delete_all_nh = 1;
333c4301 3800 return ip6_route_del(&cfg, extack);
0ae81335 3801 }
1da177e4
LT
3802}
3803
c21ef3e3
DA
3804static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3805 struct netlink_ext_ack *extack)
1da177e4 3806{
86872cb5
TG
3807 struct fib6_config cfg;
3808 int err;
1da177e4 3809
333c4301 3810 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3811 if (err < 0)
3812 return err;
3813
51ebd318 3814 if (cfg.fc_mp)
333c4301 3815 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3816 else
333c4301 3817 return ip6_route_add(&cfg, extack);
1da177e4
LT
3818}
3819
beb1afac 3820static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3821{
beb1afac
DA
3822 int nexthop_len = 0;
3823
3824 if (rt->rt6i_nsiblings) {
3825 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3826 + NLA_ALIGN(sizeof(struct rtnexthop))
3827 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3828 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3829
3830 nexthop_len *= rt->rt6i_nsiblings;
3831 }
3832
339bf98f
TG
3833 return NLMSG_ALIGN(sizeof(struct rtmsg))
3834 + nla_total_size(16) /* RTA_SRC */
3835 + nla_total_size(16) /* RTA_DST */
3836 + nla_total_size(16) /* RTA_GATEWAY */
3837 + nla_total_size(16) /* RTA_PREFSRC */
3838 + nla_total_size(4) /* RTA_TABLE */
3839 + nla_total_size(4) /* RTA_IIF */
3840 + nla_total_size(4) /* RTA_OIF */
3841 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3842 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3843 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3844 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3845 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3846 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3847 + nexthop_len;
3848}
3849
3850static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3851 unsigned int *flags, bool skip_oif)
beb1afac
DA
3852{
3853 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3854 *flags |= RTNH_F_LINKDOWN;
3855 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3856 *flags |= RTNH_F_DEAD;
3857 }
3858
3859 if (rt->rt6i_flags & RTF_GATEWAY) {
3860 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3861 goto nla_put_failure;
3862 }
3863
fe400799 3864 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3865 *flags |= RTNH_F_OFFLOAD;
3866
5be083ce
DA
3867 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3868 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3869 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3870 goto nla_put_failure;
3871
3872 if (rt->dst.lwtstate &&
3873 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3874 goto nla_put_failure;
3875
3876 return 0;
3877
3878nla_put_failure:
3879 return -EMSGSIZE;
3880}
3881
5be083ce 3882/* add multipath next hop */
beb1afac
DA
3883static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3884{
3885 struct rtnexthop *rtnh;
3886 unsigned int flags = 0;
3887
3888 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3889 if (!rtnh)
3890 goto nla_put_failure;
3891
3892 rtnh->rtnh_hops = 0;
3893 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3894
5be083ce 3895 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3896 goto nla_put_failure;
3897
3898 rtnh->rtnh_flags = flags;
3899
3900 /* length of rtnetlink header + attributes */
3901 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3902
3903 return 0;
3904
3905nla_put_failure:
3906 return -EMSGSIZE;
339bf98f
TG
3907}
3908
191cd582
BH
3909static int rt6_fill_node(struct net *net,
3910 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3911 struct in6_addr *dst, struct in6_addr *src,
15e47304 3912 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3913 unsigned int flags)
1da177e4 3914{
4b32b5ad 3915 u32 metrics[RTAX_MAX];
1da177e4 3916 struct rtmsg *rtm;
2d7202bf 3917 struct nlmsghdr *nlh;
e3703b3d 3918 long expires;
9e762a4a 3919 u32 table;
1da177e4 3920
15e47304 3921 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3922 if (!nlh)
26932566 3923 return -EMSGSIZE;
2d7202bf
TG
3924
3925 rtm = nlmsg_data(nlh);
1da177e4
LT
3926 rtm->rtm_family = AF_INET6;
3927 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3928 rtm->rtm_src_len = rt->rt6i_src.plen;
3929 rtm->rtm_tos = 0;
c71099ac 3930 if (rt->rt6i_table)
9e762a4a 3931 table = rt->rt6i_table->tb6_id;
c71099ac 3932 else
9e762a4a
PM
3933 table = RT6_TABLE_UNSPEC;
3934 rtm->rtm_table = table;
c78679e8
DM
3935 if (nla_put_u32(skb, RTA_TABLE, table))
3936 goto nla_put_failure;
ef2c7d7b
ND
3937 if (rt->rt6i_flags & RTF_REJECT) {
3938 switch (rt->dst.error) {
3939 case -EINVAL:
3940 rtm->rtm_type = RTN_BLACKHOLE;
3941 break;
3942 case -EACCES:
3943 rtm->rtm_type = RTN_PROHIBIT;
3944 break;
b4949ab2
ND
3945 case -EAGAIN:
3946 rtm->rtm_type = RTN_THROW;
3947 break;
ef2c7d7b
ND
3948 default:
3949 rtm->rtm_type = RTN_UNREACHABLE;
3950 break;
3951 }
3952 }
38308473 3953 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3954 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
3955 else if (rt->rt6i_flags & RTF_ANYCAST)
3956 rtm->rtm_type = RTN_ANYCAST;
d1918542 3957 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3958 rtm->rtm_type = RTN_LOCAL;
3959 else
3960 rtm->rtm_type = RTN_UNICAST;
3961 rtm->rtm_flags = 0;
3962 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3963 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 3964
38308473 3965 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3966 rtm->rtm_flags |= RTM_F_CLONED;
3967
3968 if (dst) {
930345ea 3969 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3970 goto nla_put_failure;
1ab1457c 3971 rtm->rtm_dst_len = 128;
1da177e4 3972 } else if (rtm->rtm_dst_len)
930345ea 3973 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3974 goto nla_put_failure;
1da177e4
LT
3975#ifdef CONFIG_IPV6_SUBTREES
3976 if (src) {
930345ea 3977 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3978 goto nla_put_failure;
1ab1457c 3979 rtm->rtm_src_len = 128;
c78679e8 3980 } else if (rtm->rtm_src_len &&
930345ea 3981 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3982 goto nla_put_failure;
1da177e4 3983#endif
7bc570c8
YH
3984 if (iif) {
3985#ifdef CONFIG_IPV6_MROUTE
3986 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3987 int err = ip6mr_get_route(net, skb, rtm, portid);
3988
3989 if (err == 0)
3990 return 0;
3991 if (err < 0)
3992 goto nla_put_failure;
7bc570c8
YH
3993 } else
3994#endif
c78679e8
DM
3995 if (nla_put_u32(skb, RTA_IIF, iif))
3996 goto nla_put_failure;
7bc570c8 3997 } else if (dst) {
1da177e4 3998 struct in6_addr saddr_buf;
c78679e8 3999 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4000 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4001 goto nla_put_failure;
1da177e4 4002 }
2d7202bf 4003
c3968a85
DW
4004 if (rt->rt6i_prefsrc.plen) {
4005 struct in6_addr saddr_buf;
4e3fd7a0 4006 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4007 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4008 goto nla_put_failure;
c3968a85
DW
4009 }
4010
4b32b5ad
MKL
4011 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4012 if (rt->rt6i_pmtu)
4013 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4014 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4015 goto nla_put_failure;
4016
c78679e8
DM
4017 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4018 goto nla_put_failure;
8253947e 4019
beb1afac
DA
4020 /* For multipath routes, walk the siblings list and add
4021 * each as a nexthop within RTA_MULTIPATH.
4022 */
4023 if (rt->rt6i_nsiblings) {
4024 struct rt6_info *sibling, *next_sibling;
4025 struct nlattr *mp;
4026
4027 mp = nla_nest_start(skb, RTA_MULTIPATH);
4028 if (!mp)
4029 goto nla_put_failure;
4030
4031 if (rt6_add_nexthop(skb, rt) < 0)
4032 goto nla_put_failure;
4033
4034 list_for_each_entry_safe(sibling, next_sibling,
4035 &rt->rt6i_siblings, rt6i_siblings) {
4036 if (rt6_add_nexthop(skb, sibling) < 0)
4037 goto nla_put_failure;
4038 }
4039
4040 nla_nest_end(skb, mp);
4041 } else {
5be083ce 4042 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4043 goto nla_put_failure;
4044 }
4045
8253947e 4046 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4047
87a50699 4048 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4049 goto nla_put_failure;
2d7202bf 4050
c78ba6d6
LR
4051 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4052 goto nla_put_failure;
4053
19e42e45 4054
053c095a
JB
4055 nlmsg_end(skb, nlh);
4056 return 0;
2d7202bf
TG
4057
4058nla_put_failure:
26932566
PM
4059 nlmsg_cancel(skb, nlh);
4060 return -EMSGSIZE;
1da177e4
LT
4061}
4062
1b43af54 4063int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4064{
4065 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4066 struct net *net = arg->net;
4067
4068 if (rt == net->ipv6.ip6_null_entry)
4069 return 0;
1da177e4 4070
2d7202bf
TG
4071 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4072 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4073
4074 /* user wants prefix routes only */
4075 if (rtm->rtm_flags & RTM_F_PREFIX &&
4076 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4077 /* success since this is not a prefix route */
4078 return 1;
4079 }
4080 }
1da177e4 4081
1f17e2f2 4082 return rt6_fill_node(net,
191cd582 4083 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4084 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4085 NLM_F_MULTI);
1da177e4
LT
4086}
4087
c21ef3e3
DA
4088static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4089 struct netlink_ext_ack *extack)
1da177e4 4090{
3b1e0a65 4091 struct net *net = sock_net(in_skb->sk);
ab364a6f 4092 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4093 int err, iif = 0, oif = 0;
4094 struct dst_entry *dst;
ab364a6f 4095 struct rt6_info *rt;
1da177e4 4096 struct sk_buff *skb;
ab364a6f 4097 struct rtmsg *rtm;
4c9483b2 4098 struct flowi6 fl6;
18c3a61c 4099 bool fibmatch;
1da177e4 4100
fceb6435 4101 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4102 extack);
ab364a6f
TG
4103 if (err < 0)
4104 goto errout;
1da177e4 4105
ab364a6f 4106 err = -EINVAL;
4c9483b2 4107 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4108 rtm = nlmsg_data(nlh);
4109 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4110 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4111
ab364a6f
TG
4112 if (tb[RTA_SRC]) {
4113 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4114 goto errout;
4115
4e3fd7a0 4116 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4117 }
4118
4119 if (tb[RTA_DST]) {
4120 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4121 goto errout;
4122
4e3fd7a0 4123 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4124 }
4125
4126 if (tb[RTA_IIF])
4127 iif = nla_get_u32(tb[RTA_IIF]);
4128
4129 if (tb[RTA_OIF])
72331bc0 4130 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4131
2e47b291
LC
4132 if (tb[RTA_MARK])
4133 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4134
622ec2c9
LC
4135 if (tb[RTA_UID])
4136 fl6.flowi6_uid = make_kuid(current_user_ns(),
4137 nla_get_u32(tb[RTA_UID]));
4138 else
4139 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4140
1da177e4
LT
4141 if (iif) {
4142 struct net_device *dev;
72331bc0
SL
4143 int flags = 0;
4144
121622db
FW
4145 rcu_read_lock();
4146
4147 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4148 if (!dev) {
121622db 4149 rcu_read_unlock();
1da177e4 4150 err = -ENODEV;
ab364a6f 4151 goto errout;
1da177e4 4152 }
72331bc0
SL
4153
4154 fl6.flowi6_iif = iif;
4155
4156 if (!ipv6_addr_any(&fl6.saddr))
4157 flags |= RT6_LOOKUP_F_HAS_SADDR;
4158
18c3a61c
RP
4159 if (!fibmatch)
4160 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
4161 else
4162 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
4163
4164 rcu_read_unlock();
72331bc0
SL
4165 } else {
4166 fl6.flowi6_oif = oif;
4167
18c3a61c
RP
4168 if (!fibmatch)
4169 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
4170 else
4171 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
4172 }
4173
18c3a61c
RP
4174
4175 rt = container_of(dst, struct rt6_info, dst);
4176 if (rt->dst.error) {
4177 err = rt->dst.error;
4178 ip6_rt_put(rt);
4179 goto errout;
1da177e4
LT
4180 }
4181
9d6acb3b
WC
4182 if (rt == net->ipv6.ip6_null_entry) {
4183 err = rt->dst.error;
4184 ip6_rt_put(rt);
4185 goto errout;
4186 }
4187
ab364a6f 4188 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4189 if (!skb) {
94e187c0 4190 ip6_rt_put(rt);
ab364a6f
TG
4191 err = -ENOBUFS;
4192 goto errout;
4193 }
1da177e4 4194
d8d1f30b 4195 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4196 if (fibmatch)
4197 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4198 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4199 nlh->nlmsg_seq, 0);
4200 else
4201 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4202 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4203 nlh->nlmsg_seq, 0);
1da177e4 4204 if (err < 0) {
ab364a6f
TG
4205 kfree_skb(skb);
4206 goto errout;
1da177e4
LT
4207 }
4208
15e47304 4209 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4210errout:
1da177e4 4211 return err;
1da177e4
LT
4212}
4213
37a1d361
RP
4214void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4215 unsigned int nlm_flags)
1da177e4
LT
4216{
4217 struct sk_buff *skb;
5578689a 4218 struct net *net = info->nl_net;
528c4ceb
DL
4219 u32 seq;
4220 int err;
4221
4222 err = -ENOBUFS;
38308473 4223 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4224
19e42e45 4225 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4226 if (!skb)
21713ebc
TG
4227 goto errout;
4228
191cd582 4229 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4230 event, info->portid, seq, nlm_flags);
26932566
PM
4231 if (err < 0) {
4232 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4233 WARN_ON(err == -EMSGSIZE);
4234 kfree_skb(skb);
4235 goto errout;
4236 }
15e47304 4237 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4238 info->nlh, gfp_any());
4239 return;
21713ebc
TG
4240errout:
4241 if (err < 0)
5578689a 4242 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4243}
4244
8ed67789 4245static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4246 unsigned long event, void *ptr)
8ed67789 4247{
351638e7 4248 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4249 struct net *net = dev_net(dev);
8ed67789 4250
242d3a49
WC
4251 if (!(dev->flags & IFF_LOOPBACK))
4252 return NOTIFY_OK;
4253
4254 if (event == NETDEV_REGISTER) {
d8d1f30b 4255 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4256 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4257#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4258 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4259 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4260 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4261 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4262#endif
76da0704
WC
4263 } else if (event == NETDEV_UNREGISTER &&
4264 dev->reg_state != NETREG_UNREGISTERED) {
4265 /* NETDEV_UNREGISTER could be fired for multiple times by
4266 * netdev_wait_allrefs(). Make sure we only call this once.
4267 */
12d94a80 4268 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4269#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4270 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4271 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4272#endif
4273 }
4274
4275 return NOTIFY_OK;
4276}
4277
1da177e4
LT
4278/*
4279 * /proc
4280 */
4281
4282#ifdef CONFIG_PROC_FS
4283
33120b30
AD
4284static const struct file_operations ipv6_route_proc_fops = {
4285 .owner = THIS_MODULE,
4286 .open = ipv6_route_open,
4287 .read = seq_read,
4288 .llseek = seq_lseek,
8d2ca1d7 4289 .release = seq_release_net,
33120b30
AD
4290};
4291
1da177e4
LT
4292static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4293{
69ddb805 4294 struct net *net = (struct net *)seq->private;
1da177e4 4295 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4296 net->ipv6.rt6_stats->fib_nodes,
4297 net->ipv6.rt6_stats->fib_route_nodes,
4298 net->ipv6.rt6_stats->fib_rt_alloc,
4299 net->ipv6.rt6_stats->fib_rt_entries,
4300 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4301 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4302 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4303
4304 return 0;
4305}
4306
4307static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4308{
de05c557 4309 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4310}
4311
9a32144e 4312static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4313 .owner = THIS_MODULE,
4314 .open = rt6_stats_seq_open,
4315 .read = seq_read,
4316 .llseek = seq_lseek,
b6fcbdb4 4317 .release = single_release_net,
1da177e4
LT
4318};
4319#endif /* CONFIG_PROC_FS */
4320
4321#ifdef CONFIG_SYSCTL
4322
1da177e4 4323static
fe2c6338 4324int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4325 void __user *buffer, size_t *lenp, loff_t *ppos)
4326{
c486da34
LAG
4327 struct net *net;
4328 int delay;
4329 if (!write)
1da177e4 4330 return -EINVAL;
c486da34
LAG
4331
4332 net = (struct net *)ctl->extra1;
4333 delay = net->ipv6.sysctl.flush_delay;
4334 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4335 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4336 return 0;
1da177e4
LT
4337}
4338
fe2c6338 4339struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4340 {
1da177e4 4341 .procname = "flush",
4990509f 4342 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4343 .maxlen = sizeof(int),
89c8b3a1 4344 .mode = 0200,
6d9f239a 4345 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4346 },
4347 {
1da177e4 4348 .procname = "gc_thresh",
9a7ec3a9 4349 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4350 .maxlen = sizeof(int),
4351 .mode = 0644,
6d9f239a 4352 .proc_handler = proc_dointvec,
1da177e4
LT
4353 },
4354 {
1da177e4 4355 .procname = "max_size",
4990509f 4356 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4357 .maxlen = sizeof(int),
4358 .mode = 0644,
6d9f239a 4359 .proc_handler = proc_dointvec,
1da177e4
LT
4360 },
4361 {
1da177e4 4362 .procname = "gc_min_interval",
4990509f 4363 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4364 .maxlen = sizeof(int),
4365 .mode = 0644,
6d9f239a 4366 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4367 },
4368 {
1da177e4 4369 .procname = "gc_timeout",
4990509f 4370 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4371 .maxlen = sizeof(int),
4372 .mode = 0644,
6d9f239a 4373 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4374 },
4375 {
1da177e4 4376 .procname = "gc_interval",
4990509f 4377 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4378 .maxlen = sizeof(int),
4379 .mode = 0644,
6d9f239a 4380 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4381 },
4382 {
1da177e4 4383 .procname = "gc_elasticity",
4990509f 4384 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4385 .maxlen = sizeof(int),
4386 .mode = 0644,
f3d3f616 4387 .proc_handler = proc_dointvec,
1da177e4
LT
4388 },
4389 {
1da177e4 4390 .procname = "mtu_expires",
4990509f 4391 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4392 .maxlen = sizeof(int),
4393 .mode = 0644,
6d9f239a 4394 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4395 },
4396 {
1da177e4 4397 .procname = "min_adv_mss",
4990509f 4398 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4399 .maxlen = sizeof(int),
4400 .mode = 0644,
f3d3f616 4401 .proc_handler = proc_dointvec,
1da177e4
LT
4402 },
4403 {
1da177e4 4404 .procname = "gc_min_interval_ms",
4990509f 4405 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4406 .maxlen = sizeof(int),
4407 .mode = 0644,
6d9f239a 4408 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4409 },
f8572d8f 4410 { }
1da177e4
LT
4411};
4412
2c8c1e72 4413struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4414{
4415 struct ctl_table *table;
4416
4417 table = kmemdup(ipv6_route_table_template,
4418 sizeof(ipv6_route_table_template),
4419 GFP_KERNEL);
5ee09105
YH
4420
4421 if (table) {
4422 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4423 table[0].extra1 = net;
86393e52 4424 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4425 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4426 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4427 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4428 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4429 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4430 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4431 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4432 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4433
4434 /* Don't export sysctls to unprivileged users */
4435 if (net->user_ns != &init_user_ns)
4436 table[0].procname = NULL;
5ee09105
YH
4437 }
4438
760f2d01
DL
4439 return table;
4440}
1da177e4
LT
4441#endif
4442
2c8c1e72 4443static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4444{
633d424b 4445 int ret = -ENOMEM;
8ed67789 4446
86393e52
AD
4447 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4448 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4449
fc66f95c
ED
4450 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4451 goto out_ip6_dst_ops;
4452
8ed67789
DL
4453 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4454 sizeof(*net->ipv6.ip6_null_entry),
4455 GFP_KERNEL);
4456 if (!net->ipv6.ip6_null_entry)
fc66f95c 4457 goto out_ip6_dst_entries;
d8d1f30b 4458 net->ipv6.ip6_null_entry->dst.path =
8ed67789 4459 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 4460 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4461 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4462 ip6_template_metrics, true);
8ed67789
DL
4463
4464#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4465 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4466 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4467 sizeof(*net->ipv6.ip6_prohibit_entry),
4468 GFP_KERNEL);
68fffc67
PZ
4469 if (!net->ipv6.ip6_prohibit_entry)
4470 goto out_ip6_null_entry;
d8d1f30b 4471 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4472 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4473 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4474 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4475 ip6_template_metrics, true);
8ed67789
DL
4476
4477 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4478 sizeof(*net->ipv6.ip6_blk_hole_entry),
4479 GFP_KERNEL);
68fffc67
PZ
4480 if (!net->ipv6.ip6_blk_hole_entry)
4481 goto out_ip6_prohibit_entry;
d8d1f30b 4482 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4483 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4484 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4485 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4486 ip6_template_metrics, true);
8ed67789
DL
4487#endif
4488
b339a47c
PZ
4489 net->ipv6.sysctl.flush_delay = 0;
4490 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4491 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4492 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4493 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4494 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4495 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4496 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4497
6891a346
BT
4498 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4499
8ed67789
DL
4500 ret = 0;
4501out:
4502 return ret;
f2fc6a54 4503
68fffc67
PZ
4504#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4505out_ip6_prohibit_entry:
4506 kfree(net->ipv6.ip6_prohibit_entry);
4507out_ip6_null_entry:
4508 kfree(net->ipv6.ip6_null_entry);
4509#endif
fc66f95c
ED
4510out_ip6_dst_entries:
4511 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4512out_ip6_dst_ops:
f2fc6a54 4513 goto out;
cdb18761
DL
4514}
4515
2c8c1e72 4516static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4517{
8ed67789
DL
4518 kfree(net->ipv6.ip6_null_entry);
4519#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4520 kfree(net->ipv6.ip6_prohibit_entry);
4521 kfree(net->ipv6.ip6_blk_hole_entry);
4522#endif
41bb78b4 4523 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4524}
4525
d189634e
TG
4526static int __net_init ip6_route_net_init_late(struct net *net)
4527{
4528#ifdef CONFIG_PROC_FS
d4beaa66
G
4529 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4530 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4531#endif
4532 return 0;
4533}
4534
4535static void __net_exit ip6_route_net_exit_late(struct net *net)
4536{
4537#ifdef CONFIG_PROC_FS
ece31ffd
G
4538 remove_proc_entry("ipv6_route", net->proc_net);
4539 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4540#endif
4541}
4542
cdb18761
DL
4543static struct pernet_operations ip6_route_net_ops = {
4544 .init = ip6_route_net_init,
4545 .exit = ip6_route_net_exit,
4546};
4547
c3426b47
DM
4548static int __net_init ipv6_inetpeer_init(struct net *net)
4549{
4550 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4551
4552 if (!bp)
4553 return -ENOMEM;
4554 inet_peer_base_init(bp);
4555 net->ipv6.peers = bp;
4556 return 0;
4557}
4558
4559static void __net_exit ipv6_inetpeer_exit(struct net *net)
4560{
4561 struct inet_peer_base *bp = net->ipv6.peers;
4562
4563 net->ipv6.peers = NULL;
56a6b248 4564 inetpeer_invalidate_tree(bp);
c3426b47
DM
4565 kfree(bp);
4566}
4567
2b823f72 4568static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4569 .init = ipv6_inetpeer_init,
4570 .exit = ipv6_inetpeer_exit,
4571};
4572
d189634e
TG
4573static struct pernet_operations ip6_route_net_late_ops = {
4574 .init = ip6_route_net_init_late,
4575 .exit = ip6_route_net_exit_late,
4576};
4577
8ed67789
DL
4578static struct notifier_block ip6_route_dev_notifier = {
4579 .notifier_call = ip6_route_dev_notify,
242d3a49 4580 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4581};
4582
2f460933
WC
4583void __init ip6_route_init_special_entries(void)
4584{
4585 /* Registering of the loopback is done before this portion of code,
4586 * the loopback reference in rt6_info will not be taken, do it
4587 * manually for init_net */
4588 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4589 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4590 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4591 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4592 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4593 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4594 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4595 #endif
4596}
4597
433d49c3 4598int __init ip6_route_init(void)
1da177e4 4599{
433d49c3 4600 int ret;
8d0b94af 4601 int cpu;
433d49c3 4602
9a7ec3a9
DL
4603 ret = -ENOMEM;
4604 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4605 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4606 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4607 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4608 goto out;
14e50e57 4609
fc66f95c 4610 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4611 if (ret)
bdb3289f 4612 goto out_kmem_cache;
bdb3289f 4613
c3426b47
DM
4614 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4615 if (ret)
e8803b6c 4616 goto out_dst_entries;
2a0c451a 4617
7e52b33b
DM
4618 ret = register_pernet_subsys(&ip6_route_net_ops);
4619 if (ret)
4620 goto out_register_inetpeer;
c3426b47 4621
5dc121e9
AE
4622 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4623
e8803b6c 4624 ret = fib6_init();
433d49c3 4625 if (ret)
8ed67789 4626 goto out_register_subsys;
433d49c3 4627
433d49c3
DL
4628 ret = xfrm6_init();
4629 if (ret)
e8803b6c 4630 goto out_fib6_init;
c35b7e72 4631
433d49c3
DL
4632 ret = fib6_rules_init();
4633 if (ret)
4634 goto xfrm6_init;
7e5449c2 4635
d189634e
TG
4636 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4637 if (ret)
4638 goto fib6_rules_init;
4639
433d49c3 4640 ret = -ENOBUFS;
b97bac64
FW
4641 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4642 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4643 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4644 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4645 goto out_register_late_subsys;
c127ea2c 4646
8ed67789 4647 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4648 if (ret)
d189634e 4649 goto out_register_late_subsys;
8ed67789 4650
8d0b94af
MKL
4651 for_each_possible_cpu(cpu) {
4652 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4653
4654 INIT_LIST_HEAD(&ul->head);
4655 spin_lock_init(&ul->lock);
4656 }
4657
433d49c3
DL
4658out:
4659 return ret;
4660
d189634e
TG
4661out_register_late_subsys:
4662 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4663fib6_rules_init:
433d49c3
DL
4664 fib6_rules_cleanup();
4665xfrm6_init:
433d49c3 4666 xfrm6_fini();
2a0c451a
TG
4667out_fib6_init:
4668 fib6_gc_cleanup();
8ed67789
DL
4669out_register_subsys:
4670 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4671out_register_inetpeer:
4672 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4673out_dst_entries:
4674 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4675out_kmem_cache:
f2fc6a54 4676 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4677 goto out;
1da177e4
LT
4678}
4679
4680void ip6_route_cleanup(void)
4681{
8ed67789 4682 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4683 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4684 fib6_rules_cleanup();
1da177e4 4685 xfrm6_fini();
1da177e4 4686 fib6_gc_cleanup();
c3426b47 4687 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4688 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4689 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4690 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4691}