ipv6: prepare fib6_locate() for exception table
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result codes for the neighbour (NUD) check performed on a route's
 * next hop.  All failure codes are negative; success is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
d52d3997
MKL
185static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
186{
187 return dst_metrics_write_ptr(rt->dst.from);
188}
189
06582540
DM
190static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
191{
4b32b5ad 192 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 193
d52d3997
MKL
194 if (rt->rt6i_flags & RTF_PCPU)
195 return rt6_pcpu_cow_metrics(rt);
196 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
197 return NULL;
198 else
3b471175 199 return dst_cow_metrics_generic(dst, old);
06582540
DM
200}
201
f894cbf8
DM
202static inline const void *choose_neigh_daddr(struct rt6_info *rt,
203 struct sk_buff *skb,
204 const void *daddr)
39232973
DM
205{
206 struct in6_addr *p = &rt->rt6i_gateway;
207
a7563f34 208 if (!ipv6_addr_any(p))
39232973 209 return (const void *) p;
f894cbf8
DM
210 else if (skb)
211 return &ipv6_hdr(skb)->daddr;
39232973
DM
212 return daddr;
213}
214
f894cbf8
DM
215static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
216 struct sk_buff *skb,
217 const void *daddr)
d3aaeb38 218{
39232973
DM
219 struct rt6_info *rt = (struct rt6_info *) dst;
220 struct neighbour *n;
221
f894cbf8 222 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 223 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
224 if (n)
225 return n;
226 return neigh_create(&nd_tbl, daddr, dst->dev);
227}
228
63fca65d
JA
229static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
230{
231 struct net_device *dev = dst->dev;
232 struct rt6_info *rt = (struct rt6_info *)dst;
233
234 daddr = choose_neigh_daddr(rt, NULL, daddr);
235 if (!daddr)
236 return;
237 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
238 return;
239 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
240 return;
241 __ipv6_confirm_neigh(dev, daddr);
242}
243
9a7ec3a9 244static struct dst_ops ip6_dst_ops_template = {
1da177e4 245 .family = AF_INET6,
1da177e4
LT
246 .gc = ip6_dst_gc,
247 .gc_thresh = 1024,
248 .check = ip6_dst_check,
0dbaee3b 249 .default_advmss = ip6_default_advmss,
ebb762f2 250 .mtu = ip6_mtu,
06582540 251 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
252 .destroy = ip6_dst_destroy,
253 .ifdown = ip6_dst_ifdown,
254 .negative_advice = ip6_negative_advice,
255 .link_failure = ip6_link_failure,
256 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 257 .redirect = rt6_do_redirect,
9f8955cc 258 .local_out = __ip6_local_out,
d3aaeb38 259 .neigh_lookup = ip6_neigh_lookup,
63fca65d 260 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
261};
262
ebb762f2 263static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 264{
618f9bc7
SK
265 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
266
267 return mtu ? : dst->dev->mtu;
ec831ea7
RD
268}
269
6700c270
DM
270static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
271 struct sk_buff *skb, u32 mtu)
14e50e57
DM
272{
273}
274
6700c270
DM
/* Intentionally empty: redirects are ignored for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
279
14e50e57
DM
280static struct dst_ops ip6_dst_blackhole_ops = {
281 .family = AF_INET6,
14e50e57
DM
282 .destroy = ip6_dst_destroy,
283 .check = ip6_dst_check,
ebb762f2 284 .mtu = ip6_blackhole_mtu,
214f45c9 285 .default_advmss = ip6_default_advmss,
14e50e57 286 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 287 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 288 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 289 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
290};
291
62fa8a84 292static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 293 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
294};
295
fb0af4c7 296static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
297 .dst = {
298 .__refcnt = ATOMIC_INIT(1),
299 .__use = 1,
2c20cbd7 300 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 301 .error = -ENETUNREACH,
d8d1f30b
CG
302 .input = ip6_pkt_discard,
303 .output = ip6_pkt_discard_out,
1da177e4
LT
304 },
305 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 306 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
307 .rt6i_metric = ~(u32) 0,
308 .rt6i_ref = ATOMIC_INIT(1),
309};
310
101367c2
TG
311#ifdef CONFIG_IPV6_MULTIPLE_TABLES
312
fb0af4c7 313static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
314 .dst = {
315 .__refcnt = ATOMIC_INIT(1),
316 .__use = 1,
2c20cbd7 317 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 318 .error = -EACCES,
d8d1f30b
CG
319 .input = ip6_pkt_prohibit,
320 .output = ip6_pkt_prohibit_out,
101367c2
TG
321 },
322 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 323 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
324 .rt6i_metric = ~(u32) 0,
325 .rt6i_ref = ATOMIC_INIT(1),
326};
327
fb0af4c7 328static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
329 .dst = {
330 .__refcnt = ATOMIC_INIT(1),
331 .__use = 1,
2c20cbd7 332 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 333 .error = -EINVAL,
d8d1f30b 334 .input = dst_discard,
ede2059d 335 .output = dst_discard_out,
101367c2
TG
336 },
337 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 338 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
339 .rt6i_metric = ~(u32) 0,
340 .rt6i_ref = ATOMIC_INIT(1),
341};
342
343#endif
344
ebfa45f0
MKL
345static void rt6_info_init(struct rt6_info *rt)
346{
347 struct dst_entry *dst = &rt->dst;
348
349 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
350 INIT_LIST_HEAD(&rt->rt6i_siblings);
351 INIT_LIST_HEAD(&rt->rt6i_uncached);
352}
353
1da177e4 354/* allocate dst with ip6_dst_ops */
d52d3997
MKL
355static struct rt6_info *__ip6_dst_alloc(struct net *net,
356 struct net_device *dev,
ad706862 357 int flags)
1da177e4 358{
97bab73f 359 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 360 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 361
ebfa45f0
MKL
362 if (rt)
363 rt6_info_init(rt);
8104891b 364
cf911662 365 return rt;
1da177e4
LT
366}
367
9ab179d8
DA
368struct rt6_info *ip6_dst_alloc(struct net *net,
369 struct net_device *dev,
370 int flags)
d52d3997 371{
ad706862 372 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
373
374 if (rt) {
375 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
376 if (rt->rt6i_pcpu) {
377 int cpu;
378
379 for_each_possible_cpu(cpu) {
380 struct rt6_info **p;
381
382 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
383 /* no one shares rt */
384 *p = NULL;
385 }
386 } else {
587fea74 387 dst_release_immediate(&rt->dst);
d52d3997
MKL
388 return NULL;
389 }
390 }
391
392 return rt;
393}
9ab179d8 394EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 395
1da177e4
LT
396static void ip6_dst_destroy(struct dst_entry *dst)
397{
398 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 399 struct rt6_exception_bucket *bucket;
ecd98837 400 struct dst_entry *from = dst->from;
8d0b94af 401 struct inet6_dev *idev;
1da177e4 402
4b32b5ad 403 dst_destroy_metrics_generic(dst);
87775312 404 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
405 rt6_uncached_list_del(rt);
406
407 idev = rt->rt6i_idev;
38308473 408 if (idev) {
1da177e4
LT
409 rt->rt6i_idev = NULL;
410 in6_dev_put(idev);
1ab1457c 411 }
35732d01
WW
412 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
413 if (bucket) {
414 rt->rt6i_exception_bucket = NULL;
415 kfree(bucket);
416 }
1716a961 417
ecd98837
YH
418 dst->from = NULL;
419 dst_release(from);
b3419363
DM
420}
421
1da177e4
LT
422static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
423 int how)
424{
425 struct rt6_info *rt = (struct rt6_info *)dst;
426 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 427 struct net_device *loopback_dev =
c346dca1 428 dev_net(dev)->loopback_dev;
1da177e4 429
e5645f51
WW
430 if (idev && idev->dev != loopback_dev) {
431 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
432 if (loopback_idev) {
433 rt->rt6i_idev = loopback_idev;
434 in6_dev_put(idev);
97cac082 435 }
1da177e4
LT
436 }
437}
438
5973fb1e
MKL
439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
a50feda5 447static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 448{
1716a961
G
449 if (rt->rt6i_flags & RTF_EXPIRES) {
450 if (time_after(jiffies, rt->dst.expires))
a50feda5 451 return true;
1716a961 452 } else if (rt->dst.from) {
1e2ea8ad
XL
453 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
454 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 455 }
a50feda5 456 return false;
1da177e4
LT
457}
458
51ebd318 459static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
460 struct flowi6 *fl6, int oif,
461 int strict)
51ebd318
ND
462{
463 struct rt6_info *sibling, *next_sibling;
464 int route_choosen;
465
b673d6cc
JS
466 /* We might have already computed the hash for ICMPv6 errors. In such
467 * case it will always be non-zero. Otherwise now is the time to do it.
468 */
469 if (!fl6->mp_hash)
470 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
471
472 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
473 /* Don't change the route, if route_choosen == 0
474 * (siblings does not include ourself)
475 */
476 if (route_choosen)
477 list_for_each_entry_safe(sibling, next_sibling,
478 &match->rt6i_siblings, rt6i_siblings) {
479 route_choosen--;
480 if (route_choosen == 0) {
52bd4c0c
ND
481 if (rt6_score_route(sibling, oif, strict) < 0)
482 break;
51ebd318
ND
483 match = sibling;
484 break;
485 }
486 }
487 return match;
488}
489
1da177e4 490/*
c71099ac 491 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
492 */
493
8ed67789
DL
494static inline struct rt6_info *rt6_device_match(struct net *net,
495 struct rt6_info *rt,
b71d1d42 496 const struct in6_addr *saddr,
1da177e4 497 int oif,
d420895e 498 int flags)
1da177e4
LT
499{
500 struct rt6_info *local = NULL;
501 struct rt6_info *sprt;
502
dd3abc4e
YH
503 if (!oif && ipv6_addr_any(saddr))
504 goto out;
505
d8d1f30b 506 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 507 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
508
509 if (oif) {
1da177e4
LT
510 if (dev->ifindex == oif)
511 return sprt;
512 if (dev->flags & IFF_LOOPBACK) {
38308473 513 if (!sprt->rt6i_idev ||
1da177e4 514 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 515 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 516 continue;
17fb0b2b
DA
517 if (local &&
518 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
519 continue;
520 }
521 local = sprt;
522 }
dd3abc4e
YH
523 } else {
524 if (ipv6_chk_addr(net, saddr, dev,
525 flags & RT6_LOOKUP_F_IFACE))
526 return sprt;
1da177e4 527 }
dd3abc4e 528 }
1da177e4 529
dd3abc4e 530 if (oif) {
1da177e4
LT
531 if (local)
532 return local;
533
d420895e 534 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 535 return net->ipv6.ip6_null_entry;
1da177e4 536 }
dd3abc4e 537out:
1da177e4
LT
538 return rt;
539}
540
27097255 541#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
542struct __rt6_probe_work {
543 struct work_struct work;
544 struct in6_addr target;
545 struct net_device *dev;
546};
547
548static void rt6_probe_deferred(struct work_struct *w)
549{
550 struct in6_addr mcaddr;
551 struct __rt6_probe_work *work =
552 container_of(w, struct __rt6_probe_work, work);
553
554 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 555 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 556 dev_put(work->dev);
662f5533 557 kfree(work);
c2f17e82
HFS
558}
559
27097255
YH
560static void rt6_probe(struct rt6_info *rt)
561{
990edb42 562 struct __rt6_probe_work *work;
f2c31e32 563 struct neighbour *neigh;
27097255
YH
564 /*
565 * Okay, this does not seem to be appropriate
566 * for now, however, we need to check if it
567 * is really so; aka Router Reachability Probing.
568 *
569 * Router Reachability Probe MUST be rate-limited
570 * to no more than one per minute.
571 */
2152caea 572 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 573 return;
2152caea
YH
574 rcu_read_lock_bh();
575 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
576 if (neigh) {
8d6c31bf
MKL
577 if (neigh->nud_state & NUD_VALID)
578 goto out;
579
990edb42 580 work = NULL;
2152caea 581 write_lock(&neigh->lock);
990edb42
MKL
582 if (!(neigh->nud_state & NUD_VALID) &&
583 time_after(jiffies,
584 neigh->updated +
585 rt->rt6i_idev->cnf.rtr_probe_interval)) {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
587 if (work)
588 __neigh_set_probe_once(neigh);
c2f17e82 589 }
2152caea 590 write_unlock(&neigh->lock);
990edb42
MKL
591 } else {
592 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 593 }
990edb42
MKL
594
595 if (work) {
596 INIT_WORK(&work->work, rt6_probe_deferred);
597 work->target = rt->rt6i_gateway;
598 dev_hold(rt->dst.dev);
599 work->dev = rt->dst.dev;
600 schedule_work(&work->work);
601 }
602
8d6c31bf 603out:
2152caea 604 rcu_read_unlock_bh();
27097255
YH
605}
606#else
/* No-op variant used when CONFIG_IPV6_ROUTER_PREF is disabled. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
610#endif
611
1da177e4 612/*
554cfb7e 613 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 614 */
b6f99a21 615static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 616{
d1918542 617 struct net_device *dev = rt->dst.dev;
161980f4 618 if (!oif || dev->ifindex == oif)
554cfb7e 619 return 2;
161980f4
DM
620 if ((dev->flags & IFF_LOOPBACK) &&
621 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
622 return 1;
623 return 0;
554cfb7e 624}
1da177e4 625
afc154e9 626static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 627{
f2c31e32 628 struct neighbour *neigh;
afc154e9 629 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 630
4d0c5911
YH
631 if (rt->rt6i_flags & RTF_NONEXTHOP ||
632 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 633 return RT6_NUD_SUCCEED;
145a3621
YH
634
635 rcu_read_lock_bh();
636 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
637 if (neigh) {
638 read_lock(&neigh->lock);
554cfb7e 639 if (neigh->nud_state & NUD_VALID)
afc154e9 640 ret = RT6_NUD_SUCCEED;
398bcbeb 641#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 642 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 643 ret = RT6_NUD_SUCCEED;
7e980569
JB
644 else
645 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 646#endif
145a3621 647 read_unlock(&neigh->lock);
afc154e9
HFS
648 } else {
649 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 650 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 651 }
145a3621
YH
652 rcu_read_unlock_bh();
653
a5a81f0b 654 return ret;
1da177e4
LT
655}
656
554cfb7e
YH
657static int rt6_score_route(struct rt6_info *rt, int oif,
658 int strict)
1da177e4 659{
a5a81f0b 660 int m;
1ab1457c 661
4d0c5911 662 m = rt6_check_dev(rt, oif);
77d16f45 663 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 664 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
665#ifdef CONFIG_IPV6_ROUTER_PREF
666 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
667#endif
afc154e9
HFS
668 if (strict & RT6_LOOKUP_F_REACHABLE) {
669 int n = rt6_check_neigh(rt);
670 if (n < 0)
671 return n;
672 }
554cfb7e
YH
673 return m;
674}
675
f11e6659 676static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
677 int *mpri, struct rt6_info *match,
678 bool *do_rr)
554cfb7e 679{
f11e6659 680 int m;
afc154e9 681 bool match_do_rr = false;
35103d11
AG
682 struct inet6_dev *idev = rt->rt6i_idev;
683 struct net_device *dev = rt->dst.dev;
684
685 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
686 idev->cnf.ignore_routes_with_linkdown &&
687 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 688 goto out;
f11e6659
DM
689
690 if (rt6_check_expired(rt))
691 goto out;
692
693 m = rt6_score_route(rt, oif, strict);
7e980569 694 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
695 match_do_rr = true;
696 m = 0; /* lowest valid score */
7e980569 697 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 698 goto out;
afc154e9
HFS
699 }
700
701 if (strict & RT6_LOOKUP_F_REACHABLE)
702 rt6_probe(rt);
f11e6659 703
7e980569 704 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 705 if (m > *mpri) {
afc154e9 706 *do_rr = match_do_rr;
f11e6659
DM
707 *mpri = m;
708 match = rt;
f11e6659 709 }
f11e6659
DM
710out:
711 return match;
712}
713
714static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
715 struct rt6_info *rr_head,
afc154e9
HFS
716 u32 metric, int oif, int strict,
717 bool *do_rr)
f11e6659 718{
9fbdcfaf 719 struct rt6_info *rt, *match, *cont;
554cfb7e 720 int mpri = -1;
1da177e4 721
f11e6659 722 match = NULL;
9fbdcfaf
SK
723 cont = NULL;
724 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
731 }
732
733 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
745 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
f11e6659
DM
751static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
752{
753 struct rt6_info *match, *rt0;
8ed67789 754 struct net *net;
afc154e9 755 bool do_rr = false;
1da177e4 756
f11e6659
DM
757 rt0 = fn->rr_ptr;
758 if (!rt0)
759 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 760
afc154e9
HFS
761 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
d8d1f30b 765 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
f11e6659
DM
768 if (!next || next->rt6i_metric != rt0->rt6i_metric)
769 next = fn->leaf;
770
771 if (next != rt0)
772 fn->rr_ptr = next;
1da177e4 773 }
1da177e4 774
d1918542 775 net = dev_net(rt0->dst.dev);
a02cec21 776 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
777}
778
8b9df265
MKL
779static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
780{
781 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
782}
783
70ceb4f5
YH
784#ifdef CONFIG_IPV6_ROUTE_INFO
785int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 786 const struct in6_addr *gwaddr)
70ceb4f5 787{
c346dca1 788 struct net *net = dev_net(dev);
70ceb4f5
YH
789 struct route_info *rinfo = (struct route_info *) opt;
790 struct in6_addr prefix_buf, *prefix;
791 unsigned int pref;
4bed72e4 792 unsigned long lifetime;
70ceb4f5
YH
793 struct rt6_info *rt;
794
795 if (len < sizeof(struct route_info)) {
796 return -EINVAL;
797 }
798
799 /* Sanity check for prefix_len and length */
800 if (rinfo->length > 3) {
801 return -EINVAL;
802 } else if (rinfo->prefix_len > 128) {
803 return -EINVAL;
804 } else if (rinfo->prefix_len > 64) {
805 if (rinfo->length < 2) {
806 return -EINVAL;
807 }
808 } else if (rinfo->prefix_len > 0) {
809 if (rinfo->length < 1) {
810 return -EINVAL;
811 }
812 }
813
814 pref = rinfo->route_pref;
815 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 816 return -EINVAL;
70ceb4f5 817
4bed72e4 818 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
819
820 if (rinfo->length == 3)
821 prefix = (struct in6_addr *)rinfo->prefix;
822 else {
823 /* this function is safe */
824 ipv6_addr_prefix(&prefix_buf,
825 (struct in6_addr *)rinfo->prefix,
826 rinfo->prefix_len);
827 prefix = &prefix_buf;
828 }
829
f104a567
DJ
830 if (rinfo->prefix_len == 0)
831 rt = rt6_get_dflt_router(gwaddr, dev);
832 else
833 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 834 gwaddr, dev);
70ceb4f5
YH
835
836 if (rt && !lifetime) {
e0a1ad73 837 ip6_del_rt(rt);
70ceb4f5
YH
838 rt = NULL;
839 }
840
841 if (!rt && lifetime)
830218c1
DA
842 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
843 dev, pref);
70ceb4f5
YH
844 else if (rt)
845 rt->rt6i_flags = RTF_ROUTEINFO |
846 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
847
848 if (rt) {
1716a961
G
849 if (!addrconf_finite_timeout(lifetime))
850 rt6_clean_expires(rt);
851 else
852 rt6_set_expires(rt, jiffies + HZ * lifetime);
853
94e187c0 854 ip6_rt_put(rt);
70ceb4f5
YH
855 }
856 return 0;
857}
858#endif
859
a3c00e46
MKL
860static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
861 struct in6_addr *saddr)
862{
863 struct fib6_node *pn;
864 while (1) {
865 if (fn->fn_flags & RTN_TL_ROOT)
866 return NULL;
867 pn = fn->parent;
868 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
869 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
870 else
871 fn = pn;
872 if (fn->fn_flags & RTN_RTINFO)
873 return fn;
874 }
875}
c71099ac 876
8ed67789
DL
877static struct rt6_info *ip6_pol_route_lookup(struct net *net,
878 struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
1da177e4
LT
880{
881 struct fib6_node *fn;
882 struct rt6_info *rt;
883
c71099ac 884 read_lock_bh(&table->tb6_lock);
4c9483b2 885 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
886restart:
887 rt = fn->leaf;
4c9483b2 888 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 889 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 890 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
891 if (rt == net->ipv6.ip6_null_entry) {
892 fn = fib6_backtrack(fn, &fl6->saddr);
893 if (fn)
894 goto restart;
895 }
d8d1f30b 896 dst_use(&rt->dst, jiffies);
c71099ac 897 read_unlock_bh(&table->tb6_lock);
b811580d
DA
898
899 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
900
c71099ac
TG
901 return rt;
902
903}
904
67ba4152 905struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
906 int flags)
907{
908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
909}
910EXPORT_SYMBOL_GPL(ip6_route_lookup);
911
9acd9f3a
YH
912struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
913 const struct in6_addr *saddr, int oif, int strict)
c71099ac 914{
4c9483b2
DM
915 struct flowi6 fl6 = {
916 .flowi6_oif = oif,
917 .daddr = *daddr,
c71099ac
TG
918 };
919 struct dst_entry *dst;
77d16f45 920 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 921
adaa70bb 922 if (saddr) {
4c9483b2 923 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
924 flags |= RT6_LOOKUP_F_HAS_SADDR;
925 }
926
4c9483b2 927 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
928 if (dst->error == 0)
929 return (struct rt6_info *) dst;
930
931 dst_release(dst);
932
1da177e4
LT
933 return NULL;
934}
7159039a
YH
935EXPORT_SYMBOL(rt6_lookup);
936
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * The caller must hold a reference on the dst before calling it.
 */
942
e5fd387a 943static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
944 struct mx6_config *mxc,
945 struct netlink_ext_ack *extack)
1da177e4
LT
946{
947 int err;
c71099ac 948 struct fib6_table *table;
1da177e4 949
c71099ac
TG
950 table = rt->rt6i_table;
951 write_lock_bh(&table->tb6_lock);
333c4301 952 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 953 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
954
955 return err;
956}
957
40e22e8f
TG
958int ip6_ins_rt(struct rt6_info *rt)
959{
e715b6d3
FW
960 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
961 struct mx6_config mxc = { .mx = NULL, };
962
1cfb71ee
WW
963 /* Hold dst to account for the reference from the fib6 tree */
964 dst_hold(&rt->dst);
333c4301 965 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
966}
967
4832c30d
DA
968/* called with rcu_lock held */
969static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
970{
971 struct net_device *dev = rt->dst.dev;
972
973 if (rt->rt6i_flags & RTF_LOCAL) {
974 /* for copies of local routes, dst->dev needs to be the
975 * device if it is a master device, the master device if
976 * device is enslaved, and the loopback as the default
977 */
978 if (netif_is_l3_slave(dev) &&
979 !rt6_need_strict(&rt->rt6i_dst.addr))
980 dev = l3mdev_master_dev_rcu(dev);
981 else if (!netif_is_l3_master(dev))
982 dev = dev_net(dev)->loopback_dev;
983 /* last case is netif_is_l3_master(dev) is true in which
984 * case we want dev returned to be dev
985 */
986 }
987
988 return dev;
989}
990
8b9df265
MKL
991static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
992 const struct in6_addr *daddr,
993 const struct in6_addr *saddr)
1da177e4 994{
4832c30d 995 struct net_device *dev;
1da177e4
LT
996 struct rt6_info *rt;
997
998 /*
999 * Clone the route.
1000 */
1001
d52d3997 1002 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 1003 ort = (struct rt6_info *)ort->dst.from;
1da177e4 1004
4832c30d
DA
1005 rcu_read_lock();
1006 dev = ip6_rt_get_dev_rcu(ort);
1007 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1008 rcu_read_unlock();
83a09abd
MKL
1009 if (!rt)
1010 return NULL;
1011
1012 ip6_rt_copy_init(rt, ort);
1013 rt->rt6i_flags |= RTF_CACHE;
1014 rt->rt6i_metric = 0;
1015 rt->dst.flags |= DST_HOST;
1016 rt->rt6i_dst.addr = *daddr;
1017 rt->rt6i_dst.plen = 128;
1da177e4 1018
83a09abd
MKL
1019 if (!rt6_is_gw_or_nonexthop(ort)) {
1020 if (ort->rt6i_dst.plen != 128 &&
1021 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1022 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1023#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1024 if (rt->rt6i_src.plen && saddr) {
1025 rt->rt6i_src.addr = *saddr;
1026 rt->rt6i_src.plen = 128;
8b9df265 1027 }
83a09abd 1028#endif
95a9a5ba 1029 }
1da177e4 1030
95a9a5ba
YH
1031 return rt;
1032}
1da177e4 1033
d52d3997
MKL
1034static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1035{
4832c30d 1036 struct net_device *dev;
d52d3997
MKL
1037 struct rt6_info *pcpu_rt;
1038
4832c30d
DA
1039 rcu_read_lock();
1040 dev = ip6_rt_get_dev_rcu(rt);
1041 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1042 rcu_read_unlock();
d52d3997
MKL
1043 if (!pcpu_rt)
1044 return NULL;
1045 ip6_rt_copy_init(pcpu_rt, rt);
1046 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1047 pcpu_rt->rt6i_flags |= RTF_PCPU;
1048 return pcpu_rt;
1049}
1050
1051/* It should be called with read_lock_bh(&tb6_lock) acquired */
1052static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1053{
a73e4195 1054 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1055
1056 p = this_cpu_ptr(rt->rt6i_pcpu);
1057 pcpu_rt = *p;
1058
a73e4195
MKL
1059 if (pcpu_rt) {
1060 dst_hold(&pcpu_rt->dst);
1061 rt6_dst_from_metrics_check(pcpu_rt);
1062 }
1063 return pcpu_rt;
1064}
1065
1066static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1067{
9c7370a1 1068 struct fib6_table *table = rt->rt6i_table;
a73e4195 1069 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1070
1071 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1072 if (!pcpu_rt) {
1073 struct net *net = dev_net(rt->dst.dev);
1074
9c7370a1
MKL
1075 dst_hold(&net->ipv6.ip6_null_entry->dst);
1076 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1077 }
1078
9c7370a1
MKL
1079 read_lock_bh(&table->tb6_lock);
1080 if (rt->rt6i_pcpu) {
1081 p = this_cpu_ptr(rt->rt6i_pcpu);
1082 prev = cmpxchg(p, NULL, pcpu_rt);
1083 if (prev) {
1084 /* If someone did it before us, return prev instead */
587fea74 1085 dst_release_immediate(&pcpu_rt->dst);
9c7370a1
MKL
1086 pcpu_rt = prev;
1087 }
1088 } else {
1089 /* rt has been removed from the fib6 tree
1090 * before we have a chance to acquire the read_lock.
1091 * In this case, don't brother to create a pcpu rt
1092 * since rt is going away anyway. The next
1093 * dst_check() will trigger a re-lookup.
1094 */
587fea74 1095 dst_release_immediate(&pcpu_rt->dst);
9c7370a1 1096 pcpu_rt = rt;
d52d3997 1097 }
d52d3997
MKL
1098 dst_hold(&pcpu_rt->dst);
1099 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1100 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1101 return pcpu_rt;
1102}
1103
35732d01
WW
1104/* exception hash table implementation
1105 */
1106static DEFINE_SPINLOCK(rt6_exception_lock);
1107
1108/* Remove rt6_ex from hash table and free the memory
1109 * Caller must hold rt6_exception_lock
1110 */
1111static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1112 struct rt6_exception *rt6_ex)
1113{
1114 if (!bucket || !rt6_ex)
1115 return;
1116 rt6_ex->rt6i->rt6i_node = NULL;
1117 hlist_del_rcu(&rt6_ex->hlist);
1118 rt6_release(rt6_ex->rt6i);
1119 kfree_rcu(rt6_ex, rcu);
1120 WARN_ON_ONCE(!bucket->depth);
1121 bucket->depth--;
1122}
1123
1124/* Remove oldest rt6_ex in bucket and free the memory
1125 * Caller must hold rt6_exception_lock
1126 */
1127static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1128{
1129 struct rt6_exception *rt6_ex, *oldest = NULL;
1130
1131 if (!bucket)
1132 return;
1133
1134 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1135 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1136 oldest = rt6_ex;
1137 }
1138 rt6_remove_exception(bucket, oldest);
1139}
1140
1141static u32 rt6_exception_hash(const struct in6_addr *dst,
1142 const struct in6_addr *src)
1143{
1144 static u32 seed __read_mostly;
1145 u32 val;
1146
1147 net_get_random_once(&seed, sizeof(seed));
1148 val = jhash(dst, sizeof(*dst), seed);
1149
1150#ifdef CONFIG_IPV6_SUBTREES
1151 if (src)
1152 val = jhash(src, sizeof(*src), val);
1153#endif
1154 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1155}
1156
1157/* Helper function to find the cached rt in the hash table
1158 * and update bucket pointer to point to the bucket for this
1159 * (daddr, saddr) pair
1160 * Caller must hold rt6_exception_lock
1161 */
1162static struct rt6_exception *
1163__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1164 const struct in6_addr *daddr,
1165 const struct in6_addr *saddr)
1166{
1167 struct rt6_exception *rt6_ex;
1168 u32 hval;
1169
1170 if (!(*bucket) || !daddr)
1171 return NULL;
1172
1173 hval = rt6_exception_hash(daddr, saddr);
1174 *bucket += hval;
1175
1176 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1177 struct rt6_info *rt6 = rt6_ex->rt6i;
1178 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1179
1180#ifdef CONFIG_IPV6_SUBTREES
1181 if (matched && saddr)
1182 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1183#endif
1184 if (matched)
1185 return rt6_ex;
1186 }
1187 return NULL;
1188}
1189
1190/* Helper function to find the cached rt in the hash table
1191 * and update bucket pointer to point to the bucket for this
1192 * (daddr, saddr) pair
1193 * Caller must hold rcu_read_lock()
1194 */
1195static struct rt6_exception *
1196__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1197 const struct in6_addr *daddr,
1198 const struct in6_addr *saddr)
1199{
1200 struct rt6_exception *rt6_ex;
1201 u32 hval;
1202
1203 WARN_ON_ONCE(!rcu_read_lock_held());
1204
1205 if (!(*bucket) || !daddr)
1206 return NULL;
1207
1208 hval = rt6_exception_hash(daddr, saddr);
1209 *bucket += hval;
1210
1211 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1212 struct rt6_info *rt6 = rt6_ex->rt6i;
1213 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1214
1215#ifdef CONFIG_IPV6_SUBTREES
1216 if (matched && saddr)
1217 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1218#endif
1219 if (matched)
1220 return rt6_ex;
1221 }
1222 return NULL;
1223}
1224
/* Insert the cached route nrt into the exception hash table of ort.
 *
 * If ort is itself an RTF_CACHE or RTF_PCPU route, the table of its
 * dst.from parent is used instead.  Everything up to the "out" label runs
 * under rt6_exception_lock.  An existing entry found for the same key is
 * removed first, and the oldest entry of the bucket is evicted once the
 * bucket depth exceeds FIB6_MAX_DEPTH.
 *
 * Returns 0 on success, -EINVAL if the exception table of ort has been
 * flushed or nrt's pmtu is not below dst_mtu() of ort, and -ENOMEM if an
 * allocation fails.
 */
1225static int rt6_insert_exception(struct rt6_info *nrt,
1226 struct rt6_info *ort)
1227{
1228 struct rt6_exception_bucket *bucket;
1229 struct in6_addr *src_key = NULL;
1230 struct rt6_exception *rt6_ex;
1231 int err = 0;
1232
1233 /* ort can't be a cache or pcpu route */
1234 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1235 ort = (struct rt6_info *)ort->dst.from;
1236 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1237
1238 spin_lock_bh(&rt6_exception_lock);
1239
/* Set by rt6_flush_exceptions(); refuse to recreate the bucket list */
1240 if (ort->exception_bucket_flushed) {
1241 err = -EINVAL;
1242 goto out;
1243 }
1244
1245 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1246 lockdep_is_held(&rt6_exception_lock));
/* The bucket array is allocated on first insertion */
1247 if (!bucket) {
1248 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1249 GFP_ATOMIC);
1250 if (!bucket) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1255 }
1256
1257#ifdef CONFIG_IPV6_SUBTREES
1258 /* rt6i_src.plen != 0 indicates ort is in subtree
1259 * and exception table is indexed by a hash of
1260 * both rt6i_dst and rt6i_src.
1261 * Otherwise, the exception table is indexed by
1262 * a hash of only rt6i_dst.
1263 */
1264 if (ort->rt6i_src.plen)
1265 src_key = &nrt->rt6i_src.addr;
1266#endif
60006a48
WW
1267
1268 /* Update rt6i_prefsrc as it could be changed
1269 * in rt6_remove_prefsrc()
1270 */
1271 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1272 /* rt6_mtu_change() might lower mtu on ort.
1273 * Only insert this exception route if its mtu
1274 * is less than ort's mtu value.
1275 */
1276 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1277 err = -EINVAL;
1278 goto out;
1279 }
60006a48 1280
35732d01
WW
/* The lookup also advances bucket to the slot this key hashes to */
1281 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1282 src_key);
1283 if (rt6_ex)
1284 rt6_remove_exception(bucket, rt6_ex);
1285
/* NOTE(review): if this allocation fails, an entry removed just above
 * is not restored and the function returns -ENOMEM.
 */
1286 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1287 if (!rt6_ex) {
1288 err = -ENOMEM;
1289 goto out;
1290 }
1291 rt6_ex->rt6i = nrt;
1292 rt6_ex->stamp = jiffies;
1293 atomic_inc(&nrt->rt6i_ref);
1294 nrt->rt6i_node = ort->rt6i_node;
1295 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1296 bucket->depth++;
1297
1298 if (bucket->depth > FIB6_MAX_DEPTH)
1299 rt6_exception_remove_oldest(bucket);
1300
1301out:
1302 spin_unlock_bh(&rt6_exception_lock);
1303
1304 /* Update fn->fn_sernum to invalidate all cached dst */
1305 if (!err)
1306 fib6_update_sernum(ort);
1307
1308 return err;
1309}
1310
1311void rt6_flush_exceptions(struct rt6_info *rt)
1312{
1313 struct rt6_exception_bucket *bucket;
1314 struct rt6_exception *rt6_ex;
1315 struct hlist_node *tmp;
1316 int i;
1317
1318 spin_lock_bh(&rt6_exception_lock);
1319 /* Prevent rt6_insert_exception() to recreate the bucket list */
1320 rt->exception_bucket_flushed = 1;
1321
1322 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1323 lockdep_is_held(&rt6_exception_lock));
1324 if (!bucket)
1325 goto out;
1326
1327 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1328 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1329 rt6_remove_exception(bucket, rt6_ex);
1330 WARN_ON_ONCE(bucket->depth);
1331 bucket++;
1332 }
1333
1334out:
1335 spin_unlock_bh(&rt6_exception_lock);
1336}
1337
1338/* Find cached rt in the hash table inside passed in rt
1339 * Caller has to hold rcu_read_lock()
1340 */
1341static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1342 struct in6_addr *daddr,
1343 struct in6_addr *saddr)
1344{
1345 struct rt6_exception_bucket *bucket;
1346 struct in6_addr *src_key = NULL;
1347 struct rt6_exception *rt6_ex;
1348 struct rt6_info *res = NULL;
1349
1350 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1351
1352#ifdef CONFIG_IPV6_SUBTREES
1353 /* rt6i_src.plen != 0 indicates rt is in subtree
1354 * and exception table is indexed by a hash of
1355 * both rt6i_dst and rt6i_src.
1356 * Otherwise, the exception table is indexed by
1357 * a hash of only rt6i_dst.
1358 */
1359 if (rt->rt6i_src.plen)
1360 src_key = saddr;
1361#endif
1362 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1363
1364 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1365 res = rt6_ex->rt6i;
1366
1367 return res;
1368}
1369
1370/* Remove the passed in cached rt from the hash table that contains it */
1371int rt6_remove_exception_rt(struct rt6_info *rt)
1372{
1373 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1374 struct rt6_exception_bucket *bucket;
1375 struct in6_addr *src_key = NULL;
1376 struct rt6_exception *rt6_ex;
1377 int err;
1378
1379 if (!from ||
1380 !(rt->rt6i_flags | RTF_CACHE))
1381 return -EINVAL;
1382
1383 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1384 return -ENOENT;
1385
1386 spin_lock_bh(&rt6_exception_lock);
1387 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1388 lockdep_is_held(&rt6_exception_lock));
1389#ifdef CONFIG_IPV6_SUBTREES
1390 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1391 * and exception table is indexed by a hash of
1392 * both rt6i_dst and rt6i_src.
1393 * Otherwise, the exception table is indexed by
1394 * a hash of only rt6i_dst.
1395 */
1396 if (from->rt6i_src.plen)
1397 src_key = &rt->rt6i_src.addr;
1398#endif
1399 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1400 &rt->rt6i_dst.addr,
1401 src_key);
1402 if (rt6_ex) {
1403 rt6_remove_exception(bucket, rt6_ex);
1404 err = 0;
1405 } else {
1406 err = -ENOENT;
1407 }
1408
1409 spin_unlock_bh(&rt6_exception_lock);
1410 return err;
1411}
1412
1413/* Find rt6_ex which contains the passed in rt cache and
1414 * refresh its stamp
1415 */
1416static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1417{
1418 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1419 struct rt6_exception_bucket *bucket;
1420 struct in6_addr *src_key = NULL;
1421 struct rt6_exception *rt6_ex;
1422
1423 if (!from ||
1424 !(rt->rt6i_flags | RTF_CACHE))
1425 return;
1426
1427 rcu_read_lock();
1428 bucket = rcu_dereference(from->rt6i_exception_bucket);
1429
1430#ifdef CONFIG_IPV6_SUBTREES
1431 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1432 * and exception table is indexed by a hash of
1433 * both rt6i_dst and rt6i_src.
1434 * Otherwise, the exception table is indexed by
1435 * a hash of only rt6i_dst.
1436 */
1437 if (from->rt6i_src.plen)
1438 src_key = &rt->rt6i_src.addr;
1439#endif
1440 rt6_ex = __rt6_find_exception_rcu(&bucket,
1441 &rt->rt6i_dst.addr,
1442 src_key);
1443 if (rt6_ex)
1444 rt6_ex->stamp = jiffies;
1445
1446 rcu_read_unlock();
1447}
1448
60006a48
WW
1449static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1450{
1451 struct rt6_exception_bucket *bucket;
1452 struct rt6_exception *rt6_ex;
1453 int i;
1454
1455 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1456 lockdep_is_held(&rt6_exception_lock));
1457
1458 if (bucket) {
1459 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1460 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1461 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1462 }
1463 bucket++;
1464 }
1465 }
1466}
1467
f5bbe7ee
WW
1468static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1469{
1470 struct rt6_exception_bucket *bucket;
1471 struct rt6_exception *rt6_ex;
1472 int i;
1473
1474 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1475 lockdep_is_held(&rt6_exception_lock));
1476
1477 if (bucket) {
1478 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1479 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1480 struct rt6_info *entry = rt6_ex->rt6i;
1481 /* For RTF_CACHE with rt6i_pmtu == 0
1482 * (i.e. a redirected route),
1483 * the metrics of its rt->dst.from has already
1484 * been updated.
1485 */
1486 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1487 entry->rt6i_pmtu = mtu;
1488 }
1489 bucket++;
1490 }
1491 }
1492}
1493
b16cb459
WW
1494#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1495
1496static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1497 struct in6_addr *gateway)
1498{
1499 struct rt6_exception_bucket *bucket;
1500 struct rt6_exception *rt6_ex;
1501 struct hlist_node *tmp;
1502 int i;
1503
1504 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1505 return;
1506
1507 spin_lock_bh(&rt6_exception_lock);
1508 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1509 lockdep_is_held(&rt6_exception_lock));
1510
1511 if (bucket) {
1512 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1513 hlist_for_each_entry_safe(rt6_ex, tmp,
1514 &bucket->chain, hlist) {
1515 struct rt6_info *entry = rt6_ex->rt6i;
1516
1517 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1518 RTF_CACHE_GATEWAY &&
1519 ipv6_addr_equal(gateway,
1520 &entry->rt6i_gateway)) {
1521 rt6_remove_exception(bucket, rt6_ex);
1522 }
1523 }
1524 bucket++;
1525 }
1526 }
1527
1528 spin_unlock_bh(&rt6_exception_lock);
1529}
1530
c757faa8
WW
1531static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1532 struct rt6_exception *rt6_ex,
1533 struct fib6_gc_args *gc_args,
1534 unsigned long now)
1535{
1536 struct rt6_info *rt = rt6_ex->rt6i;
1537
1538 if (atomic_read(&rt->dst.__refcnt) == 1 &&
1539 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1540 RT6_TRACE("aging clone %p\n", rt);
1541 rt6_remove_exception(bucket, rt6_ex);
1542 return;
1543 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1544 struct neighbour *neigh;
1545 __u8 neigh_flags = 0;
1546
1547 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1548 if (neigh) {
1549 neigh_flags = neigh->flags;
1550 neigh_release(neigh);
1551 }
1552 if (!(neigh_flags & NTF_ROUTER)) {
1553 RT6_TRACE("purging route %p via non-router but gateway\n",
1554 rt);
1555 rt6_remove_exception(bucket, rt6_ex);
1556 return;
1557 }
1558 }
1559 gc_args->more++;
1560}
1561
1562void rt6_age_exceptions(struct rt6_info *rt,
1563 struct fib6_gc_args *gc_args,
1564 unsigned long now)
1565{
1566 struct rt6_exception_bucket *bucket;
1567 struct rt6_exception *rt6_ex;
1568 struct hlist_node *tmp;
1569 int i;
1570
1571 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1572 return;
1573
1574 spin_lock_bh(&rt6_exception_lock);
1575 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1576 lockdep_is_held(&rt6_exception_lock));
1577
1578 if (bucket) {
1579 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1580 hlist_for_each_entry_safe(rt6_ex, tmp,
1581 &bucket->chain, hlist) {
1582 rt6_age_examine_exception(bucket, rt6_ex,
1583 gc_args, now);
1584 }
1585 bucket++;
1586 }
1587 }
1588 spin_unlock_bh(&rt6_exception_lock);
1589}
1590
9ff74384
DA
/* Look up the route for fl6 in table.
 *
 * The lookup runs under read_lock_bh(&table->tb6_lock).  If the selection
 * ends on ip6_null_entry, the tree is backtracked, and as a last resort
 * the selection restarts from the first node without
 * RT6_LOOKUP_F_REACHABLE.
 *
 * Depending on the result the function returns the route itself
 * (ip6_null_entry or an RTF_CACHE route), a clone from
 * ip6_rt_cache_alloc() that is put on the uncached list
 * (FLOWI_FLAG_KNOWN_NH on a non-gateway route), or a per-cpu copy of the
 * route.
 */
1591struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1592 int oif, struct flowi6 *fl6, int flags)
1da177e4 1593{
367efcb9 1594 struct fib6_node *fn, *saved_fn;
45e4fd26 1595 struct rt6_info *rt;
c71099ac 1596 int strict = 0;
1da177e4 1597
77d16f45 1598 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1599 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
/* With forwarding disabled, first try reachable routers only */
1600 if (net->ipv6.devconf_all->forwarding == 0)
1601 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1602
c71099ac 1603 read_lock_bh(&table->tb6_lock);
1da177e4 1604
4c9483b2 1605 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1606 saved_fn = fn;
1da177e4 1607
ca254490
DA
1608 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1609 oif = 0;
1610
a3c00e46 1611redo_rt6_select:
367efcb9 1612 rt = rt6_select(fn, oif, strict);
52bd4c0c 1613 if (rt->rt6i_nsiblings)
367efcb9 1614 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1615 if (rt == net->ipv6.ip6_null_entry) {
1616 fn = fib6_backtrack(fn, &fl6->saddr);
1617 if (fn)
1618 goto redo_rt6_select;
367efcb9
MKL
1619 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1620 /* also consider unreachable route */
1621 strict &= ~RT6_LOOKUP_F_REACHABLE;
1622 fn = saved_fn;
1623 goto redo_rt6_select;
367efcb9 1624 }
a3c00e46
MKL
1625 }
1626
fb9de91e 1627
/* Case 1: null entry or an RTF_CACHE route, returned as is */
3da59bd9 1628 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1629 dst_use(&rt->dst, jiffies);
1630 read_unlock_bh(&table->tb6_lock);
1631
1632 rt6_dst_from_metrics_check(rt);
b811580d
DA
1633
1634 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1635 return rt;
3da59bd9
MKL
1636 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1637 !(rt->rt6i_flags & RTF_GATEWAY))) {
1638 /* Create a RTF_CACHE clone which will not be
1639 * owned by the fib6 tree. It is for the special case where
1640 * the daddr in the skb during the neighbor look-up is different
1641 * from the fl6->daddr used to look-up route here.
1642 */
1643
1644 struct rt6_info *uncached_rt;
1645
d52d3997
MKL
1646 dst_use(&rt->dst, jiffies);
1647 read_unlock_bh(&table->tb6_lock);
1648
3da59bd9
MKL
1649 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1650 dst_release(&rt->dst);
c71099ac 1651
1cfb71ee
WW
1652 if (uncached_rt) {
1653 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1654 * No need for another dst_hold()
1655 */
8d0b94af 1656 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1657 } else {
/* Allocation failed: fall back to the null entry */
3da59bd9 1658 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1659 dst_hold(&uncached_rt->dst);
1660 }
b811580d
DA
1661
1662 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1663 return uncached_rt;
3da59bd9 1664
d52d3997
MKL
1665 } else {
1666 /* Get a percpu copy */
1667
1668 struct rt6_info *pcpu_rt;
1669
1670 rt->dst.lastuse = jiffies;
1671 rt->dst.__use++;
1672 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1673
9c7370a1
MKL
1674 if (pcpu_rt) {
1675 read_unlock_bh(&table->tb6_lock);
1676 } else {
1677 /* We have to do the read_unlock first
1678 * because rt6_make_pcpu_route() may trigger
1679 * ip6_dst_gc() which will take the write_lock.
1680 */
/* rt is held across the unlocked call and released right after it */
1681 dst_hold(&rt->dst);
1682 read_unlock_bh(&table->tb6_lock);
a73e4195 1683 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1684 dst_release(&rt->dst);
1685 }
d52d3997 1686
b811580d 1687 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1688 return pcpu_rt;
9c7370a1 1689
d52d3997 1690 }
1da177e4 1691}
9ff74384 1692EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1693
8ed67789 1694static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1695 struct flowi6 *fl6, int flags)
4acad72d 1696{
4c9483b2 1697 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1698}
1699
d409b847
MB
1700struct dst_entry *ip6_route_input_lookup(struct net *net,
1701 struct net_device *dev,
1702 struct flowi6 *fl6, int flags)
72331bc0
SL
1703{
1704 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1705 flags |= RT6_LOOKUP_F_IFACE;
1706
1707 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1708}
d409b847 1709EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1710
23aebdac
JS
1711static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1712 struct flow_keys *keys)
1713{
1714 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1715 const struct ipv6hdr *key_iph = outer_iph;
1716 const struct ipv6hdr *inner_iph;
1717 const struct icmp6hdr *icmph;
1718 struct ipv6hdr _inner_iph;
1719
1720 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1721 goto out;
1722
1723 icmph = icmp6_hdr(skb);
1724 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1725 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1726 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1727 icmph->icmp6_type != ICMPV6_PARAMPROB)
1728 goto out;
1729
1730 inner_iph = skb_header_pointer(skb,
1731 skb_transport_offset(skb) + sizeof(*icmph),
1732 sizeof(_inner_iph), &_inner_iph);
1733 if (!inner_iph)
1734 goto out;
1735
1736 key_iph = inner_iph;
1737out:
1738 memset(keys, 0, sizeof(*keys));
1739 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1740 keys->addrs.v6addrs.src = key_iph->saddr;
1741 keys->addrs.v6addrs.dst = key_iph->daddr;
1742 keys->tags.flow_label = ip6_flowinfo(key_iph);
1743 keys->basic.ip_proto = key_iph->nexthdr;
1744}
1745
1746/* if skb is set it will be used and fl6 can be NULL */
1747u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1748{
1749 struct flow_keys hash_keys;
1750
1751 if (skb) {
1752 ip6_multipath_l3_keys(skb, &hash_keys);
1753 return flow_hash_from_keys(&hash_keys);
1754 }
1755
1756 return get_hash_from_flowi6(fl6);
1757}
1758
c71099ac
TG
1759void ip6_route_input(struct sk_buff *skb)
1760{
b71d1d42 1761 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1762 struct net *net = dev_net(skb->dev);
adaa70bb 1763 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1764 struct ip_tunnel_info *tun_info;
4c9483b2 1765 struct flowi6 fl6 = {
e0d56fdd 1766 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1767 .daddr = iph->daddr,
1768 .saddr = iph->saddr,
6502ca52 1769 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1770 .flowi6_mark = skb->mark,
1771 .flowi6_proto = iph->nexthdr,
c71099ac 1772 };
adaa70bb 1773
904af04d 1774 tun_info = skb_tunnel_info(skb);
46fa062a 1775 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1776 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1777 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1778 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1779 skb_dst_drop(skb);
72331bc0 1780 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1781}
1782
8ed67789 1783static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1784 struct flowi6 *fl6, int flags)
1da177e4 1785{
4c9483b2 1786 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1787}
1788
6f21c96a
PA
1789struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1790 struct flowi6 *fl6, int flags)
c71099ac 1791{
d46a9d67 1792 bool any_src;
c71099ac 1793
4c1feac5
DA
1794 if (rt6_need_strict(&fl6->daddr)) {
1795 struct dst_entry *dst;
1796
1797 dst = l3mdev_link_scope_lookup(net, fl6);
1798 if (dst)
1799 return dst;
1800 }
ca254490 1801
1fb9489b 1802 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1803
d46a9d67 1804 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1805 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1806 (fl6->flowi6_oif && any_src))
77d16f45 1807 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1808
d46a9d67 1809 if (!any_src)
adaa70bb 1810 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1811 else if (sk)
1812 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1813
4c9483b2 1814 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1815}
6f21c96a 1816EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1817
2774c131 1818struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1819{
5c1e6aa3 1820 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1821 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1822 struct dst_entry *new = NULL;
1823
1dbe3252 1824 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
b2a9c0ed 1825 DST_OBSOLETE_NONE, 0);
14e50e57 1826 if (rt) {
0a1f5962 1827 rt6_info_init(rt);
8104891b 1828
0a1f5962 1829 new = &rt->dst;
14e50e57 1830 new->__use = 1;
352e512c 1831 new->input = dst_discard;
ede2059d 1832 new->output = dst_discard_out;
14e50e57 1833
0a1f5962 1834 dst_copy_metrics(new, &ort->dst);
14e50e57 1835
1dbe3252 1836 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1837 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1838 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1839 rt->rt6i_metric = 0;
1840
1841 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1842#ifdef CONFIG_IPV6_SUBTREES
1843 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1844#endif
14e50e57
DM
1845 }
1846
69ead7af
DM
1847 dst_release(dst_orig);
1848 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1849}
14e50e57 1850
1da177e4
LT
1851/*
1852 * Destination cache support functions
1853 */
1854
4b32b5ad
MKL
1855static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1856{
1857 if (rt->dst.from &&
1858 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1859 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1860}
1861
3da59bd9
MKL
1862static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1863{
36143645 1864 u32 rt_cookie = 0;
c5cff856
WW
1865
1866 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1867 return NULL;
1868
1869 if (rt6_check_expired(rt))
1870 return NULL;
1871
1872 return &rt->dst;
1873}
1874
1875static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1876{
5973fb1e
MKL
1877 if (!__rt6_check_expired(rt) &&
1878 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1879 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1880 return &rt->dst;
1881 else
1882 return NULL;
1883}
1884
1da177e4
LT
1885static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1886{
1887 struct rt6_info *rt;
1888
1889 rt = (struct rt6_info *) dst;
1890
6f3118b5
ND
1891 /* All IPV6 dsts are created with ->obsolete set to the value
1892 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1893 * into this function always.
1894 */
e3bc10bd 1895
4b32b5ad
MKL
1896 rt6_dst_from_metrics_check(rt);
1897
02bcf4e0 1898 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1899 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1900 return rt6_dst_from_check(rt, cookie);
1901 else
1902 return rt6_check(rt, cookie);
1da177e4
LT
1903}
1904
1905static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1906{
1907 struct rt6_info *rt = (struct rt6_info *) dst;
1908
1909 if (rt) {
54c1a859
YH
1910 if (rt->rt6i_flags & RTF_CACHE) {
1911 if (rt6_check_expired(rt)) {
1912 ip6_del_rt(rt);
1913 dst = NULL;
1914 }
1915 } else {
1da177e4 1916 dst_release(dst);
54c1a859
YH
1917 dst = NULL;
1918 }
1da177e4 1919 }
54c1a859 1920 return dst;
1da177e4
LT
1921}
1922
1923static void ip6_link_failure(struct sk_buff *skb)
1924{
1925 struct rt6_info *rt;
1926
3ffe533c 1927 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1928
adf30907 1929 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1930 if (rt) {
1eb4f758 1931 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1932 if (dst_hold_safe(&rt->dst))
1933 ip6_del_rt(rt);
c5cff856
WW
1934 } else {
1935 struct fib6_node *fn;
1936
1937 rcu_read_lock();
1938 fn = rcu_dereference(rt->rt6i_node);
1939 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1940 fn->fn_sernum = -1;
1941 rcu_read_unlock();
1eb4f758 1942 }
1da177e4
LT
1943 }
1944}
1945
45e4fd26
MKL
1946static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1947{
1948 struct net *net = dev_net(rt->dst.dev);
1949
1950 rt->rt6i_flags |= RTF_MODIFIED;
1951 rt->rt6i_pmtu = mtu;
1952 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1953}
1954
0d3f6d29
MKL
1955static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1956{
1957 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
1958 (rt->rt6i_flags & RTF_PCPU ||
1959 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
1960}
1961
45e4fd26
MKL
1962static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1963 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1964{
0dec879f 1965 const struct in6_addr *daddr, *saddr;
67ba4152 1966 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1967
45e4fd26
MKL
1968 if (rt6->rt6i_flags & RTF_LOCAL)
1969 return;
81aded24 1970
19bda36c
XL
1971 if (dst_metric_locked(dst, RTAX_MTU))
1972 return;
1973
0dec879f
JA
1974 if (iph) {
1975 daddr = &iph->daddr;
1976 saddr = &iph->saddr;
1977 } else if (sk) {
1978 daddr = &sk->sk_v6_daddr;
1979 saddr = &inet6_sk(sk)->saddr;
1980 } else {
1981 daddr = NULL;
1982 saddr = NULL;
1983 }
1984 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1985 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1986 if (mtu >= dst_mtu(dst))
1987 return;
9d289715 1988
0d3f6d29 1989 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1990 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1991 } else if (daddr) {
45e4fd26
MKL
1992 struct rt6_info *nrt6;
1993
45e4fd26
MKL
1994 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1995 if (nrt6) {
1996 rt6_do_update_pmtu(nrt6, mtu);
1997
1998 /* ip6_ins_rt(nrt6) will bump the
1999 * rt6->rt6i_node->fn_sernum
2000 * which will fail the next rt6_check() and
2001 * invalidate the sk->sk_dst_cache.
2002 */
2003 ip6_ins_rt(nrt6);
1cfb71ee
WW
2004 /* Release the reference taken in
2005 * ip6_rt_cache_alloc()
2006 */
2007 dst_release(&nrt6->dst);
45e4fd26 2008 }
1da177e4
LT
2009 }
2010}
2011
45e4fd26
MKL
2012static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2013 struct sk_buff *skb, u32 mtu)
2014{
2015 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2016}
2017
42ae66c8 2018void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2019 int oif, u32 mark, kuid_t uid)
81aded24
DM
2020{
2021 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2022 struct dst_entry *dst;
2023 struct flowi6 fl6;
2024
2025 memset(&fl6, 0, sizeof(fl6));
2026 fl6.flowi6_oif = oif;
1b3c61dc 2027 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2028 fl6.daddr = iph->daddr;
2029 fl6.saddr = iph->saddr;
6502ca52 2030 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2031 fl6.flowi6_uid = uid;
81aded24
DM
2032
2033 dst = ip6_route_output(net, NULL, &fl6);
2034 if (!dst->error)
45e4fd26 2035 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2036 dst_release(dst);
2037}
2038EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2039
2040void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2041{
33c162a9
MKL
2042 struct dst_entry *dst;
2043
81aded24 2044 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2045 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2046
2047 dst = __sk_dst_get(sk);
2048 if (!dst || !dst->obsolete ||
2049 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2050 return;
2051
2052 bh_lock_sock(sk);
2053 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2054 ip6_datagram_dst_update(sk, false);
2055 bh_unlock_sock(sk);
81aded24
DM
2056}
2057EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2058
b55b76b2
DJ
/* Lookup key for redirect handling: a flow plus the gateway address the
 * route must match.  fl6 has to stay the first member because the lookup
 * callback casts a struct flowi6 pointer back to this type.
 */
2059/* Handle redirects */
2060struct ip6rd_flowi {
/* flow handed to fib6_rule_lookup() */
2061 struct flowi6 fl6;
/* compared against rt6i_gateway of candidate routes */
2062 struct in6_addr gateway;
2063};
2064
2065static struct rt6_info *__ip6_route_redirect(struct net *net,
2066 struct fib6_table *table,
2067 struct flowi6 *fl6,
2068 int flags)
2069{
2070 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2071 struct rt6_info *rt;
2072 struct fib6_node *fn;
2073
2074 /* Get the "current" route for this destination and
67c408cf 2075 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2076 *
2077 * RFC 4861 specifies that redirects should only be
2078 * accepted if they come from the nexthop to the target.
2079 * Due to the way the routes are chosen, this notion
2080 * is a bit fuzzy and one might need to check all possible
2081 * routes.
2082 */
2083
2084 read_lock_bh(&table->tb6_lock);
2085 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2086restart:
2087 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2088 if (rt6_check_expired(rt))
2089 continue;
2090 if (rt->dst.error)
2091 break;
2092 if (!(rt->rt6i_flags & RTF_GATEWAY))
2093 continue;
2094 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2095 continue;
2096 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
2097 continue;
2098 break;
2099 }
2100
2101 if (!rt)
2102 rt = net->ipv6.ip6_null_entry;
2103 else if (rt->dst.error) {
2104 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2105 goto out;
2106 }
2107
2108 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2109 fn = fib6_backtrack(fn, &fl6->saddr);
2110 if (fn)
2111 goto restart;
b55b76b2 2112 }
a3c00e46 2113
b0a1ba59 2114out:
b55b76b2
DJ
2115 dst_hold(&rt->dst);
2116
2117 read_unlock_bh(&table->tb6_lock);
2118
b811580d 2119 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
2120 return rt;
2121};
2122
2123static struct dst_entry *ip6_route_redirect(struct net *net,
2124 const struct flowi6 *fl6,
2125 const struct in6_addr *gateway)
2126{
2127 int flags = RT6_LOOKUP_F_HAS_SADDR;
2128 struct ip6rd_flowi rdfl;
2129
2130 rdfl.fl6 = *fl6;
2131 rdfl.gateway = *gateway;
2132
2133 return fib6_rule_lookup(net, &rdfl.fl6,
2134 flags, __ip6_route_redirect);
2135}
2136
e2d118a1
LC
2137void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2138 kuid_t uid)
3a5ad2ee
DM
2139{
2140 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2141 struct dst_entry *dst;
2142 struct flowi6 fl6;
2143
2144 memset(&fl6, 0, sizeof(fl6));
e374c618 2145 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2146 fl6.flowi6_oif = oif;
2147 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2148 fl6.daddr = iph->daddr;
2149 fl6.saddr = iph->saddr;
6502ca52 2150 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2151 fl6.flowi6_uid = uid;
3a5ad2ee 2152
b55b76b2
DJ
2153 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2154 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2155 dst_release(dst);
2156}
2157EXPORT_SYMBOL_GPL(ip6_redirect);
2158
c92a59ec
DJ
2159void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2160 u32 mark)
2161{
2162 const struct ipv6hdr *iph = ipv6_hdr(skb);
2163 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2164 struct dst_entry *dst;
2165 struct flowi6 fl6;
2166
2167 memset(&fl6, 0, sizeof(fl6));
e374c618 2168 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2169 fl6.flowi6_oif = oif;
2170 fl6.flowi6_mark = mark;
c92a59ec
DJ
2171 fl6.daddr = msg->dest;
2172 fl6.saddr = iph->daddr;
e2d118a1 2173 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2174
b55b76b2
DJ
2175 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2176 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2177 dst_release(dst);
2178}
2179
3a5ad2ee
DM
2180void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2181{
e2d118a1
LC
2182 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2183 sk->sk_uid);
3a5ad2ee
DM
2184}
2185EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2186
0dbaee3b 2187static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2188{
0dbaee3b
DM
2189 struct net_device *dev = dst->dev;
2190 unsigned int mtu = dst_mtu(dst);
2191 struct net *net = dev_net(dev);
2192
1da177e4
LT
2193 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2194
5578689a
DL
2195 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2196 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2197
2198 /*
1ab1457c
YH
2199 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2200 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2201 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2202 * rely only on pmtu discovery"
2203 */
2204 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2205 mtu = IPV6_MAXPLEN;
2206 return mtu;
2207}
2208
ebb762f2 2209static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2210{
4b32b5ad
MKL
2211 const struct rt6_info *rt = (const struct rt6_info *)dst;
2212 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2213 struct inet6_dev *idev;
618f9bc7 2214
4b32b5ad
MKL
2215 if (mtu)
2216 goto out;
2217
2218 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2219 if (mtu)
30f78d8e 2220 goto out;
618f9bc7
SK
2221
2222 mtu = IPV6_MIN_MTU;
d33e4553
DM
2223
2224 rcu_read_lock();
2225 idev = __in6_dev_get(dst->dev);
2226 if (idev)
2227 mtu = idev->cnf.mtu6;
2228 rcu_read_unlock();
2229
30f78d8e 2230out:
14972cbd
RP
2231 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2232
2233 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2234}
2235
3b00944c 2236struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2237 struct flowi6 *fl6)
1da177e4 2238{
87a11578 2239 struct dst_entry *dst;
1da177e4
LT
2240 struct rt6_info *rt;
2241 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2242 struct net *net = dev_net(dev);
1da177e4 2243
38308473 2244 if (unlikely(!idev))
122bdf67 2245 return ERR_PTR(-ENODEV);
1da177e4 2246
ad706862 2247 rt = ip6_dst_alloc(net, dev, 0);
38308473 2248 if (unlikely(!rt)) {
1da177e4 2249 in6_dev_put(idev);
87a11578 2250 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2251 goto out;
2252 }
2253
8e2ec639
YZ
2254 rt->dst.flags |= DST_HOST;
2255 rt->dst.output = ip6_output;
550bab42 2256 rt->rt6i_gateway = fl6->daddr;
87a11578 2257 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2258 rt->rt6i_dst.plen = 128;
2259 rt->rt6i_idev = idev;
14edd87d 2260 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2261
587fea74
WW
2262 /* Add this dst into uncached_list so that rt6_ifdown() can
2263 * do proper release of the net_device
2264 */
2265 rt6_uncached_list_add(rt);
1da177e4 2266
87a11578
DM
2267 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2268
1da177e4 2269out:
87a11578 2270 return dst;
1da177e4
LT
2271}
2272
569d3645 2273static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2274{
86393e52 2275 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2276 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2277 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2278 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2279 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2280 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2281 int entries;
7019b78e 2282
fc66f95c 2283 entries = dst_entries_get_fast(ops);
49a18d86 2284 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2285 entries <= rt_max_size)
1da177e4
LT
2286 goto out;
2287
6891a346 2288 net->ipv6.ip6_rt_gc_expire++;
14956643 2289 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2290 entries = dst_entries_get_slow(ops);
2291 if (entries < ops->gc_thresh)
7019b78e 2292 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2293out:
7019b78e 2294 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2295 return entries > rt_max_size;
1da177e4
LT
2296}
2297
e715b6d3
FW
2298static int ip6_convert_metrics(struct mx6_config *mxc,
2299 const struct fib6_config *cfg)
2300{
c3a8d947 2301 bool ecn_ca = false;
e715b6d3
FW
2302 struct nlattr *nla;
2303 int remaining;
2304 u32 *mp;
2305
63159f29 2306 if (!cfg->fc_mx)
e715b6d3
FW
2307 return 0;
2308
2309 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2310 if (unlikely(!mp))
2311 return -ENOMEM;
2312
2313 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2314 int type = nla_type(nla);
1bb14807 2315 u32 val;
e715b6d3 2316
1bb14807
DB
2317 if (!type)
2318 continue;
2319 if (unlikely(type > RTAX_MAX))
2320 goto err;
ea697639 2321
1bb14807
DB
2322 if (type == RTAX_CC_ALGO) {
2323 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2324
1bb14807 2325 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 2326 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
2327 if (val == TCP_CA_UNSPEC)
2328 goto err;
2329 } else {
2330 val = nla_get_u32(nla);
e715b6d3 2331 }
626abd59
PA
2332 if (type == RTAX_HOPLIMIT && val > 255)
2333 val = 255;
b8d3e416
DB
2334 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2335 goto err;
1bb14807
DB
2336
2337 mp[type - 1] = val;
2338 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2339 }
2340
c3a8d947
DB
2341 if (ecn_ca) {
2342 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2343 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2344 }
e715b6d3 2345
c3a8d947 2346 mxc->mx = mp;
e715b6d3
FW
2347 return 0;
2348 err:
2349 kfree(mp);
2350 return -EINVAL;
2351}
1da177e4 2352
8c14586f
DA
2353static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2354 struct fib6_config *cfg,
2355 const struct in6_addr *gw_addr)
2356{
2357 struct flowi6 fl6 = {
2358 .flowi6_oif = cfg->fc_ifindex,
2359 .daddr = *gw_addr,
2360 .saddr = cfg->fc_prefsrc,
2361 };
2362 struct fib6_table *table;
2363 struct rt6_info *rt;
d5d32e4b 2364 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2365
2366 table = fib6_get_table(net, cfg->fc_table);
2367 if (!table)
2368 return NULL;
2369
2370 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2371 flags |= RT6_LOOKUP_F_HAS_SADDR;
2372
2373 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2374
2375 /* if table lookup failed, fall back to full lookup */
2376 if (rt == net->ipv6.ip6_null_entry) {
2377 ip6_rt_put(rt);
2378 rt = NULL;
2379 }
2380
2381 return rt;
2382}
2383
333c4301
DA
2384static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2385 struct netlink_ext_ack *extack)
1da177e4 2386{
5578689a 2387 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2388 struct rt6_info *rt = NULL;
2389 struct net_device *dev = NULL;
2390 struct inet6_dev *idev = NULL;
c71099ac 2391 struct fib6_table *table;
1da177e4 2392 int addr_type;
8c5b83f0 2393 int err = -EINVAL;
1da177e4 2394
557c44be 2395 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2396 if (cfg->fc_flags & RTF_PCPU) {
2397 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2398 goto out;
d5d531cb 2399 }
557c44be 2400
d5d531cb
DA
2401 if (cfg->fc_dst_len > 128) {
2402 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2403 goto out;
2404 }
2405 if (cfg->fc_src_len > 128) {
2406 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2407 goto out;
d5d531cb 2408 }
1da177e4 2409#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2410 if (cfg->fc_src_len) {
2411 NL_SET_ERR_MSG(extack,
2412 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2413 goto out;
d5d531cb 2414 }
1da177e4 2415#endif
86872cb5 2416 if (cfg->fc_ifindex) {
1da177e4 2417 err = -ENODEV;
5578689a 2418 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2419 if (!dev)
2420 goto out;
2421 idev = in6_dev_get(dev);
2422 if (!idev)
2423 goto out;
2424 }
2425
86872cb5
TG
2426 if (cfg->fc_metric == 0)
2427 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2428
d71314b4 2429 err = -ENOBUFS;
38308473
DM
2430 if (cfg->fc_nlinfo.nlh &&
2431 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2432 table = fib6_get_table(net, cfg->fc_table);
38308473 2433 if (!table) {
f3213831 2434 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2435 table = fib6_new_table(net, cfg->fc_table);
2436 }
2437 } else {
2438 table = fib6_new_table(net, cfg->fc_table);
2439 }
38308473
DM
2440
2441 if (!table)
c71099ac 2442 goto out;
c71099ac 2443
ad706862
MKL
2444 rt = ip6_dst_alloc(net, NULL,
2445 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2446
38308473 2447 if (!rt) {
1da177e4
LT
2448 err = -ENOMEM;
2449 goto out;
2450 }
2451
1716a961
G
2452 if (cfg->fc_flags & RTF_EXPIRES)
2453 rt6_set_expires(rt, jiffies +
2454 clock_t_to_jiffies(cfg->fc_expires));
2455 else
2456 rt6_clean_expires(rt);
1da177e4 2457
86872cb5
TG
2458 if (cfg->fc_protocol == RTPROT_UNSPEC)
2459 cfg->fc_protocol = RTPROT_BOOT;
2460 rt->rt6i_protocol = cfg->fc_protocol;
2461
2462 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2463
2464 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2465 rt->dst.input = ip6_mc_input;
ab79ad14
2466 else if (cfg->fc_flags & RTF_LOCAL)
2467 rt->dst.input = ip6_input;
1da177e4 2468 else
d8d1f30b 2469 rt->dst.input = ip6_forward;
1da177e4 2470
d8d1f30b 2471 rt->dst.output = ip6_output;
1da177e4 2472
19e42e45
RP
2473 if (cfg->fc_encap) {
2474 struct lwtunnel_state *lwtstate;
2475
30357d7d 2476 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2477 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2478 &lwtstate, extack);
19e42e45
RP
2479 if (err)
2480 goto out;
61adedf3
JB
2481 rt->dst.lwtstate = lwtstate_get(lwtstate);
2482 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2483 rt->dst.lwtstate->orig_output = rt->dst.output;
2484 rt->dst.output = lwtunnel_output;
25368623 2485 }
61adedf3
JB
2486 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2487 rt->dst.lwtstate->orig_input = rt->dst.input;
2488 rt->dst.input = lwtunnel_input;
25368623 2489 }
19e42e45
RP
2490 }
2491
86872cb5
TG
2492 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2493 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2494 if (rt->rt6i_dst.plen == 128)
e5fd387a 2495 rt->dst.flags |= DST_HOST;
e5fd387a 2496
1da177e4 2497#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2498 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2499 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2500#endif
2501
86872cb5 2502 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
2503
2504 /* We cannot add true routes via loopback here,
2505 they would result in kernel looping; promote them to reject routes
2506 */
86872cb5 2507 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2508 (dev && (dev->flags & IFF_LOOPBACK) &&
2509 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2510 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2511 /* hold loopback dev/idev if we haven't done so. */
5578689a 2512 if (dev != net->loopback_dev) {
1da177e4
LT
2513 if (dev) {
2514 dev_put(dev);
2515 in6_dev_put(idev);
2516 }
5578689a 2517 dev = net->loopback_dev;
1da177e4
LT
2518 dev_hold(dev);
2519 idev = in6_dev_get(dev);
2520 if (!idev) {
2521 err = -ENODEV;
2522 goto out;
2523 }
2524 }
1da177e4 2525 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2526 switch (cfg->fc_type) {
2527 case RTN_BLACKHOLE:
2528 rt->dst.error = -EINVAL;
ede2059d 2529 rt->dst.output = dst_discard_out;
7150aede 2530 rt->dst.input = dst_discard;
ef2c7d7b
ND
2531 break;
2532 case RTN_PROHIBIT:
2533 rt->dst.error = -EACCES;
7150aede
K
2534 rt->dst.output = ip6_pkt_prohibit_out;
2535 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2536 break;
b4949ab2 2537 case RTN_THROW:
0315e382 2538 case RTN_UNREACHABLE:
ef2c7d7b 2539 default:
7150aede 2540 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2541 : (cfg->fc_type == RTN_UNREACHABLE)
2542 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2543 rt->dst.output = ip6_pkt_discard_out;
2544 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2545 break;
2546 }
1da177e4
LT
2547 goto install_route;
2548 }
2549
86872cb5 2550 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2551 const struct in6_addr *gw_addr;
1da177e4
LT
2552 int gwa_type;
2553
86872cb5 2554 gw_addr = &cfg->fc_gateway;
330567b7 2555 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2556
2557 /* if gw_addr is local we will fail to detect this in case
2558 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2559 * will return already-added prefix route via interface that
2560 * prefix route was assigned to, which might be non-loopback.
2561 */
2562 err = -EINVAL;
330567b7
FW
2563 if (ipv6_chk_addr_and_flags(net, gw_addr,
2564 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2565 dev : NULL, 0, 0)) {
2566 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2567 goto out;
d5d531cb 2568 }
4e3fd7a0 2569 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2570
2571 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2572 struct rt6_info *grt = NULL;
1da177e4
LT
2573
2574 /* IPv6 strictly inhibits using not link-local
2575 addresses as nexthop address.
2576 Otherwise, router will not able to send redirects.
2577 It is very good, but in some (rare!) circumstances
2578 (SIT, PtP, NBMA NOARP links) it is handy to allow
2579 some exceptions. --ANK
96d5822c
EN
2580 We allow IPv4-mapped nexthops to support RFC4798-type
2581 addressing
1da177e4 2582 */
96d5822c 2583 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2584 IPV6_ADDR_MAPPED))) {
2585 NL_SET_ERR_MSG(extack,
2586 "Invalid gateway address");
1da177e4 2587 goto out;
d5d531cb 2588 }
1da177e4 2589
a435a07f 2590 if (cfg->fc_table) {
8c14586f
DA
2591 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2592
a435a07f
VB
2593 if (grt) {
2594 if (grt->rt6i_flags & RTF_GATEWAY ||
2595 (dev && dev != grt->dst.dev)) {
2596 ip6_rt_put(grt);
2597 grt = NULL;
2598 }
2599 }
2600 }
2601
8c14586f
DA
2602 if (!grt)
2603 grt = rt6_lookup(net, gw_addr, NULL,
2604 cfg->fc_ifindex, 1);
1da177e4
LT
2605
2606 err = -EHOSTUNREACH;
38308473 2607 if (!grt)
1da177e4
LT
2608 goto out;
2609 if (dev) {
d1918542 2610 if (dev != grt->dst.dev) {
94e187c0 2611 ip6_rt_put(grt);
1da177e4
LT
2612 goto out;
2613 }
2614 } else {
d1918542 2615 dev = grt->dst.dev;
1da177e4
LT
2616 idev = grt->rt6i_idev;
2617 dev_hold(dev);
2618 in6_dev_hold(grt->rt6i_idev);
2619 }
38308473 2620 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2621 err = 0;
94e187c0 2622 ip6_rt_put(grt);
1da177e4
LT
2623
2624 if (err)
2625 goto out;
2626 }
2627 err = -EINVAL;
d5d531cb
DA
2628 if (!dev) {
2629 NL_SET_ERR_MSG(extack, "Egress device not specified");
2630 goto out;
2631 } else if (dev->flags & IFF_LOOPBACK) {
2632 NL_SET_ERR_MSG(extack,
2633 "Egress device can not be loopback device for this route");
1da177e4 2634 goto out;
d5d531cb 2635 }
1da177e4
LT
2636 }
2637
2638 err = -ENODEV;
38308473 2639 if (!dev)
1da177e4
LT
2640 goto out;
2641
c3968a85
DW
2642 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2643 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2644 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2645 err = -EINVAL;
2646 goto out;
2647 }
4e3fd7a0 2648 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2649 rt->rt6i_prefsrc.plen = 128;
2650 } else
2651 rt->rt6i_prefsrc.plen = 0;
2652
86872cb5 2653 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2654
2655install_route:
d8d1f30b 2656 rt->dst.dev = dev;
1da177e4 2657 rt->rt6i_idev = idev;
c71099ac 2658 rt->rt6i_table = table;
63152fc0 2659
c346dca1 2660 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2661
8c5b83f0 2662 return rt;
6b9ea5a6
RP
2663out:
2664 if (dev)
2665 dev_put(dev);
2666 if (idev)
2667 in6_dev_put(idev);
587fea74
WW
2668 if (rt)
2669 dst_release_immediate(&rt->dst);
6b9ea5a6 2670
8c5b83f0 2671 return ERR_PTR(err);
6b9ea5a6
RP
2672}
2673
333c4301
DA
2674int ip6_route_add(struct fib6_config *cfg,
2675 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2676{
2677 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2678 struct rt6_info *rt;
6b9ea5a6
RP
2679 int err;
2680
333c4301 2681 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2682 if (IS_ERR(rt)) {
2683 err = PTR_ERR(rt);
2684 rt = NULL;
6b9ea5a6 2685 goto out;
8c5b83f0 2686 }
6b9ea5a6 2687
e715b6d3
FW
2688 err = ip6_convert_metrics(&mxc, cfg);
2689 if (err)
2690 goto out;
1da177e4 2691
333c4301 2692 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2693
2694 kfree(mxc.mx);
6b9ea5a6 2695
e715b6d3 2696 return err;
1da177e4 2697out:
587fea74
WW
2698 if (rt)
2699 dst_release_immediate(&rt->dst);
6b9ea5a6 2700
1da177e4
LT
2701 return err;
2702}
2703
86872cb5 2704static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2705{
2706 int err;
c71099ac 2707 struct fib6_table *table;
d1918542 2708 struct net *net = dev_net(rt->dst.dev);
1da177e4 2709
a4c2fd7f 2710 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2711 err = -ENOENT;
2712 goto out;
2713 }
6c813a72 2714
c71099ac
TG
2715 table = rt->rt6i_table;
2716 write_lock_bh(&table->tb6_lock);
86872cb5 2717 err = fib6_del(rt, info);
c71099ac 2718 write_unlock_bh(&table->tb6_lock);
1da177e4 2719
6825a26c 2720out:
94e187c0 2721 ip6_rt_put(rt);
1da177e4
LT
2722 return err;
2723}
2724
e0a1ad73
TG
2725int ip6_del_rt(struct rt6_info *rt)
2726{
4d1169c1 2727 struct nl_info info = {
d1918542 2728 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2729 };
528c4ceb 2730 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2731}
2732
0ae81335
DA
2733static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2734{
2735 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2736 struct net *net = info->nl_net;
16a16cd3 2737 struct sk_buff *skb = NULL;
0ae81335 2738 struct fib6_table *table;
e3330039 2739 int err = -ENOENT;
0ae81335 2740
e3330039
WC
2741 if (rt == net->ipv6.ip6_null_entry)
2742 goto out_put;
0ae81335
DA
2743 table = rt->rt6i_table;
2744 write_lock_bh(&table->tb6_lock);
2745
2746 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2747 struct rt6_info *sibling, *next_sibling;
2748
16a16cd3
DA
2749 /* prefer to send a single notification with all hops */
2750 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2751 if (skb) {
2752 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2753
e3330039 2754 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2755 NULL, NULL, 0, RTM_DELROUTE,
2756 info->portid, seq, 0) < 0) {
2757 kfree_skb(skb);
2758 skb = NULL;
2759 } else
2760 info->skip_notify = 1;
2761 }
2762
0ae81335
DA
2763 list_for_each_entry_safe(sibling, next_sibling,
2764 &rt->rt6i_siblings,
2765 rt6i_siblings) {
2766 err = fib6_del(sibling, info);
2767 if (err)
e3330039 2768 goto out_unlock;
0ae81335
DA
2769 }
2770 }
2771
2772 err = fib6_del(rt, info);
e3330039 2773out_unlock:
0ae81335 2774 write_unlock_bh(&table->tb6_lock);
e3330039 2775out_put:
0ae81335 2776 ip6_rt_put(rt);
16a16cd3
DA
2777
2778 if (skb) {
e3330039 2779 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2780 info->nlh, gfp_any());
2781 }
0ae81335
DA
2782 return err;
2783}
2784
333c4301
DA
2785static int ip6_route_del(struct fib6_config *cfg,
2786 struct netlink_ext_ack *extack)
1da177e4 2787{
c71099ac 2788 struct fib6_table *table;
1da177e4
LT
2789 struct fib6_node *fn;
2790 struct rt6_info *rt;
2791 int err = -ESRCH;
2792
5578689a 2793 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2794 if (!table) {
2795 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2796 return err;
d5d531cb 2797 }
c71099ac
TG
2798
2799 read_lock_bh(&table->tb6_lock);
1da177e4 2800
c71099ac 2801 fn = fib6_locate(&table->tb6_root,
86872cb5 2802 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee
WW
2803 &cfg->fc_src, cfg->fc_src_len,
2804 true);
1ab1457c 2805
1da177e4 2806 if (fn) {
d8d1f30b 2807 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2808 if ((rt->rt6i_flags & RTF_CACHE) &&
2809 !(cfg->fc_flags & RTF_CACHE))
2810 continue;
86872cb5 2811 if (cfg->fc_ifindex &&
d1918542
DM
2812 (!rt->dst.dev ||
2813 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2814 continue;
86872cb5
TG
2815 if (cfg->fc_flags & RTF_GATEWAY &&
2816 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2817 continue;
86872cb5 2818 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2819 continue;
c2ed1880
M
2820 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2821 continue;
d8d1f30b 2822 dst_hold(&rt->dst);
c71099ac 2823 read_unlock_bh(&table->tb6_lock);
1da177e4 2824
0ae81335
DA
2825 /* if gateway was specified only delete the one hop */
2826 if (cfg->fc_flags & RTF_GATEWAY)
2827 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2828
2829 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2830 }
2831 }
c71099ac 2832 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2833
2834 return err;
2835}
2836
6700c270 2837static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2838{
a6279458 2839 struct netevent_redirect netevent;
e8599ff4 2840 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2841 struct ndisc_options ndopts;
2842 struct inet6_dev *in6_dev;
2843 struct neighbour *neigh;
71bcdba0 2844 struct rd_msg *msg;
6e157b6a
DM
2845 int optlen, on_link;
2846 u8 *lladdr;
e8599ff4 2847
29a3cad5 2848 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2849 optlen -= sizeof(*msg);
e8599ff4
DM
2850
2851 if (optlen < 0) {
6e157b6a 2852 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2853 return;
2854 }
2855
71bcdba0 2856 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2857
71bcdba0 2858 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2859 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2860 return;
2861 }
2862
6e157b6a 2863 on_link = 0;
71bcdba0 2864 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2865 on_link = 1;
71bcdba0 2866 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2867 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2868 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2869 return;
2870 }
2871
2872 in6_dev = __in6_dev_get(skb->dev);
2873 if (!in6_dev)
2874 return;
2875 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2876 return;
2877
2878 /* RFC2461 8.1:
2879 * The IP source address of the Redirect MUST be the same as the current
2880 * first-hop router for the specified ICMP Destination Address.
2881 */
2882
f997c55c 2883 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2884 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2885 return;
2886 }
6e157b6a
DM
2887
2888 lladdr = NULL;
e8599ff4
DM
2889 if (ndopts.nd_opts_tgt_lladdr) {
2890 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2891 skb->dev);
2892 if (!lladdr) {
2893 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2894 return;
2895 }
2896 }
2897
6e157b6a 2898 rt = (struct rt6_info *) dst;
ec13ad1d 2899 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2900 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2901 return;
6e157b6a 2902 }
e8599ff4 2903
6e157b6a
DM
2904 /* Redirect received -> path was valid.
2905 * Look, redirects are sent only in response to data packets,
2906 * so that this nexthop apparently is reachable. --ANK
2907 */
0dec879f 2908 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2909
71bcdba0 2910 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2911 if (!neigh)
2912 return;
a6279458 2913
1da177e4
LT
2914 /*
2915 * We have finally decided to accept it.
2916 */
2917
f997c55c 2918 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2919 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2920 NEIGH_UPDATE_F_OVERRIDE|
2921 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2922 NEIGH_UPDATE_F_ISROUTER)),
2923 NDISC_REDIRECT, &ndopts);
1da177e4 2924
83a09abd 2925 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2926 if (!nrt)
1da177e4
LT
2927 goto out;
2928
2929 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2930 if (on_link)
2931 nrt->rt6i_flags &= ~RTF_GATEWAY;
2932
b91d5329 2933 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 2934 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2935
40e22e8f 2936 if (ip6_ins_rt(nrt))
1cfb71ee 2937 goto out_release;
1da177e4 2938
d8d1f30b
CG
2939 netevent.old = &rt->dst;
2940 netevent.new = &nrt->dst;
71bcdba0 2941 netevent.daddr = &msg->dest;
60592833 2942 netevent.neigh = neigh;
8d71740c
TT
2943 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2944
38308473 2945 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2946 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2947 ip6_del_rt(rt);
1da177e4
LT
2948 }
2949
1cfb71ee
WW
2950out_release:
2951 /* Release the reference taken in
2952 * ip6_rt_cache_alloc()
2953 */
2954 dst_release(&nrt->dst);
2955
1da177e4 2956out:
e8599ff4 2957 neigh_release(neigh);
6e157b6a
DM
2958}
2959
1da177e4
LT
2960/*
2961 * Misc support functions
2962 */
2963
4b32b5ad
MKL
2964static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2965{
2966 BUG_ON(from->dst.from);
2967
2968 rt->rt6i_flags &= ~RTF_EXPIRES;
2969 dst_hold(&from->dst);
2970 rt->dst.from = &from->dst;
2971 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2972}
2973
83a09abd
MKL
2974static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2975{
2976 rt->dst.input = ort->dst.input;
2977 rt->dst.output = ort->dst.output;
2978 rt->rt6i_dst = ort->rt6i_dst;
2979 rt->dst.error = ort->dst.error;
2980 rt->rt6i_idev = ort->rt6i_idev;
2981 if (rt->rt6i_idev)
2982 in6_dev_hold(rt->rt6i_idev);
2983 rt->dst.lastuse = jiffies;
2984 rt->rt6i_gateway = ort->rt6i_gateway;
2985 rt->rt6i_flags = ort->rt6i_flags;
2986 rt6_set_from(rt, ort);
2987 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2988#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2989 rt->rt6i_src = ort->rt6i_src;
1da177e4 2990#endif
83a09abd
MKL
2991 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2992 rt->rt6i_table = ort->rt6i_table;
61adedf3 2993 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2994}
2995
70ceb4f5 2996#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2997static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2998 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2999 const struct in6_addr *gwaddr,
3000 struct net_device *dev)
70ceb4f5 3001{
830218c1
DA
3002 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3003 int ifindex = dev->ifindex;
70ceb4f5
YH
3004 struct fib6_node *fn;
3005 struct rt6_info *rt = NULL;
c71099ac
TG
3006 struct fib6_table *table;
3007
830218c1 3008 table = fib6_get_table(net, tb_id);
38308473 3009 if (!table)
c71099ac 3010 return NULL;
70ceb4f5 3011
5744dd9b 3012 read_lock_bh(&table->tb6_lock);
38fbeeee 3013 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3014 if (!fn)
3015 goto out;
3016
d8d1f30b 3017 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 3018 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3019 continue;
3020 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3021 continue;
3022 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3023 continue;
d8d1f30b 3024 dst_hold(&rt->dst);
70ceb4f5
YH
3025 break;
3026 }
3027out:
5744dd9b 3028 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
3029 return rt;
3030}
3031
efa2cea0 3032static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3033 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3034 const struct in6_addr *gwaddr,
3035 struct net_device *dev,
95c96174 3036 unsigned int pref)
70ceb4f5 3037{
86872cb5 3038 struct fib6_config cfg = {
238fc7ea 3039 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3040 .fc_ifindex = dev->ifindex,
86872cb5
TG
3041 .fc_dst_len = prefixlen,
3042 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3043 RTF_UP | RTF_PREF(pref),
b91d5329 3044 .fc_protocol = RTPROT_RA,
15e47304 3045 .fc_nlinfo.portid = 0,
efa2cea0
DL
3046 .fc_nlinfo.nlh = NULL,
3047 .fc_nlinfo.nl_net = net,
86872cb5
TG
3048 };
3049
830218c1 3050 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3051 cfg.fc_dst = *prefix;
3052 cfg.fc_gateway = *gwaddr;
70ceb4f5 3053
e317da96
YH
3054 /* We should treat it as a default route if prefix length is 0. */
3055 if (!prefixlen)
86872cb5 3056 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3057
333c4301 3058 ip6_route_add(&cfg, NULL);
70ceb4f5 3059
830218c1 3060 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3061}
3062#endif
3063
b71d1d42 3064struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3065{
830218c1 3066 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3067 struct rt6_info *rt;
c71099ac 3068 struct fib6_table *table;
1da177e4 3069
830218c1 3070 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3071 if (!table)
c71099ac 3072 return NULL;
1da177e4 3073
5744dd9b 3074 read_lock_bh(&table->tb6_lock);
67ba4152 3075 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 3076 if (dev == rt->dst.dev &&
045927ff 3077 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3078 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3079 break;
3080 }
3081 if (rt)
d8d1f30b 3082 dst_hold(&rt->dst);
5744dd9b 3083 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
3084 return rt;
3085}
3086
b71d1d42 3087struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3088 struct net_device *dev,
3089 unsigned int pref)
1da177e4 3090{
86872cb5 3091 struct fib6_config cfg = {
ca254490 3092 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3093 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3094 .fc_ifindex = dev->ifindex,
3095 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3096 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3097 .fc_protocol = RTPROT_RA,
15e47304 3098 .fc_nlinfo.portid = 0,
5578689a 3099 .fc_nlinfo.nlh = NULL,
c346dca1 3100 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3101 };
1da177e4 3102
4e3fd7a0 3103 cfg.fc_gateway = *gwaddr;
1da177e4 3104
333c4301 3105 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3106 struct fib6_table *table;
3107
3108 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3109 if (table)
3110 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3111 }
1da177e4 3112
1da177e4
LT
3113 return rt6_get_dflt_router(gwaddr, dev);
3114}
3115
830218c1 3116static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3117{
3118 struct rt6_info *rt;
3119
3120restart:
c71099ac 3121 read_lock_bh(&table->tb6_lock);
d8d1f30b 3122 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
3123 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3124 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 3125 dst_hold(&rt->dst);
c71099ac 3126 read_unlock_bh(&table->tb6_lock);
e0a1ad73 3127 ip6_del_rt(rt);
1da177e4
LT
3128 goto restart;
3129 }
3130 }
c71099ac 3131 read_unlock_bh(&table->tb6_lock);
830218c1
DA
3132
3133 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3134}
3135
3136void rt6_purge_dflt_routers(struct net *net)
3137{
3138 struct fib6_table *table;
3139 struct hlist_head *head;
3140 unsigned int h;
3141
3142 rcu_read_lock();
3143
3144 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3145 head = &net->ipv6.fib_table_hash[h];
3146 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3147 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3148 __rt6_purge_dflt_routers(table);
3149 }
3150 }
3151
3152 rcu_read_unlock();
1da177e4
LT
3153}
3154
5578689a
DL
3155static void rtmsg_to_fib6_config(struct net *net,
3156 struct in6_rtmsg *rtmsg,
86872cb5
TG
3157 struct fib6_config *cfg)
3158{
3159 memset(cfg, 0, sizeof(*cfg));
3160
ca254490
DA
3161 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3162 : RT6_TABLE_MAIN;
86872cb5
TG
3163 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3164 cfg->fc_metric = rtmsg->rtmsg_metric;
3165 cfg->fc_expires = rtmsg->rtmsg_info;
3166 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3167 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3168 cfg->fc_flags = rtmsg->rtmsg_flags;
3169
5578689a 3170 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3171
4e3fd7a0
AD
3172 cfg->fc_dst = rtmsg->rtmsg_dst;
3173 cfg->fc_src = rtmsg->rtmsg_src;
3174 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3175}
3176
5578689a 3177int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3178{
86872cb5 3179 struct fib6_config cfg;
1da177e4
LT
3180 struct in6_rtmsg rtmsg;
3181 int err;
3182
67ba4152 3183 switch (cmd) {
1da177e4
LT
3184 case SIOCADDRT: /* Add a route */
3185 case SIOCDELRT: /* Delete a route */
af31f412 3186 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3187 return -EPERM;
3188 err = copy_from_user(&rtmsg, arg,
3189 sizeof(struct in6_rtmsg));
3190 if (err)
3191 return -EFAULT;
86872cb5 3192
5578689a 3193 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3194
1da177e4
LT
3195 rtnl_lock();
3196 switch (cmd) {
3197 case SIOCADDRT:
333c4301 3198 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3199 break;
3200 case SIOCDELRT:
333c4301 3201 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3202 break;
3203 default:
3204 err = -EINVAL;
3205 }
3206 rtnl_unlock();
3207
3208 return err;
3ff50b79 3209 }
1da177e4
LT
3210
3211 return -EINVAL;
3212}
3213
3214/*
3215 * Drop the packet on the floor
3216 */
3217
d5fdd6ba 3218static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3219{
612f09e8 3220 int type;
adf30907 3221 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3222 switch (ipstats_mib_noroutes) {
3223 case IPSTATS_MIB_INNOROUTES:
0660e03f 3224 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3225 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3226 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3227 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3228 break;
3229 }
3230 /* FALLTHROUGH */
3231 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3232 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3233 ipstats_mib_noroutes);
612f09e8
YH
3234 break;
3235 }
3ffe533c 3236 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3237 kfree_skb(skb);
3238 return 0;
3239}
3240
9ce8ade0
TG
3241static int ip6_pkt_discard(struct sk_buff *skb)
3242{
612f09e8 3243 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3244}
3245
ede2059d 3246static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3247{
adf30907 3248 skb->dev = skb_dst(skb)->dev;
612f09e8 3249 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3250}
3251
9ce8ade0
TG
3252static int ip6_pkt_prohibit(struct sk_buff *skb)
3253{
612f09e8 3254 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3255}
3256
ede2059d 3257static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3258{
adf30907 3259 skb->dev = skb_dst(skb)->dev;
612f09e8 3260 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3261}
3262
1da177e4
LT
3263/*
3264 * Allocate a dst for local (unicast / anycast) address.
3265 */
3266
3267struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3268 const struct in6_addr *addr,
8f031519 3269 bool anycast)
1da177e4 3270{
ca254490 3271 u32 tb_id;
c346dca1 3272 struct net *net = dev_net(idev->dev);
4832c30d 3273 struct net_device *dev = idev->dev;
5f02ce24
DA
3274 struct rt6_info *rt;
3275
5f02ce24 3276 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3277 if (!rt)
1da177e4
LT
3278 return ERR_PTR(-ENOMEM);
3279
1da177e4
LT
3280 in6_dev_hold(idev);
3281
11d53b49 3282 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3283 rt->dst.input = ip6_input;
3284 rt->dst.output = ip6_output;
1da177e4 3285 rt->rt6i_idev = idev;
1da177e4 3286
94b5e0f9 3287 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3288 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3289 if (anycast)
3290 rt->rt6i_flags |= RTF_ANYCAST;
3291 else
1da177e4 3292 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3293
550bab42 3294 rt->rt6i_gateway = *addr;
4e3fd7a0 3295 rt->rt6i_dst.addr = *addr;
1da177e4 3296 rt->rt6i_dst.plen = 128;
ca254490
DA
3297 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3298 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3299
1da177e4
LT
3300 return rt;
3301}
3302
c3968a85
DW
/*
 * remove deleted ip from prefsrc entries
 *
 * Walker argument passed to fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
	struct in6_addr *addr;		/* address compared against rt6i_prefsrc */
};
3309
3310static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3311{
3312 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3313 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3314 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3315
d1918542 3316 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3317 rt != net->ipv6.ip6_null_entry &&
3318 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3319 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3320 /* remove prefsrc entry */
3321 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3322 /* need to update cache as well */
3323 rt6_exceptions_remove_prefsrc(rt);
3324 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3325 }
3326 return 0;
3327}
3328
3329void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3330{
3331 struct net *net = dev_net(ifp->idev->dev);
3332 struct arg_dev_net_ip adni = {
3333 .dev = ifp->idev->dev,
3334 .net = net,
3335 .addr = &ifp->addr,
3336 };
0c3584d5 3337 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3338}
3339
be7a010d 3340#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3341
3342/* Remove routers and update dst entries when gateway turn into host. */
3343static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3344{
3345 struct in6_addr *gateway = (struct in6_addr *)arg;
3346
b16cb459
WW
3347 /* RTF_CACHE_GATEWAY case will be removed once the exception
3348 * table is hooked up to store all cached routes.
3349 */
be7a010d
DJ
3350 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
3351 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
3352 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
3353 return -1;
3354 }
b16cb459
WW
3355
3356 /* Further clean up cached routes in exception table.
3357 * This is needed because cached route may have a different
3358 * gateway than its 'parent' in the case of an ip redirect.
3359 */
3360 rt6_exceptions_clean_tohost(rt, gateway);
3361
be7a010d
DJ
3362 return 0;
3363}
3364
3365void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3366{
3367 fib6_clean_all(net, fib6_clean_tohost, gateway);
3368}
3369
8ed67789
DL
/* Walker argument passed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
};
3374
a1a22c12 3375/* called with write lock held for table with rt */
1da177e4
LT
3376static int fib6_ifdown(struct rt6_info *rt, void *arg)
3377{
bc3ef660 3378 const struct arg_dev_net *adn = arg;
3379 const struct net_device *dev = adn->dev;
8ed67789 3380
d1918542 3381 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
3382 rt != adn->net->ipv6.ip6_null_entry &&
3383 (rt->rt6i_nsiblings == 0 ||
8397ed36 3384 (dev && netdev_unregistering(dev)) ||
a1a22c12 3385 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 3386 return -1;
c159d30c 3387
1da177e4
LT
3388 return 0;
3389}
3390
f3db4851 3391void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 3392{
8ed67789
DL
3393 struct arg_dev_net adn = {
3394 .dev = dev,
3395 .net = net,
3396 };
3397
0c3584d5 3398 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
3399 if (dev)
3400 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
3401}
3402
/* Walker argument passed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3407
3408static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3409{
3410 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3411 struct inet6_dev *idev;
3412
3413 /* In IPv6 pmtu discovery is not optional,
3414 so that RTAX_MTU lock cannot disable it.
3415 We still use this lock to block changes
3416 caused by addrconf/ndisc.
3417 */
3418
3419 idev = __in6_dev_get(arg->dev);
38308473 3420 if (!idev)
1da177e4
LT
3421 return 0;
3422
3423 /* For administrative MTU increase, there is no way to discover
3424 IPv6 PMTU increase, so PMTU increase should be updated here.
3425 Since RFC 1981 doesn't include administrative MTU increase
3426 update PMTU increase is a MUST. (i.e. jumbo frame)
3427 */
3428 /*
3429 If new MTU is less than route PMTU, this new MTU will be the
3430 lowest MTU in the path, update the route PMTU to reflect PMTU
3431 decreases; if new MTU is greater than route PMTU, and the
3432 old MTU is the lowest MTU in the path, update the route PMTU
3433 to reflect the increase. In this case if the other nodes' MTU
3434 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3435 PMTU discovery.
1da177e4 3436 */
d1918542 3437 if (rt->dst.dev == arg->dev &&
fb56be83 3438 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad 3439 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee
WW
3440 spin_lock_bh(&rt6_exception_lock);
3441 /* This case will be removed once the exception table
3442 * is hooked up.
3443 */
4b32b5ad
MKL
3444 if (rt->rt6i_flags & RTF_CACHE) {
3445 /* For RTF_CACHE with rt6i_pmtu == 0
3446 * (i.e. a redirected route),
3447 * the metrics of its rt->dst.from has already
3448 * been updated.
3449 */
3450 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
3451 rt->rt6i_pmtu = arg->mtu;
3452 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
3453 (dst_mtu(&rt->dst) < arg->mtu &&
3454 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
3455 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3456 }
f5bbe7ee
WW
3457 rt6_exceptions_update_pmtu(rt, arg->mtu);
3458 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3459 }
1da177e4
LT
3460 return 0;
3461}
3462
95c96174 3463void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3464{
c71099ac
TG
3465 struct rt6_mtu_change_arg arg = {
3466 .dev = dev,
3467 .mtu = mtu,
3468 };
1da177e4 3469
0c3584d5 3470 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3471}
3472
ef7c79ed 3473static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3474 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3475 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3476 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3477 [RTA_PRIORITY] = { .type = NLA_U32 },
3478 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3479 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3480 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3481 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3482 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3483 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3484 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3485 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3486};
3487
3488static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3489 struct fib6_config *cfg,
3490 struct netlink_ext_ack *extack)
1da177e4 3491{
86872cb5
TG
3492 struct rtmsg *rtm;
3493 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3494 unsigned int pref;
86872cb5 3495 int err;
1da177e4 3496
fceb6435
JB
3497 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3498 NULL);
86872cb5
TG
3499 if (err < 0)
3500 goto errout;
1da177e4 3501
86872cb5
TG
3502 err = -EINVAL;
3503 rtm = nlmsg_data(nlh);
3504 memset(cfg, 0, sizeof(*cfg));
3505
3506 cfg->fc_table = rtm->rtm_table;
3507 cfg->fc_dst_len = rtm->rtm_dst_len;
3508 cfg->fc_src_len = rtm->rtm_src_len;
3509 cfg->fc_flags = RTF_UP;
3510 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3511 cfg->fc_type = rtm->rtm_type;
86872cb5 3512
ef2c7d7b
ND
3513 if (rtm->rtm_type == RTN_UNREACHABLE ||
3514 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3515 rtm->rtm_type == RTN_PROHIBIT ||
3516 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3517 cfg->fc_flags |= RTF_REJECT;
3518
ab79ad14
3519 if (rtm->rtm_type == RTN_LOCAL)
3520 cfg->fc_flags |= RTF_LOCAL;
3521
1f56a01f
MKL
3522 if (rtm->rtm_flags & RTM_F_CLONED)
3523 cfg->fc_flags |= RTF_CACHE;
3524
15e47304 3525 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3526 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3527 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3528
3529 if (tb[RTA_GATEWAY]) {
67b61f6c 3530 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3531 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3532 }
86872cb5
TG
3533
3534 if (tb[RTA_DST]) {
3535 int plen = (rtm->rtm_dst_len + 7) >> 3;
3536
3537 if (nla_len(tb[RTA_DST]) < plen)
3538 goto errout;
3539
3540 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3541 }
86872cb5
TG
3542
3543 if (tb[RTA_SRC]) {
3544 int plen = (rtm->rtm_src_len + 7) >> 3;
3545
3546 if (nla_len(tb[RTA_SRC]) < plen)
3547 goto errout;
3548
3549 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3550 }
86872cb5 3551
c3968a85 3552 if (tb[RTA_PREFSRC])
67b61f6c 3553 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3554
86872cb5
TG
3555 if (tb[RTA_OIF])
3556 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3557
3558 if (tb[RTA_PRIORITY])
3559 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3560
3561 if (tb[RTA_METRICS]) {
3562 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3563 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3564 }
86872cb5
TG
3565
3566 if (tb[RTA_TABLE])
3567 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3568
51ebd318
ND
3569 if (tb[RTA_MULTIPATH]) {
3570 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3571 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3572
3573 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3574 cfg->fc_mp_len, extack);
9ed59592
DA
3575 if (err < 0)
3576 goto errout;
51ebd318
ND
3577 }
3578
c78ba6d6
LR
3579 if (tb[RTA_PREF]) {
3580 pref = nla_get_u8(tb[RTA_PREF]);
3581 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3582 pref != ICMPV6_ROUTER_PREF_HIGH)
3583 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3584 cfg->fc_flags |= RTF_PREF(pref);
3585 }
3586
19e42e45
RP
3587 if (tb[RTA_ENCAP])
3588 cfg->fc_encap = tb[RTA_ENCAP];
3589
9ed59592 3590 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3591 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3592
c255bd68 3593 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3594 if (err < 0)
3595 goto errout;
3596 }
3597
32bc201e
XL
3598 if (tb[RTA_EXPIRES]) {
3599 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3600
3601 if (addrconf_finite_timeout(timeout)) {
3602 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3603 cfg->fc_flags |= RTF_EXPIRES;
3604 }
3605 }
3606
86872cb5
TG
3607 err = 0;
3608errout:
3609 return err;
1da177e4
LT
3610}
3611
6b9ea5a6
RP
3612struct rt6_nh {
3613 struct rt6_info *rt6_info;
3614 struct fib6_config r_cfg;
3615 struct mx6_config mxc;
3616 struct list_head next;
3617};
3618
3619static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3620{
3621 struct rt6_nh *nh;
3622
3623 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3624 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3625 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3626 nh->r_cfg.fc_ifindex);
3627 }
3628}
3629
3630static int ip6_route_info_append(struct list_head *rt6_nh_list,
3631 struct rt6_info *rt, struct fib6_config *r_cfg)
3632{
3633 struct rt6_nh *nh;
6b9ea5a6
RP
3634 int err = -EEXIST;
3635
3636 list_for_each_entry(nh, rt6_nh_list, next) {
3637 /* check if rt6_info already exists */
f06b7549 3638 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3639 return err;
3640 }
3641
3642 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3643 if (!nh)
3644 return -ENOMEM;
3645 nh->rt6_info = rt;
3646 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3647 if (err) {
3648 kfree(nh);
3649 return err;
3650 }
3651 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3652 list_add_tail(&nh->next, rt6_nh_list);
3653
3654 return 0;
3655}
3656
3b1137fe
DA
3657static void ip6_route_mpath_notify(struct rt6_info *rt,
3658 struct rt6_info *rt_last,
3659 struct nl_info *info,
3660 __u16 nlflags)
3661{
3662 /* if this is an APPEND route, then rt points to the first route
3663 * inserted and rt_last points to last route inserted. Userspace
3664 * wants a consistent dump of the route which starts at the first
3665 * nexthop. Since sibling routes are always added at the end of
3666 * the list, find the first sibling of the last route appended
3667 */
3668 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3669 rt = list_first_entry(&rt_last->rt6i_siblings,
3670 struct rt6_info,
3671 rt6i_siblings);
3672 }
3673
3674 if (rt)
3675 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3676}
3677
333c4301
DA
3678static int ip6_route_multipath_add(struct fib6_config *cfg,
3679 struct netlink_ext_ack *extack)
51ebd318 3680{
3b1137fe
DA
3681 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3682 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3683 struct fib6_config r_cfg;
3684 struct rtnexthop *rtnh;
6b9ea5a6
RP
3685 struct rt6_info *rt;
3686 struct rt6_nh *err_nh;
3687 struct rt6_nh *nh, *nh_safe;
3b1137fe 3688 __u16 nlflags;
51ebd318
ND
3689 int remaining;
3690 int attrlen;
6b9ea5a6
RP
3691 int err = 1;
3692 int nhn = 0;
3693 int replace = (cfg->fc_nlinfo.nlh &&
3694 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3695 LIST_HEAD(rt6_nh_list);
51ebd318 3696
3b1137fe
DA
3697 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3698 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3699 nlflags |= NLM_F_APPEND;
3700
35f1b4e9 3701 remaining = cfg->fc_mp_len;
51ebd318 3702 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3703
6b9ea5a6
RP
3704 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3705 * rt6_info structs per nexthop
3706 */
51ebd318
ND
3707 while (rtnh_ok(rtnh, remaining)) {
3708 memcpy(&r_cfg, cfg, sizeof(*cfg));
3709 if (rtnh->rtnh_ifindex)
3710 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3711
3712 attrlen = rtnh_attrlen(rtnh);
3713 if (attrlen > 0) {
3714 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3715
3716 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3717 if (nla) {
67b61f6c 3718 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3719 r_cfg.fc_flags |= RTF_GATEWAY;
3720 }
19e42e45
RP
3721 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3722 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3723 if (nla)
3724 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3725 }
6b9ea5a6 3726
333c4301 3727 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3728 if (IS_ERR(rt)) {
3729 err = PTR_ERR(rt);
3730 rt = NULL;
6b9ea5a6 3731 goto cleanup;
8c5b83f0 3732 }
6b9ea5a6
RP
3733
3734 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3735 if (err) {
587fea74 3736 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3737 goto cleanup;
3738 }
3739
3740 rtnh = rtnh_next(rtnh, &remaining);
3741 }
3742
3b1137fe
DA
3743 /* for add and replace send one notification with all nexthops.
3744 * Skip the notification in fib6_add_rt2node and send one with
3745 * the full route when done
3746 */
3747 info->skip_notify = 1;
3748
6b9ea5a6
RP
3749 err_nh = NULL;
3750 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3751 rt_last = nh->rt6_info;
333c4301 3752 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3753 /* save reference to first route for notification */
3754 if (!rt_notif && !err)
3755 rt_notif = nh->rt6_info;
3756
6b9ea5a6
RP
3757 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3758 nh->rt6_info = NULL;
3759 if (err) {
3760 if (replace && nhn)
3761 ip6_print_replace_route_err(&rt6_nh_list);
3762 err_nh = nh;
3763 goto add_errout;
51ebd318 3764 }
6b9ea5a6 3765
1a72418b 3766 /* Because each route is added like a single route we remove
27596472
MK
3767 * these flags after the first nexthop: if there is a collision,
3768 * we have already failed to add the first nexthop:
3769 * fib6_add_rt2node() has rejected it; when replacing, old
3770 * nexthops have been replaced by first new, the rest should
3771 * be added to it.
1a72418b 3772 */
27596472
MK
3773 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3774 NLM_F_REPLACE);
6b9ea5a6
RP
3775 nhn++;
3776 }
3777
3b1137fe
DA
3778 /* success ... tell user about new route */
3779 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3780 goto cleanup;
3781
3782add_errout:
3b1137fe
DA
3783 /* send notification for routes that were added so that
3784 * the delete notifications sent by ip6_route_del are
3785 * coherent
3786 */
3787 if (rt_notif)
3788 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3789
6b9ea5a6
RP
3790 /* Delete routes that were already added */
3791 list_for_each_entry(nh, &rt6_nh_list, next) {
3792 if (err_nh == nh)
3793 break;
333c4301 3794 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3795 }
3796
3797cleanup:
3798 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3799 if (nh->rt6_info)
3800 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3801 kfree(nh->mxc.mx);
6b9ea5a6
RP
3802 list_del(&nh->next);
3803 kfree(nh);
3804 }
3805
3806 return err;
3807}
3808
333c4301
DA
3809static int ip6_route_multipath_del(struct fib6_config *cfg,
3810 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3811{
3812 struct fib6_config r_cfg;
3813 struct rtnexthop *rtnh;
3814 int remaining;
3815 int attrlen;
3816 int err = 1, last_err = 0;
3817
3818 remaining = cfg->fc_mp_len;
3819 rtnh = (struct rtnexthop *)cfg->fc_mp;
3820
3821 /* Parse a Multipath Entry */
3822 while (rtnh_ok(rtnh, remaining)) {
3823 memcpy(&r_cfg, cfg, sizeof(*cfg));
3824 if (rtnh->rtnh_ifindex)
3825 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3826
3827 attrlen = rtnh_attrlen(rtnh);
3828 if (attrlen > 0) {
3829 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3830
3831 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3832 if (nla) {
3833 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3834 r_cfg.fc_flags |= RTF_GATEWAY;
3835 }
3836 }
333c4301 3837 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3838 if (err)
3839 last_err = err;
3840
51ebd318
ND
3841 rtnh = rtnh_next(rtnh, &remaining);
3842 }
3843
3844 return last_err;
3845}
3846
c21ef3e3
DA
3847static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3848 struct netlink_ext_ack *extack)
1da177e4 3849{
86872cb5
TG
3850 struct fib6_config cfg;
3851 int err;
1da177e4 3852
333c4301 3853 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3854 if (err < 0)
3855 return err;
3856
51ebd318 3857 if (cfg.fc_mp)
333c4301 3858 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3859 else {
3860 cfg.fc_delete_all_nh = 1;
333c4301 3861 return ip6_route_del(&cfg, extack);
0ae81335 3862 }
1da177e4
LT
3863}
3864
c21ef3e3
DA
3865static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3866 struct netlink_ext_ack *extack)
1da177e4 3867{
86872cb5
TG
3868 struct fib6_config cfg;
3869 int err;
1da177e4 3870
333c4301 3871 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3872 if (err < 0)
3873 return err;
3874
51ebd318 3875 if (cfg.fc_mp)
333c4301 3876 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3877 else
333c4301 3878 return ip6_route_add(&cfg, extack);
1da177e4
LT
3879}
3880
beb1afac 3881static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3882{
beb1afac
DA
3883 int nexthop_len = 0;
3884
3885 if (rt->rt6i_nsiblings) {
3886 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3887 + NLA_ALIGN(sizeof(struct rtnexthop))
3888 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3889 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3890
3891 nexthop_len *= rt->rt6i_nsiblings;
3892 }
3893
339bf98f
TG
3894 return NLMSG_ALIGN(sizeof(struct rtmsg))
3895 + nla_total_size(16) /* RTA_SRC */
3896 + nla_total_size(16) /* RTA_DST */
3897 + nla_total_size(16) /* RTA_GATEWAY */
3898 + nla_total_size(16) /* RTA_PREFSRC */
3899 + nla_total_size(4) /* RTA_TABLE */
3900 + nla_total_size(4) /* RTA_IIF */
3901 + nla_total_size(4) /* RTA_OIF */
3902 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3903 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3904 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3905 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3906 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3907 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3908 + nexthop_len;
3909}
3910
3911static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3912 unsigned int *flags, bool skip_oif)
beb1afac
DA
3913{
3914 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3915 *flags |= RTNH_F_LINKDOWN;
3916 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3917 *flags |= RTNH_F_DEAD;
3918 }
3919
3920 if (rt->rt6i_flags & RTF_GATEWAY) {
3921 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3922 goto nla_put_failure;
3923 }
3924
fe400799 3925 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3926 *flags |= RTNH_F_OFFLOAD;
3927
5be083ce
DA
3928 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3929 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3930 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3931 goto nla_put_failure;
3932
3933 if (rt->dst.lwtstate &&
3934 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3935 goto nla_put_failure;
3936
3937 return 0;
3938
3939nla_put_failure:
3940 return -EMSGSIZE;
3941}
3942
5be083ce 3943/* add multipath next hop */
beb1afac
DA
3944static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3945{
3946 struct rtnexthop *rtnh;
3947 unsigned int flags = 0;
3948
3949 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3950 if (!rtnh)
3951 goto nla_put_failure;
3952
3953 rtnh->rtnh_hops = 0;
3954 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3955
5be083ce 3956 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3957 goto nla_put_failure;
3958
3959 rtnh->rtnh_flags = flags;
3960
3961 /* length of rtnetlink header + attributes */
3962 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3963
3964 return 0;
3965
3966nla_put_failure:
3967 return -EMSGSIZE;
339bf98f
TG
3968}
3969
191cd582
BH
3970static int rt6_fill_node(struct net *net,
3971 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3972 struct in6_addr *dst, struct in6_addr *src,
15e47304 3973 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3974 unsigned int flags)
1da177e4 3975{
4b32b5ad 3976 u32 metrics[RTAX_MAX];
1da177e4 3977 struct rtmsg *rtm;
2d7202bf 3978 struct nlmsghdr *nlh;
e3703b3d 3979 long expires;
9e762a4a 3980 u32 table;
1da177e4 3981
15e47304 3982 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3983 if (!nlh)
26932566 3984 return -EMSGSIZE;
2d7202bf
TG
3985
3986 rtm = nlmsg_data(nlh);
1da177e4
LT
3987 rtm->rtm_family = AF_INET6;
3988 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3989 rtm->rtm_src_len = rt->rt6i_src.plen;
3990 rtm->rtm_tos = 0;
c71099ac 3991 if (rt->rt6i_table)
9e762a4a 3992 table = rt->rt6i_table->tb6_id;
c71099ac 3993 else
9e762a4a
PM
3994 table = RT6_TABLE_UNSPEC;
3995 rtm->rtm_table = table;
c78679e8
DM
3996 if (nla_put_u32(skb, RTA_TABLE, table))
3997 goto nla_put_failure;
ef2c7d7b
ND
3998 if (rt->rt6i_flags & RTF_REJECT) {
3999 switch (rt->dst.error) {
4000 case -EINVAL:
4001 rtm->rtm_type = RTN_BLACKHOLE;
4002 break;
4003 case -EACCES:
4004 rtm->rtm_type = RTN_PROHIBIT;
4005 break;
b4949ab2
ND
4006 case -EAGAIN:
4007 rtm->rtm_type = RTN_THROW;
4008 break;
ef2c7d7b
ND
4009 default:
4010 rtm->rtm_type = RTN_UNREACHABLE;
4011 break;
4012 }
4013 }
38308473 4014 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4015 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4016 else if (rt->rt6i_flags & RTF_ANYCAST)
4017 rtm->rtm_type = RTN_ANYCAST;
d1918542 4018 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4019 rtm->rtm_type = RTN_LOCAL;
4020 else
4021 rtm->rtm_type = RTN_UNICAST;
4022 rtm->rtm_flags = 0;
4023 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4024 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4025
38308473 4026 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4027 rtm->rtm_flags |= RTM_F_CLONED;
4028
4029 if (dst) {
930345ea 4030 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4031 goto nla_put_failure;
1ab1457c 4032 rtm->rtm_dst_len = 128;
1da177e4 4033 } else if (rtm->rtm_dst_len)
930345ea 4034 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4035 goto nla_put_failure;
1da177e4
LT
4036#ifdef CONFIG_IPV6_SUBTREES
4037 if (src) {
930345ea 4038 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4039 goto nla_put_failure;
1ab1457c 4040 rtm->rtm_src_len = 128;
c78679e8 4041 } else if (rtm->rtm_src_len &&
930345ea 4042 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4043 goto nla_put_failure;
1da177e4 4044#endif
7bc570c8
YH
4045 if (iif) {
4046#ifdef CONFIG_IPV6_MROUTE
4047 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4048 int err = ip6mr_get_route(net, skb, rtm, portid);
4049
4050 if (err == 0)
4051 return 0;
4052 if (err < 0)
4053 goto nla_put_failure;
7bc570c8
YH
4054 } else
4055#endif
c78679e8
DM
4056 if (nla_put_u32(skb, RTA_IIF, iif))
4057 goto nla_put_failure;
7bc570c8 4058 } else if (dst) {
1da177e4 4059 struct in6_addr saddr_buf;
c78679e8 4060 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4061 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4062 goto nla_put_failure;
1da177e4 4063 }
2d7202bf 4064
c3968a85
DW
4065 if (rt->rt6i_prefsrc.plen) {
4066 struct in6_addr saddr_buf;
4e3fd7a0 4067 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4068 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4069 goto nla_put_failure;
c3968a85
DW
4070 }
4071
4b32b5ad
MKL
4072 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4073 if (rt->rt6i_pmtu)
4074 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4075 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4076 goto nla_put_failure;
4077
c78679e8
DM
4078 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4079 goto nla_put_failure;
8253947e 4080
beb1afac
DA
4081 /* For multipath routes, walk the siblings list and add
4082 * each as a nexthop within RTA_MULTIPATH.
4083 */
4084 if (rt->rt6i_nsiblings) {
4085 struct rt6_info *sibling, *next_sibling;
4086 struct nlattr *mp;
4087
4088 mp = nla_nest_start(skb, RTA_MULTIPATH);
4089 if (!mp)
4090 goto nla_put_failure;
4091
4092 if (rt6_add_nexthop(skb, rt) < 0)
4093 goto nla_put_failure;
4094
4095 list_for_each_entry_safe(sibling, next_sibling,
4096 &rt->rt6i_siblings, rt6i_siblings) {
4097 if (rt6_add_nexthop(skb, sibling) < 0)
4098 goto nla_put_failure;
4099 }
4100
4101 nla_nest_end(skb, mp);
4102 } else {
5be083ce 4103 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4104 goto nla_put_failure;
4105 }
4106
8253947e 4107 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4108
87a50699 4109 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4110 goto nla_put_failure;
2d7202bf 4111
c78ba6d6
LR
4112 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4113 goto nla_put_failure;
4114
19e42e45 4115
053c095a
JB
4116 nlmsg_end(skb, nlh);
4117 return 0;
2d7202bf
TG
4118
4119nla_put_failure:
26932566
PM
4120 nlmsg_cancel(skb, nlh);
4121 return -EMSGSIZE;
1da177e4
LT
4122}
4123
1b43af54 4124int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4125{
4126 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4127 struct net *net = arg->net;
4128
4129 if (rt == net->ipv6.ip6_null_entry)
4130 return 0;
1da177e4 4131
2d7202bf
TG
4132 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4133 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4134
4135 /* user wants prefix routes only */
4136 if (rtm->rtm_flags & RTM_F_PREFIX &&
4137 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4138 /* success since this is not a prefix route */
4139 return 1;
4140 }
4141 }
1da177e4 4142
1f17e2f2 4143 return rt6_fill_node(net,
191cd582 4144 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4145 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4146 NLM_F_MULTI);
1da177e4
LT
4147}
4148
c21ef3e3
DA
4149static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4150 struct netlink_ext_ack *extack)
1da177e4 4151{
3b1e0a65 4152 struct net *net = sock_net(in_skb->sk);
ab364a6f 4153 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4154 int err, iif = 0, oif = 0;
4155 struct dst_entry *dst;
ab364a6f 4156 struct rt6_info *rt;
1da177e4 4157 struct sk_buff *skb;
ab364a6f 4158 struct rtmsg *rtm;
4c9483b2 4159 struct flowi6 fl6;
18c3a61c 4160 bool fibmatch;
1da177e4 4161
fceb6435 4162 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4163 extack);
ab364a6f
TG
4164 if (err < 0)
4165 goto errout;
1da177e4 4166
ab364a6f 4167 err = -EINVAL;
4c9483b2 4168 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4169 rtm = nlmsg_data(nlh);
4170 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4171 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4172
ab364a6f
TG
4173 if (tb[RTA_SRC]) {
4174 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4175 goto errout;
4176
4e3fd7a0 4177 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4178 }
4179
4180 if (tb[RTA_DST]) {
4181 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4182 goto errout;
4183
4e3fd7a0 4184 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4185 }
4186
4187 if (tb[RTA_IIF])
4188 iif = nla_get_u32(tb[RTA_IIF]);
4189
4190 if (tb[RTA_OIF])
72331bc0 4191 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4192
2e47b291
LC
4193 if (tb[RTA_MARK])
4194 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4195
622ec2c9
LC
4196 if (tb[RTA_UID])
4197 fl6.flowi6_uid = make_kuid(current_user_ns(),
4198 nla_get_u32(tb[RTA_UID]));
4199 else
4200 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4201
1da177e4
LT
4202 if (iif) {
4203 struct net_device *dev;
72331bc0
SL
4204 int flags = 0;
4205
121622db
FW
4206 rcu_read_lock();
4207
4208 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4209 if (!dev) {
121622db 4210 rcu_read_unlock();
1da177e4 4211 err = -ENODEV;
ab364a6f 4212 goto errout;
1da177e4 4213 }
72331bc0
SL
4214
4215 fl6.flowi6_iif = iif;
4216
4217 if (!ipv6_addr_any(&fl6.saddr))
4218 flags |= RT6_LOOKUP_F_HAS_SADDR;
4219
18c3a61c
RP
4220 if (!fibmatch)
4221 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
4222 else
4223 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
4224
4225 rcu_read_unlock();
72331bc0
SL
4226 } else {
4227 fl6.flowi6_oif = oif;
4228
18c3a61c
RP
4229 if (!fibmatch)
4230 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
4231 else
4232 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
4233 }
4234
18c3a61c
RP
4235
4236 rt = container_of(dst, struct rt6_info, dst);
4237 if (rt->dst.error) {
4238 err = rt->dst.error;
4239 ip6_rt_put(rt);
4240 goto errout;
1da177e4
LT
4241 }
4242
9d6acb3b
WC
4243 if (rt == net->ipv6.ip6_null_entry) {
4244 err = rt->dst.error;
4245 ip6_rt_put(rt);
4246 goto errout;
4247 }
4248
ab364a6f 4249 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4250 if (!skb) {
94e187c0 4251 ip6_rt_put(rt);
ab364a6f
TG
4252 err = -ENOBUFS;
4253 goto errout;
4254 }
1da177e4 4255
d8d1f30b 4256 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4257 if (fibmatch)
4258 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4259 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4260 nlh->nlmsg_seq, 0);
4261 else
4262 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4263 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4264 nlh->nlmsg_seq, 0);
1da177e4 4265 if (err < 0) {
ab364a6f
TG
4266 kfree_skb(skb);
4267 goto errout;
1da177e4
LT
4268 }
4269
15e47304 4270 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4271errout:
1da177e4 4272 return err;
1da177e4
LT
4273}
4274
37a1d361
RP
4275void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4276 unsigned int nlm_flags)
1da177e4
LT
4277{
4278 struct sk_buff *skb;
5578689a 4279 struct net *net = info->nl_net;
528c4ceb
DL
4280 u32 seq;
4281 int err;
4282
4283 err = -ENOBUFS;
38308473 4284 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4285
19e42e45 4286 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4287 if (!skb)
21713ebc
TG
4288 goto errout;
4289
191cd582 4290 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4291 event, info->portid, seq, nlm_flags);
26932566
PM
4292 if (err < 0) {
4293 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4294 WARN_ON(err == -EMSGSIZE);
4295 kfree_skb(skb);
4296 goto errout;
4297 }
15e47304 4298 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4299 info->nlh, gfp_any());
4300 return;
21713ebc
TG
4301errout:
4302 if (err < 0)
5578689a 4303 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4304}
4305
8ed67789 4306static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4307 unsigned long event, void *ptr)
8ed67789 4308{
351638e7 4309 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4310 struct net *net = dev_net(dev);
8ed67789 4311
242d3a49
WC
4312 if (!(dev->flags & IFF_LOOPBACK))
4313 return NOTIFY_OK;
4314
4315 if (event == NETDEV_REGISTER) {
d8d1f30b 4316 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4317 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4318#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4319 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4320 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4321 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4322 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4323#endif
76da0704
WC
4324 } else if (event == NETDEV_UNREGISTER &&
4325 dev->reg_state != NETREG_UNREGISTERED) {
4326 /* NETDEV_UNREGISTER could be fired for multiple times by
4327 * netdev_wait_allrefs(). Make sure we only call this once.
4328 */
12d94a80 4329 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4330#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4331 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4332 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4333#endif
4334 }
4335
4336 return NOTIFY_OK;
4337}
4338
1da177e4
LT
4339/*
4340 * /proc
4341 */
4342
4343#ifdef CONFIG_PROC_FS
4344
33120b30
AD
4345static const struct file_operations ipv6_route_proc_fops = {
4346 .owner = THIS_MODULE,
4347 .open = ipv6_route_open,
4348 .read = seq_read,
4349 .llseek = seq_lseek,
8d2ca1d7 4350 .release = seq_release_net,
33120b30
AD
4351};
4352
1da177e4
LT
4353static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4354{
69ddb805 4355 struct net *net = (struct net *)seq->private;
1da177e4 4356 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4357 net->ipv6.rt6_stats->fib_nodes,
4358 net->ipv6.rt6_stats->fib_route_nodes,
4359 net->ipv6.rt6_stats->fib_rt_alloc,
4360 net->ipv6.rt6_stats->fib_rt_entries,
4361 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4362 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4363 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4364
4365 return 0;
4366}
4367
4368static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4369{
de05c557 4370 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4371}
4372
9a32144e 4373static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4374 .owner = THIS_MODULE,
4375 .open = rt6_stats_seq_open,
4376 .read = seq_read,
4377 .llseek = seq_lseek,
b6fcbdb4 4378 .release = single_release_net,
1da177e4
LT
4379};
4380#endif /* CONFIG_PROC_FS */
4381
4382#ifdef CONFIG_SYSCTL
4383
1da177e4 4384static
fe2c6338 4385int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4386 void __user *buffer, size_t *lenp, loff_t *ppos)
4387{
c486da34
LAG
4388 struct net *net;
4389 int delay;
4390 if (!write)
1da177e4 4391 return -EINVAL;
c486da34
LAG
4392
4393 net = (struct net *)ctl->extra1;
4394 delay = net->ipv6.sysctl.flush_delay;
4395 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4396 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4397 return 0;
1da177e4
LT
4398}
4399
fe2c6338 4400struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4401 {
1da177e4 4402 .procname = "flush",
4990509f 4403 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4404 .maxlen = sizeof(int),
89c8b3a1 4405 .mode = 0200,
6d9f239a 4406 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4407 },
4408 {
1da177e4 4409 .procname = "gc_thresh",
9a7ec3a9 4410 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4411 .maxlen = sizeof(int),
4412 .mode = 0644,
6d9f239a 4413 .proc_handler = proc_dointvec,
1da177e4
LT
4414 },
4415 {
1da177e4 4416 .procname = "max_size",
4990509f 4417 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4418 .maxlen = sizeof(int),
4419 .mode = 0644,
6d9f239a 4420 .proc_handler = proc_dointvec,
1da177e4
LT
4421 },
4422 {
1da177e4 4423 .procname = "gc_min_interval",
4990509f 4424 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4425 .maxlen = sizeof(int),
4426 .mode = 0644,
6d9f239a 4427 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4428 },
4429 {
1da177e4 4430 .procname = "gc_timeout",
4990509f 4431 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4432 .maxlen = sizeof(int),
4433 .mode = 0644,
6d9f239a 4434 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4435 },
4436 {
1da177e4 4437 .procname = "gc_interval",
4990509f 4438 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4439 .maxlen = sizeof(int),
4440 .mode = 0644,
6d9f239a 4441 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4442 },
4443 {
1da177e4 4444 .procname = "gc_elasticity",
4990509f 4445 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4446 .maxlen = sizeof(int),
4447 .mode = 0644,
f3d3f616 4448 .proc_handler = proc_dointvec,
1da177e4
LT
4449 },
4450 {
1da177e4 4451 .procname = "mtu_expires",
4990509f 4452 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4453 .maxlen = sizeof(int),
4454 .mode = 0644,
6d9f239a 4455 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4456 },
4457 {
1da177e4 4458 .procname = "min_adv_mss",
4990509f 4459 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4460 .maxlen = sizeof(int),
4461 .mode = 0644,
f3d3f616 4462 .proc_handler = proc_dointvec,
1da177e4
LT
4463 },
4464 {
1da177e4 4465 .procname = "gc_min_interval_ms",
4990509f 4466 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4467 .maxlen = sizeof(int),
4468 .mode = 0644,
6d9f239a 4469 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4470 },
f8572d8f 4471 { }
1da177e4
LT
4472};
4473
2c8c1e72 4474struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4475{
4476 struct ctl_table *table;
4477
4478 table = kmemdup(ipv6_route_table_template,
4479 sizeof(ipv6_route_table_template),
4480 GFP_KERNEL);
5ee09105
YH
4481
4482 if (table) {
4483 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4484 table[0].extra1 = net;
86393e52 4485 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4486 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4487 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4488 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4489 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4490 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4491 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4492 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4493 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4494
4495 /* Don't export sysctls to unprivileged users */
4496 if (net->user_ns != &init_user_ns)
4497 table[0].procname = NULL;
5ee09105
YH
4498 }
4499
760f2d01
DL
4500 return table;
4501}
1da177e4
LT
4502#endif
4503
2c8c1e72 4504static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4505{
633d424b 4506 int ret = -ENOMEM;
8ed67789 4507
86393e52
AD
4508 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4509 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4510
fc66f95c
ED
4511 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4512 goto out_ip6_dst_ops;
4513
8ed67789
DL
4514 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4515 sizeof(*net->ipv6.ip6_null_entry),
4516 GFP_KERNEL);
4517 if (!net->ipv6.ip6_null_entry)
fc66f95c 4518 goto out_ip6_dst_entries;
d8d1f30b 4519 net->ipv6.ip6_null_entry->dst.path =
8ed67789 4520 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 4521 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4522 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4523 ip6_template_metrics, true);
8ed67789
DL
4524
4525#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4526 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4527 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4528 sizeof(*net->ipv6.ip6_prohibit_entry),
4529 GFP_KERNEL);
68fffc67
PZ
4530 if (!net->ipv6.ip6_prohibit_entry)
4531 goto out_ip6_null_entry;
d8d1f30b 4532 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4533 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4534 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4535 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4536 ip6_template_metrics, true);
8ed67789
DL
4537
4538 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4539 sizeof(*net->ipv6.ip6_blk_hole_entry),
4540 GFP_KERNEL);
68fffc67
PZ
4541 if (!net->ipv6.ip6_blk_hole_entry)
4542 goto out_ip6_prohibit_entry;
d8d1f30b 4543 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4544 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4545 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4546 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4547 ip6_template_metrics, true);
8ed67789
DL
4548#endif
4549
b339a47c
PZ
4550 net->ipv6.sysctl.flush_delay = 0;
4551 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4552 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4553 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4554 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4555 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4556 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4557 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4558
6891a346
BT
4559 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4560
8ed67789
DL
4561 ret = 0;
4562out:
4563 return ret;
f2fc6a54 4564
68fffc67
PZ
4565#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4566out_ip6_prohibit_entry:
4567 kfree(net->ipv6.ip6_prohibit_entry);
4568out_ip6_null_entry:
4569 kfree(net->ipv6.ip6_null_entry);
4570#endif
fc66f95c
ED
4571out_ip6_dst_entries:
4572 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4573out_ip6_dst_ops:
f2fc6a54 4574 goto out;
cdb18761
DL
4575}
4576
2c8c1e72 4577static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4578{
8ed67789
DL
4579 kfree(net->ipv6.ip6_null_entry);
4580#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4581 kfree(net->ipv6.ip6_prohibit_entry);
4582 kfree(net->ipv6.ip6_blk_hole_entry);
4583#endif
41bb78b4 4584 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4585}
4586
d189634e
TG
4587static int __net_init ip6_route_net_init_late(struct net *net)
4588{
4589#ifdef CONFIG_PROC_FS
d4beaa66
G
4590 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4591 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4592#endif
4593 return 0;
4594}
4595
4596static void __net_exit ip6_route_net_exit_late(struct net *net)
4597{
4598#ifdef CONFIG_PROC_FS
ece31ffd
G
4599 remove_proc_entry("ipv6_route", net->proc_net);
4600 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4601#endif
4602}
4603
cdb18761
DL
4604static struct pernet_operations ip6_route_net_ops = {
4605 .init = ip6_route_net_init,
4606 .exit = ip6_route_net_exit,
4607};
4608
c3426b47
DM
4609static int __net_init ipv6_inetpeer_init(struct net *net)
4610{
4611 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4612
4613 if (!bp)
4614 return -ENOMEM;
4615 inet_peer_base_init(bp);
4616 net->ipv6.peers = bp;
4617 return 0;
4618}
4619
4620static void __net_exit ipv6_inetpeer_exit(struct net *net)
4621{
4622 struct inet_peer_base *bp = net->ipv6.peers;
4623
4624 net->ipv6.peers = NULL;
56a6b248 4625 inetpeer_invalidate_tree(bp);
c3426b47
DM
4626 kfree(bp);
4627}
4628
2b823f72 4629static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4630 .init = ipv6_inetpeer_init,
4631 .exit = ipv6_inetpeer_exit,
4632};
4633
d189634e
TG
4634static struct pernet_operations ip6_route_net_late_ops = {
4635 .init = ip6_route_net_init_late,
4636 .exit = ip6_route_net_exit_late,
4637};
4638
8ed67789
DL
4639static struct notifier_block ip6_route_dev_notifier = {
4640 .notifier_call = ip6_route_dev_notify,
242d3a49 4641 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4642};
4643
2f460933
WC
4644void __init ip6_route_init_special_entries(void)
4645{
4646 /* Registering of the loopback is done before this portion of code,
4647 * the loopback reference in rt6_info will not be taken, do it
4648 * manually for init_net */
4649 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4650 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4651 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4652 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4653 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4654 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4655 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4656 #endif
4657}
4658
433d49c3 4659int __init ip6_route_init(void)
1da177e4 4660{
433d49c3 4661 int ret;
8d0b94af 4662 int cpu;
433d49c3 4663
9a7ec3a9
DL
4664 ret = -ENOMEM;
4665 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4666 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4667 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4668 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4669 goto out;
14e50e57 4670
fc66f95c 4671 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4672 if (ret)
bdb3289f 4673 goto out_kmem_cache;
bdb3289f 4674
c3426b47
DM
4675 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4676 if (ret)
e8803b6c 4677 goto out_dst_entries;
2a0c451a 4678
7e52b33b
DM
4679 ret = register_pernet_subsys(&ip6_route_net_ops);
4680 if (ret)
4681 goto out_register_inetpeer;
c3426b47 4682
5dc121e9
AE
4683 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4684
e8803b6c 4685 ret = fib6_init();
433d49c3 4686 if (ret)
8ed67789 4687 goto out_register_subsys;
433d49c3 4688
433d49c3
DL
4689 ret = xfrm6_init();
4690 if (ret)
e8803b6c 4691 goto out_fib6_init;
c35b7e72 4692
433d49c3
DL
4693 ret = fib6_rules_init();
4694 if (ret)
4695 goto xfrm6_init;
7e5449c2 4696
d189634e
TG
4697 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4698 if (ret)
4699 goto fib6_rules_init;
4700
433d49c3 4701 ret = -ENOBUFS;
b97bac64
FW
4702 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4703 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4704 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4705 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4706 goto out_register_late_subsys;
c127ea2c 4707
8ed67789 4708 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4709 if (ret)
d189634e 4710 goto out_register_late_subsys;
8ed67789 4711
8d0b94af
MKL
4712 for_each_possible_cpu(cpu) {
4713 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4714
4715 INIT_LIST_HEAD(&ul->head);
4716 spin_lock_init(&ul->lock);
4717 }
4718
433d49c3
DL
4719out:
4720 return ret;
4721
d189634e
TG
4722out_register_late_subsys:
4723 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4724fib6_rules_init:
433d49c3
DL
4725 fib6_rules_cleanup();
4726xfrm6_init:
433d49c3 4727 xfrm6_fini();
2a0c451a
TG
4728out_fib6_init:
4729 fib6_gc_cleanup();
8ed67789
DL
4730out_register_subsys:
4731 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4732out_register_inetpeer:
4733 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4734out_dst_entries:
4735 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4736out_kmem_cache:
f2fc6a54 4737 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4738 goto out;
1da177e4
LT
4739}
4740
4741void ip6_route_cleanup(void)
4742{
8ed67789 4743 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4744 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4745 fib6_rules_cleanup();
1da177e4 4746 xfrm6_fini();
1da177e4 4747 fib6_gc_cleanup();
c3426b47 4748 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4749 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4750 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4751 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4752}