net: ipv6: Fix processing of RAs in presence of VRF
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4
LT
66
67#include <asm/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result codes returned by rt6_check_neigh() and passed on by
 * rt6_score_route(). All failure codes are negative; success is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		=  1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
70ceb4f5 102#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 103static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 104 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
105 const struct in6_addr *gwaddr,
106 struct net_device *dev,
95c96174 107 unsigned int pref);
efa2cea0 108static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 109 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
110 const struct in6_addr *gwaddr,
111 struct net_device *dev);
70ceb4f5
YH
112#endif
113
8d0b94af
MKL
114struct uncached_list {
115 spinlock_t lock;
116 struct list_head head;
117};
118
119static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
121static void rt6_uncached_list_add(struct rt6_info *rt)
122{
123 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
124
125 rt->dst.flags |= DST_NOCACHE;
126 rt->rt6i_uncached_list = ul;
127
128 spin_lock_bh(&ul->lock);
129 list_add_tail(&rt->rt6i_uncached, &ul->head);
130 spin_unlock_bh(&ul->lock);
131}
132
133static void rt6_uncached_list_del(struct rt6_info *rt)
134{
135 if (!list_empty(&rt->rt6i_uncached)) {
136 struct uncached_list *ul = rt->rt6i_uncached_list;
137
138 spin_lock_bh(&ul->lock);
139 list_del(&rt->rt6i_uncached);
140 spin_unlock_bh(&ul->lock);
141 }
142}
143
144static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
145{
146 struct net_device *loopback_dev = net->loopback_dev;
147 int cpu;
148
e332bc67
EB
149 if (dev == loopback_dev)
150 return;
151
8d0b94af
MKL
152 for_each_possible_cpu(cpu) {
153 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
154 struct rt6_info *rt;
155
156 spin_lock_bh(&ul->lock);
157 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
158 struct inet6_dev *rt_idev = rt->rt6i_idev;
159 struct net_device *rt_dev = rt->dst.dev;
160
e332bc67 161 if (rt_idev->dev == dev) {
8d0b94af
MKL
162 rt->rt6i_idev = in6_dev_get(loopback_dev);
163 in6_dev_put(rt_idev);
164 }
165
e332bc67 166 if (rt_dev == dev) {
8d0b94af
MKL
167 rt->dst.dev = loopback_dev;
168 dev_hold(rt->dst.dev);
169 dev_put(rt_dev);
170 }
171 }
172 spin_unlock_bh(&ul->lock);
173 }
174}
175
d52d3997
MKL
176static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177{
178 return dst_metrics_write_ptr(rt->dst.from);
179}
180
06582540
DM
181static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182{
4b32b5ad 183 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 184
d52d3997
MKL
185 if (rt->rt6i_flags & RTF_PCPU)
186 return rt6_pcpu_cow_metrics(rt);
187 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
188 return NULL;
189 else
3b471175 190 return dst_cow_metrics_generic(dst, old);
06582540
DM
191}
192
f894cbf8
DM
193static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 struct sk_buff *skb,
195 const void *daddr)
39232973
DM
196{
197 struct in6_addr *p = &rt->rt6i_gateway;
198
a7563f34 199 if (!ipv6_addr_any(p))
39232973 200 return (const void *) p;
f894cbf8
DM
201 else if (skb)
202 return &ipv6_hdr(skb)->daddr;
39232973
DM
203 return daddr;
204}
205
f894cbf8
DM
206static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207 struct sk_buff *skb,
208 const void *daddr)
d3aaeb38 209{
39232973
DM
210 struct rt6_info *rt = (struct rt6_info *) dst;
211 struct neighbour *n;
212
f894cbf8 213 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 214 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
215 if (n)
216 return n;
217 return neigh_create(&nd_tbl, daddr, dst->dev);
218}
219
9a7ec3a9 220static struct dst_ops ip6_dst_ops_template = {
1da177e4 221 .family = AF_INET6,
1da177e4
LT
222 .gc = ip6_dst_gc,
223 .gc_thresh = 1024,
224 .check = ip6_dst_check,
0dbaee3b 225 .default_advmss = ip6_default_advmss,
ebb762f2 226 .mtu = ip6_mtu,
06582540 227 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
228 .destroy = ip6_dst_destroy,
229 .ifdown = ip6_dst_ifdown,
230 .negative_advice = ip6_negative_advice,
231 .link_failure = ip6_link_failure,
232 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 233 .redirect = rt6_do_redirect,
9f8955cc 234 .local_out = __ip6_local_out,
d3aaeb38 235 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
236};
237
ebb762f2 238static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 239{
618f9bc7
SK
240 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
241
242 return mtu ? : dst->dev->mtu;
ec831ea7
RD
243}
244
6700c270
DM
245static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
246 struct sk_buff *skb, u32 mtu)
14e50e57
DM
247{
248}
249
6700c270
DM
/* .redirect handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
254
14e50e57
DM
255static struct dst_ops ip6_dst_blackhole_ops = {
256 .family = AF_INET6,
14e50e57
DM
257 .destroy = ip6_dst_destroy,
258 .check = ip6_dst_check,
ebb762f2 259 .mtu = ip6_blackhole_mtu,
214f45c9 260 .default_advmss = ip6_default_advmss,
14e50e57 261 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 262 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 263 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 264 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
265};
266
62fa8a84 267static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 268 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
269};
270
fb0af4c7 271static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
272 .dst = {
273 .__refcnt = ATOMIC_INIT(1),
274 .__use = 1,
2c20cbd7 275 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 276 .error = -ENETUNREACH,
d8d1f30b
CG
277 .input = ip6_pkt_discard,
278 .output = ip6_pkt_discard_out,
1da177e4
LT
279 },
280 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 281 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
282 .rt6i_metric = ~(u32) 0,
283 .rt6i_ref = ATOMIC_INIT(1),
284};
285
101367c2
TG
286#ifdef CONFIG_IPV6_MULTIPLE_TABLES
287
fb0af4c7 288static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
289 .dst = {
290 .__refcnt = ATOMIC_INIT(1),
291 .__use = 1,
2c20cbd7 292 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 293 .error = -EACCES,
d8d1f30b
CG
294 .input = ip6_pkt_prohibit,
295 .output = ip6_pkt_prohibit_out,
101367c2
TG
296 },
297 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 298 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
299 .rt6i_metric = ~(u32) 0,
300 .rt6i_ref = ATOMIC_INIT(1),
301};
302
fb0af4c7 303static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
304 .dst = {
305 .__refcnt = ATOMIC_INIT(1),
306 .__use = 1,
2c20cbd7 307 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 308 .error = -EINVAL,
d8d1f30b 309 .input = dst_discard,
ede2059d 310 .output = dst_discard_out,
101367c2
TG
311 },
312 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 313 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
314 .rt6i_metric = ~(u32) 0,
315 .rt6i_ref = ATOMIC_INIT(1),
316};
317
318#endif
319
ebfa45f0
MKL
320static void rt6_info_init(struct rt6_info *rt)
321{
322 struct dst_entry *dst = &rt->dst;
323
324 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
325 INIT_LIST_HEAD(&rt->rt6i_siblings);
326 INIT_LIST_HEAD(&rt->rt6i_uncached);
327}
328
1da177e4 329/* allocate dst with ip6_dst_ops */
d52d3997
MKL
330static struct rt6_info *__ip6_dst_alloc(struct net *net,
331 struct net_device *dev,
ad706862 332 int flags)
1da177e4 333{
97bab73f 334 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 335 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 336
ebfa45f0
MKL
337 if (rt)
338 rt6_info_init(rt);
8104891b 339
cf911662 340 return rt;
1da177e4
LT
341}
342
9ab179d8
DA
343struct rt6_info *ip6_dst_alloc(struct net *net,
344 struct net_device *dev,
345 int flags)
d52d3997 346{
ad706862 347 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
348
349 if (rt) {
350 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
351 if (rt->rt6i_pcpu) {
352 int cpu;
353
354 for_each_possible_cpu(cpu) {
355 struct rt6_info **p;
356
357 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
358 /* no one shares rt */
359 *p = NULL;
360 }
361 } else {
362 dst_destroy((struct dst_entry *)rt);
363 return NULL;
364 }
365 }
366
367 return rt;
368}
9ab179d8 369EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 370
1da177e4
LT
371static void ip6_dst_destroy(struct dst_entry *dst)
372{
373 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 374 struct dst_entry *from = dst->from;
8d0b94af 375 struct inet6_dev *idev;
1da177e4 376
4b32b5ad 377 dst_destroy_metrics_generic(dst);
87775312 378 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
379 rt6_uncached_list_del(rt);
380
381 idev = rt->rt6i_idev;
38308473 382 if (idev) {
1da177e4
LT
383 rt->rt6i_idev = NULL;
384 in6_dev_put(idev);
1ab1457c 385 }
1716a961 386
ecd98837
YH
387 dst->from = NULL;
388 dst_release(from);
b3419363
DM
389}
390
1da177e4
LT
391static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392 int how)
393{
394 struct rt6_info *rt = (struct rt6_info *)dst;
395 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 396 struct net_device *loopback_dev =
c346dca1 397 dev_net(dev)->loopback_dev;
1da177e4 398
97cac082
DM
399 if (dev != loopback_dev) {
400 if (idev && idev->dev == dev) {
401 struct inet6_dev *loopback_idev =
402 in6_dev_get(loopback_dev);
403 if (loopback_idev) {
404 rt->rt6i_idev = loopback_idev;
405 in6_dev_put(idev);
406 }
407 }
1da177e4
LT
408 }
409}
410
5973fb1e
MKL
411static bool __rt6_check_expired(const struct rt6_info *rt)
412{
413 if (rt->rt6i_flags & RTF_EXPIRES)
414 return time_after(jiffies, rt->dst.expires);
415 else
416 return false;
417}
418
a50feda5 419static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 420{
1716a961
G
421 if (rt->rt6i_flags & RTF_EXPIRES) {
422 if (time_after(jiffies, rt->dst.expires))
a50feda5 423 return true;
1716a961 424 } else if (rt->dst.from) {
3fd91fb3 425 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 426 }
a50feda5 427 return false;
1da177e4
LT
428}
429
51ebd318
ND
430/* Multipath route selection:
431 * Hash based function using packet header and flowlabel.
432 * Adapted from fib_info_hashfn()
433 */
434static int rt6_info_hash_nhsfn(unsigned int candidate_count,
435 const struct flowi6 *fl6)
436{
644d0e65 437 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
438}
439
440static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
441 struct flowi6 *fl6, int oif,
442 int strict)
51ebd318
ND
443{
444 struct rt6_info *sibling, *next_sibling;
445 int route_choosen;
446
447 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
448 /* Don't change the route, if route_choosen == 0
449 * (siblings does not include ourself)
450 */
451 if (route_choosen)
452 list_for_each_entry_safe(sibling, next_sibling,
453 &match->rt6i_siblings, rt6i_siblings) {
454 route_choosen--;
455 if (route_choosen == 0) {
52bd4c0c
ND
456 if (rt6_score_route(sibling, oif, strict) < 0)
457 break;
51ebd318
ND
458 match = sibling;
459 break;
460 }
461 }
462 return match;
463}
464
1da177e4 465/*
c71099ac 466 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
467 */
468
8ed67789
DL
469static inline struct rt6_info *rt6_device_match(struct net *net,
470 struct rt6_info *rt,
b71d1d42 471 const struct in6_addr *saddr,
1da177e4 472 int oif,
d420895e 473 int flags)
1da177e4
LT
474{
475 struct rt6_info *local = NULL;
476 struct rt6_info *sprt;
477
dd3abc4e
YH
478 if (!oif && ipv6_addr_any(saddr))
479 goto out;
480
d8d1f30b 481 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 482 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
483
484 if (oif) {
1da177e4
LT
485 if (dev->ifindex == oif)
486 return sprt;
487 if (dev->flags & IFF_LOOPBACK) {
38308473 488 if (!sprt->rt6i_idev ||
1da177e4 489 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 490 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 491 continue;
17fb0b2b
DA
492 if (local &&
493 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
494 continue;
495 }
496 local = sprt;
497 }
dd3abc4e
YH
498 } else {
499 if (ipv6_chk_addr(net, saddr, dev,
500 flags & RT6_LOOKUP_F_IFACE))
501 return sprt;
1da177e4 502 }
dd3abc4e 503 }
1da177e4 504
dd3abc4e 505 if (oif) {
1da177e4
LT
506 if (local)
507 return local;
508
d420895e 509 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 510 return net->ipv6.ip6_null_entry;
1da177e4 511 }
dd3abc4e 512out:
1da177e4
LT
513 return rt;
514}
515
27097255 516#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
517struct __rt6_probe_work {
518 struct work_struct work;
519 struct in6_addr target;
520 struct net_device *dev;
521};
522
523static void rt6_probe_deferred(struct work_struct *w)
524{
525 struct in6_addr mcaddr;
526 struct __rt6_probe_work *work =
527 container_of(w, struct __rt6_probe_work, work);
528
529 addrconf_addr_solict_mult(&work->target, &mcaddr);
304d888b 530 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
c2f17e82 531 dev_put(work->dev);
662f5533 532 kfree(work);
c2f17e82
HFS
533}
534
27097255
YH
535static void rt6_probe(struct rt6_info *rt)
536{
990edb42 537 struct __rt6_probe_work *work;
f2c31e32 538 struct neighbour *neigh;
27097255
YH
539 /*
540 * Okay, this does not seem to be appropriate
541 * for now, however, we need to check if it
542 * is really so; aka Router Reachability Probing.
543 *
544 * Router Reachability Probe MUST be rate-limited
545 * to no more than one per minute.
546 */
2152caea 547 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 548 return;
2152caea
YH
549 rcu_read_lock_bh();
550 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
551 if (neigh) {
8d6c31bf
MKL
552 if (neigh->nud_state & NUD_VALID)
553 goto out;
554
990edb42 555 work = NULL;
2152caea 556 write_lock(&neigh->lock);
990edb42
MKL
557 if (!(neigh->nud_state & NUD_VALID) &&
558 time_after(jiffies,
559 neigh->updated +
560 rt->rt6i_idev->cnf.rtr_probe_interval)) {
561 work = kmalloc(sizeof(*work), GFP_ATOMIC);
562 if (work)
563 __neigh_set_probe_once(neigh);
c2f17e82 564 }
2152caea 565 write_unlock(&neigh->lock);
990edb42
MKL
566 } else {
567 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 568 }
990edb42
MKL
569
570 if (work) {
571 INIT_WORK(&work->work, rt6_probe_deferred);
572 work->target = rt->rt6i_gateway;
573 dev_hold(rt->dst.dev);
574 work->dev = rt->dst.dev;
575 schedule_work(&work->work);
576 }
577
8d6c31bf 578out:
2152caea 579 rcu_read_unlock_bh();
27097255
YH
580}
581#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* no-op */
}
585#endif
586
1da177e4 587/*
554cfb7e 588 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 589 */
b6f99a21 590static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 591{
d1918542 592 struct net_device *dev = rt->dst.dev;
161980f4 593 if (!oif || dev->ifindex == oif)
554cfb7e 594 return 2;
161980f4
DM
595 if ((dev->flags & IFF_LOOPBACK) &&
596 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
597 return 1;
598 return 0;
554cfb7e 599}
1da177e4 600
afc154e9 601static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 602{
f2c31e32 603 struct neighbour *neigh;
afc154e9 604 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 605
4d0c5911
YH
606 if (rt->rt6i_flags & RTF_NONEXTHOP ||
607 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 608 return RT6_NUD_SUCCEED;
145a3621
YH
609
610 rcu_read_lock_bh();
611 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
612 if (neigh) {
613 read_lock(&neigh->lock);
554cfb7e 614 if (neigh->nud_state & NUD_VALID)
afc154e9 615 ret = RT6_NUD_SUCCEED;
398bcbeb 616#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 617 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 618 ret = RT6_NUD_SUCCEED;
7e980569
JB
619 else
620 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 621#endif
145a3621 622 read_unlock(&neigh->lock);
afc154e9
HFS
623 } else {
624 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 625 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 626 }
145a3621
YH
627 rcu_read_unlock_bh();
628
a5a81f0b 629 return ret;
1da177e4
LT
630}
631
554cfb7e
YH
632static int rt6_score_route(struct rt6_info *rt, int oif,
633 int strict)
1da177e4 634{
a5a81f0b 635 int m;
1ab1457c 636
4d0c5911 637 m = rt6_check_dev(rt, oif);
77d16f45 638 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 639 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
640#ifdef CONFIG_IPV6_ROUTER_PREF
641 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
642#endif
afc154e9
HFS
643 if (strict & RT6_LOOKUP_F_REACHABLE) {
644 int n = rt6_check_neigh(rt);
645 if (n < 0)
646 return n;
647 }
554cfb7e
YH
648 return m;
649}
650
f11e6659 651static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
652 int *mpri, struct rt6_info *match,
653 bool *do_rr)
554cfb7e 654{
f11e6659 655 int m;
afc154e9 656 bool match_do_rr = false;
35103d11
AG
657 struct inet6_dev *idev = rt->rt6i_idev;
658 struct net_device *dev = rt->dst.dev;
659
660 if (dev && !netif_carrier_ok(dev) &&
661 idev->cnf.ignore_routes_with_linkdown)
662 goto out;
f11e6659
DM
663
664 if (rt6_check_expired(rt))
665 goto out;
666
667 m = rt6_score_route(rt, oif, strict);
7e980569 668 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
669 match_do_rr = true;
670 m = 0; /* lowest valid score */
7e980569 671 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 672 goto out;
afc154e9
HFS
673 }
674
675 if (strict & RT6_LOOKUP_F_REACHABLE)
676 rt6_probe(rt);
f11e6659 677
7e980569 678 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 679 if (m > *mpri) {
afc154e9 680 *do_rr = match_do_rr;
f11e6659
DM
681 *mpri = m;
682 match = rt;
f11e6659 683 }
f11e6659
DM
684out:
685 return match;
686}
687
688static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
689 struct rt6_info *rr_head,
afc154e9
HFS
690 u32 metric, int oif, int strict,
691 bool *do_rr)
f11e6659 692{
9fbdcfaf 693 struct rt6_info *rt, *match, *cont;
554cfb7e 694 int mpri = -1;
1da177e4 695
f11e6659 696 match = NULL;
9fbdcfaf
SK
697 cont = NULL;
698 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
699 if (rt->rt6i_metric != metric) {
700 cont = rt;
701 break;
702 }
703
704 match = find_match(rt, oif, strict, &mpri, match, do_rr);
705 }
706
707 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
708 if (rt->rt6i_metric != metric) {
709 cont = rt;
710 break;
711 }
712
afc154e9 713 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
714 }
715
716 if (match || !cont)
717 return match;
718
719 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 720 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 721
f11e6659
DM
722 return match;
723}
1da177e4 724
f11e6659
DM
725static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
726{
727 struct rt6_info *match, *rt0;
8ed67789 728 struct net *net;
afc154e9 729 bool do_rr = false;
1da177e4 730
f11e6659
DM
731 rt0 = fn->rr_ptr;
732 if (!rt0)
733 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 734
afc154e9
HFS
735 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
736 &do_rr);
1da177e4 737
afc154e9 738 if (do_rr) {
d8d1f30b 739 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 740
554cfb7e 741 /* no entries matched; do round-robin */
f11e6659
DM
742 if (!next || next->rt6i_metric != rt0->rt6i_metric)
743 next = fn->leaf;
744
745 if (next != rt0)
746 fn->rr_ptr = next;
1da177e4 747 }
1da177e4 748
d1918542 749 net = dev_net(rt0->dst.dev);
a02cec21 750 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
751}
752
8b9df265
MKL
753static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
754{
755 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
756}
757
70ceb4f5
YH
758#ifdef CONFIG_IPV6_ROUTE_INFO
759int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 760 const struct in6_addr *gwaddr)
70ceb4f5 761{
c346dca1 762 struct net *net = dev_net(dev);
70ceb4f5
YH
763 struct route_info *rinfo = (struct route_info *) opt;
764 struct in6_addr prefix_buf, *prefix;
765 unsigned int pref;
4bed72e4 766 unsigned long lifetime;
70ceb4f5
YH
767 struct rt6_info *rt;
768
769 if (len < sizeof(struct route_info)) {
770 return -EINVAL;
771 }
772
773 /* Sanity check for prefix_len and length */
774 if (rinfo->length > 3) {
775 return -EINVAL;
776 } else if (rinfo->prefix_len > 128) {
777 return -EINVAL;
778 } else if (rinfo->prefix_len > 64) {
779 if (rinfo->length < 2) {
780 return -EINVAL;
781 }
782 } else if (rinfo->prefix_len > 0) {
783 if (rinfo->length < 1) {
784 return -EINVAL;
785 }
786 }
787
788 pref = rinfo->route_pref;
789 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 790 return -EINVAL;
70ceb4f5 791
4bed72e4 792 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
793
794 if (rinfo->length == 3)
795 prefix = (struct in6_addr *)rinfo->prefix;
796 else {
797 /* this function is safe */
798 ipv6_addr_prefix(&prefix_buf,
799 (struct in6_addr *)rinfo->prefix,
800 rinfo->prefix_len);
801 prefix = &prefix_buf;
802 }
803
f104a567
DJ
804 if (rinfo->prefix_len == 0)
805 rt = rt6_get_dflt_router(gwaddr, dev);
806 else
807 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 808 gwaddr, dev);
70ceb4f5
YH
809
810 if (rt && !lifetime) {
e0a1ad73 811 ip6_del_rt(rt);
70ceb4f5
YH
812 rt = NULL;
813 }
814
815 if (!rt && lifetime)
830218c1
DA
816 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
817 dev, pref);
70ceb4f5
YH
818 else if (rt)
819 rt->rt6i_flags = RTF_ROUTEINFO |
820 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
821
822 if (rt) {
1716a961
G
823 if (!addrconf_finite_timeout(lifetime))
824 rt6_clean_expires(rt);
825 else
826 rt6_set_expires(rt, jiffies + HZ * lifetime);
827
94e187c0 828 ip6_rt_put(rt);
70ceb4f5
YH
829 }
830 return 0;
831}
832#endif
833
a3c00e46
MKL
834static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
835 struct in6_addr *saddr)
836{
837 struct fib6_node *pn;
838 while (1) {
839 if (fn->fn_flags & RTN_TL_ROOT)
840 return NULL;
841 pn = fn->parent;
842 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
843 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
844 else
845 fn = pn;
846 if (fn->fn_flags & RTN_RTINFO)
847 return fn;
848 }
849}
c71099ac 850
8ed67789
DL
851static struct rt6_info *ip6_pol_route_lookup(struct net *net,
852 struct fib6_table *table,
4c9483b2 853 struct flowi6 *fl6, int flags)
1da177e4
LT
854{
855 struct fib6_node *fn;
856 struct rt6_info *rt;
857
c71099ac 858 read_lock_bh(&table->tb6_lock);
4c9483b2 859 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
860restart:
861 rt = fn->leaf;
4c9483b2 862 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 863 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 864 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
865 if (rt == net->ipv6.ip6_null_entry) {
866 fn = fib6_backtrack(fn, &fl6->saddr);
867 if (fn)
868 goto restart;
869 }
d8d1f30b 870 dst_use(&rt->dst, jiffies);
c71099ac 871 read_unlock_bh(&table->tb6_lock);
b811580d
DA
872
873 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
874
c71099ac
TG
875 return rt;
876
877}
878
67ba4152 879struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
880 int flags)
881{
882 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
883}
884EXPORT_SYMBOL_GPL(ip6_route_lookup);
885
9acd9f3a
YH
886struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
887 const struct in6_addr *saddr, int oif, int strict)
c71099ac 888{
4c9483b2
DM
889 struct flowi6 fl6 = {
890 .flowi6_oif = oif,
891 .daddr = *daddr,
c71099ac
TG
892 };
893 struct dst_entry *dst;
77d16f45 894 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 895
adaa70bb 896 if (saddr) {
4c9483b2 897 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
898 flags |= RT6_LOOKUP_F_HAS_SADDR;
899 }
900
4c9483b2 901 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
902 if (dst->error == 0)
903 return (struct rt6_info *) dst;
904
905 dst_release(dst);
906
1da177e4
LT
907 return NULL;
908}
7159039a
YH
909EXPORT_SYMBOL(rt6_lookup);
910
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
916
e5fd387a 917static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 918 struct mx6_config *mxc)
1da177e4
LT
919{
920 int err;
c71099ac 921 struct fib6_table *table;
1da177e4 922
c71099ac
TG
923 table = rt->rt6i_table;
924 write_lock_bh(&table->tb6_lock);
e715b6d3 925 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 926 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
927
928 return err;
929}
930
40e22e8f
TG
931int ip6_ins_rt(struct rt6_info *rt)
932{
e715b6d3
FW
933 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
934 struct mx6_config mxc = { .mx = NULL, };
935
936 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
937}
938
8b9df265
MKL
939static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
940 const struct in6_addr *daddr,
941 const struct in6_addr *saddr)
1da177e4 942{
1da177e4
LT
943 struct rt6_info *rt;
944
945 /*
946 * Clone the route.
947 */
948
d52d3997 949 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 950 ort = (struct rt6_info *)ort->dst.from;
1da177e4 951
ad706862 952 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
953
954 if (!rt)
955 return NULL;
956
957 ip6_rt_copy_init(rt, ort);
958 rt->rt6i_flags |= RTF_CACHE;
959 rt->rt6i_metric = 0;
960 rt->dst.flags |= DST_HOST;
961 rt->rt6i_dst.addr = *daddr;
962 rt->rt6i_dst.plen = 128;
1da177e4 963
83a09abd
MKL
964 if (!rt6_is_gw_or_nonexthop(ort)) {
965 if (ort->rt6i_dst.plen != 128 &&
966 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
967 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 968#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
969 if (rt->rt6i_src.plen && saddr) {
970 rt->rt6i_src.addr = *saddr;
971 rt->rt6i_src.plen = 128;
8b9df265 972 }
83a09abd 973#endif
95a9a5ba 974 }
1da177e4 975
95a9a5ba
YH
976 return rt;
977}
1da177e4 978
d52d3997
MKL
979static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
980{
981 struct rt6_info *pcpu_rt;
982
983 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 984 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
985
986 if (!pcpu_rt)
987 return NULL;
988 ip6_rt_copy_init(pcpu_rt, rt);
989 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
990 pcpu_rt->rt6i_flags |= RTF_PCPU;
991 return pcpu_rt;
992}
993
994/* It should be called with read_lock_bh(&tb6_lock) acquired */
995static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
996{
a73e4195 997 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
998
999 p = this_cpu_ptr(rt->rt6i_pcpu);
1000 pcpu_rt = *p;
1001
a73e4195
MKL
1002 if (pcpu_rt) {
1003 dst_hold(&pcpu_rt->dst);
1004 rt6_dst_from_metrics_check(pcpu_rt);
1005 }
1006 return pcpu_rt;
1007}
1008
1009static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1010{
9c7370a1 1011 struct fib6_table *table = rt->rt6i_table;
a73e4195 1012 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1013
1014 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1015 if (!pcpu_rt) {
1016 struct net *net = dev_net(rt->dst.dev);
1017
9c7370a1
MKL
1018 dst_hold(&net->ipv6.ip6_null_entry->dst);
1019 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1020 }
1021
9c7370a1
MKL
1022 read_lock_bh(&table->tb6_lock);
1023 if (rt->rt6i_pcpu) {
1024 p = this_cpu_ptr(rt->rt6i_pcpu);
1025 prev = cmpxchg(p, NULL, pcpu_rt);
1026 if (prev) {
1027 /* If someone did it before us, return prev instead */
1028 dst_destroy(&pcpu_rt->dst);
1029 pcpu_rt = prev;
1030 }
1031 } else {
1032 /* rt has been removed from the fib6 tree
1033 * before we have a chance to acquire the read_lock.
1034 * In this case, don't brother to create a pcpu rt
1035 * since rt is going away anyway. The next
1036 * dst_check() will trigger a re-lookup.
1037 */
d52d3997 1038 dst_destroy(&pcpu_rt->dst);
9c7370a1 1039 pcpu_rt = rt;
d52d3997 1040 }
d52d3997
MKL
1041 dst_hold(&pcpu_rt->dst);
1042 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1043 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1044 return pcpu_rt;
1045}
1046
9ff74384
DA
1047struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1048 int oif, struct flowi6 *fl6, int flags)
1da177e4 1049{
367efcb9 1050 struct fib6_node *fn, *saved_fn;
45e4fd26 1051 struct rt6_info *rt;
c71099ac 1052 int strict = 0;
1da177e4 1053
77d16f45 1054 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1055 if (net->ipv6.devconf_all->forwarding == 0)
1056 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1057
c71099ac 1058 read_lock_bh(&table->tb6_lock);
1da177e4 1059
4c9483b2 1060 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1061 saved_fn = fn;
1da177e4 1062
ca254490
DA
1063 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1064 oif = 0;
1065
a3c00e46 1066redo_rt6_select:
367efcb9 1067 rt = rt6_select(fn, oif, strict);
52bd4c0c 1068 if (rt->rt6i_nsiblings)
367efcb9 1069 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1070 if (rt == net->ipv6.ip6_null_entry) {
1071 fn = fib6_backtrack(fn, &fl6->saddr);
1072 if (fn)
1073 goto redo_rt6_select;
367efcb9
MKL
1074 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1075 /* also consider unreachable route */
1076 strict &= ~RT6_LOOKUP_F_REACHABLE;
1077 fn = saved_fn;
1078 goto redo_rt6_select;
367efcb9 1079 }
a3c00e46
MKL
1080 }
1081
fb9de91e 1082
3da59bd9 1083 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1084 dst_use(&rt->dst, jiffies);
1085 read_unlock_bh(&table->tb6_lock);
1086
1087 rt6_dst_from_metrics_check(rt);
b811580d
DA
1088
1089 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1090 return rt;
3da59bd9
MKL
1091 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1092 !(rt->rt6i_flags & RTF_GATEWAY))) {
1093 /* Create a RTF_CACHE clone which will not be
1094 * owned by the fib6 tree. It is for the special case where
1095 * the daddr in the skb during the neighbor look-up is different
1096 * from the fl6->daddr used to look-up route here.
1097 */
1098
1099 struct rt6_info *uncached_rt;
1100
d52d3997
MKL
1101 dst_use(&rt->dst, jiffies);
1102 read_unlock_bh(&table->tb6_lock);
1103
3da59bd9
MKL
1104 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1105 dst_release(&rt->dst);
c71099ac 1106
3da59bd9 1107 if (uncached_rt)
8d0b94af 1108 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1109 else
1110 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1111
3da59bd9 1112 dst_hold(&uncached_rt->dst);
b811580d
DA
1113
1114 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1115 return uncached_rt;
3da59bd9 1116
d52d3997
MKL
1117 } else {
1118 /* Get a percpu copy */
1119
1120 struct rt6_info *pcpu_rt;
1121
1122 rt->dst.lastuse = jiffies;
1123 rt->dst.__use++;
1124 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1125
9c7370a1
MKL
1126 if (pcpu_rt) {
1127 read_unlock_bh(&table->tb6_lock);
1128 } else {
1129 /* We have to do the read_unlock first
1130 * because rt6_make_pcpu_route() may trigger
1131 * ip6_dst_gc() which will take the write_lock.
1132 */
1133 dst_hold(&rt->dst);
1134 read_unlock_bh(&table->tb6_lock);
a73e4195 1135 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1136 dst_release(&rt->dst);
1137 }
d52d3997 1138
b811580d 1139 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1140 return pcpu_rt;
9c7370a1 1141
d52d3997 1142 }
1da177e4 1143}
9ff74384 1144EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1145
8ed67789 1146static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1147 struct flowi6 *fl6, int flags)
4acad72d 1148{
4c9483b2 1149 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1150}
1151
d409b847
MB
1152struct dst_entry *ip6_route_input_lookup(struct net *net,
1153 struct net_device *dev,
1154 struct flowi6 *fl6, int flags)
72331bc0
SL
1155{
1156 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1157 flags |= RT6_LOOKUP_F_IFACE;
1158
1159 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1160}
d409b847 1161EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1162
c71099ac
TG
1163void ip6_route_input(struct sk_buff *skb)
1164{
b71d1d42 1165 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1166 struct net *net = dev_net(skb->dev);
adaa70bb 1167 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1168 struct ip_tunnel_info *tun_info;
4c9483b2 1169 struct flowi6 fl6 = {
e0d56fdd 1170 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1171 .daddr = iph->daddr,
1172 .saddr = iph->saddr,
6502ca52 1173 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1174 .flowi6_mark = skb->mark,
1175 .flowi6_proto = iph->nexthdr,
c71099ac 1176 };
adaa70bb 1177
904af04d 1178 tun_info = skb_tunnel_info(skb);
46fa062a 1179 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1180 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1181 skb_dst_drop(skb);
72331bc0 1182 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1183}
1184
8ed67789 1185static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1186 struct flowi6 *fl6, int flags)
1da177e4 1187{
4c9483b2 1188 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1189}
1190
6f21c96a
PA
1191struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1192 struct flowi6 *fl6, int flags)
c71099ac 1193{
d46a9d67 1194 bool any_src;
c71099ac 1195
4c1feac5
DA
1196 if (rt6_need_strict(&fl6->daddr)) {
1197 struct dst_entry *dst;
1198
1199 dst = l3mdev_link_scope_lookup(net, fl6);
1200 if (dst)
1201 return dst;
1202 }
ca254490 1203
1fb9489b 1204 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1205
d46a9d67 1206 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1207 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1208 (fl6->flowi6_oif && any_src))
77d16f45 1209 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1210
d46a9d67 1211 if (!any_src)
adaa70bb 1212 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1213 else if (sk)
1214 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1215
4c9483b2 1216 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1217}
6f21c96a 1218EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1219
2774c131 1220struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1221{
5c1e6aa3 1222 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1223 struct dst_entry *new = NULL;
1224
f5b0a874 1225 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1226 if (rt) {
0a1f5962 1227 rt6_info_init(rt);
8104891b 1228
0a1f5962 1229 new = &rt->dst;
14e50e57 1230 new->__use = 1;
352e512c 1231 new->input = dst_discard;
ede2059d 1232 new->output = dst_discard_out;
14e50e57 1233
0a1f5962 1234 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1235 rt->rt6i_idev = ort->rt6i_idev;
1236 if (rt->rt6i_idev)
1237 in6_dev_hold(rt->rt6i_idev);
14e50e57 1238
4e3fd7a0 1239 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1240 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1241 rt->rt6i_metric = 0;
1242
1243 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1244#ifdef CONFIG_IPV6_SUBTREES
1245 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1246#endif
1247
1248 dst_free(new);
1249 }
1250
69ead7af
DM
1251 dst_release(dst_orig);
1252 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1253}
14e50e57 1254
1da177e4
LT
1255/*
1256 * Destination cache support functions
1257 */
1258
4b32b5ad
MKL
1259static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1260{
1261 if (rt->dst.from &&
1262 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1263 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1264}
1265
3da59bd9
MKL
1266static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1267{
1268 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1269 return NULL;
1270
1271 if (rt6_check_expired(rt))
1272 return NULL;
1273
1274 return &rt->dst;
1275}
1276
1277static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1278{
5973fb1e
MKL
1279 if (!__rt6_check_expired(rt) &&
1280 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1281 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1282 return &rt->dst;
1283 else
1284 return NULL;
1285}
1286
1da177e4
LT
1287static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1288{
1289 struct rt6_info *rt;
1290
1291 rt = (struct rt6_info *) dst;
1292
6f3118b5
ND
1293 /* All IPV6 dsts are created with ->obsolete set to the value
1294 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1295 * into this function always.
1296 */
e3bc10bd 1297
4b32b5ad
MKL
1298 rt6_dst_from_metrics_check(rt);
1299
02bcf4e0
MKL
1300 if (rt->rt6i_flags & RTF_PCPU ||
1301 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1302 return rt6_dst_from_check(rt, cookie);
1303 else
1304 return rt6_check(rt, cookie);
1da177e4
LT
1305}
1306
1307static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1308{
1309 struct rt6_info *rt = (struct rt6_info *) dst;
1310
1311 if (rt) {
54c1a859
YH
1312 if (rt->rt6i_flags & RTF_CACHE) {
1313 if (rt6_check_expired(rt)) {
1314 ip6_del_rt(rt);
1315 dst = NULL;
1316 }
1317 } else {
1da177e4 1318 dst_release(dst);
54c1a859
YH
1319 dst = NULL;
1320 }
1da177e4 1321 }
54c1a859 1322 return dst;
1da177e4
LT
1323}
1324
1325static void ip6_link_failure(struct sk_buff *skb)
1326{
1327 struct rt6_info *rt;
1328
3ffe533c 1329 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1330
adf30907 1331 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1332 if (rt) {
1eb4f758
HFS
1333 if (rt->rt6i_flags & RTF_CACHE) {
1334 dst_hold(&rt->dst);
8e3d5be7 1335 ip6_del_rt(rt);
1eb4f758 1336 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1337 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1338 }
1da177e4
LT
1339 }
1340}
1341
45e4fd26
MKL
1342static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1343{
1344 struct net *net = dev_net(rt->dst.dev);
1345
1346 rt->rt6i_flags |= RTF_MODIFIED;
1347 rt->rt6i_pmtu = mtu;
1348 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1349}
1350
0d3f6d29
MKL
1351static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1352{
1353 return !(rt->rt6i_flags & RTF_CACHE) &&
1354 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1355}
1356
45e4fd26
MKL
1357static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1358 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1359{
67ba4152 1360 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1361
45e4fd26
MKL
1362 if (rt6->rt6i_flags & RTF_LOCAL)
1363 return;
81aded24 1364
45e4fd26
MKL
1365 dst_confirm(dst);
1366 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1367 if (mtu >= dst_mtu(dst))
1368 return;
9d289715 1369
0d3f6d29 1370 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1371 rt6_do_update_pmtu(rt6, mtu);
1372 } else {
1373 const struct in6_addr *daddr, *saddr;
1374 struct rt6_info *nrt6;
1375
1376 if (iph) {
1377 daddr = &iph->daddr;
1378 saddr = &iph->saddr;
1379 } else if (sk) {
1380 daddr = &sk->sk_v6_daddr;
1381 saddr = &inet6_sk(sk)->saddr;
1382 } else {
1383 return;
1384 }
1385 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1386 if (nrt6) {
1387 rt6_do_update_pmtu(nrt6, mtu);
1388
1389 /* ip6_ins_rt(nrt6) will bump the
1390 * rt6->rt6i_node->fn_sernum
1391 * which will fail the next rt6_check() and
1392 * invalidate the sk->sk_dst_cache.
1393 */
1394 ip6_ins_rt(nrt6);
1395 }
1da177e4
LT
1396 }
1397}
1398
45e4fd26
MKL
1399static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1400 struct sk_buff *skb, u32 mtu)
1401{
1402 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1403}
1404
42ae66c8
DM
1405void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1406 int oif, u32 mark)
81aded24
DM
1407{
1408 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1409 struct dst_entry *dst;
1410 struct flowi6 fl6;
1411
1412 memset(&fl6, 0, sizeof(fl6));
1413 fl6.flowi6_oif = oif;
1b3c61dc 1414 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1415 fl6.daddr = iph->daddr;
1416 fl6.saddr = iph->saddr;
6502ca52 1417 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1418
1419 dst = ip6_route_output(net, NULL, &fl6);
1420 if (!dst->error)
45e4fd26 1421 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1422 dst_release(dst);
1423}
1424EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1425
1426void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1427{
33c162a9
MKL
1428 struct dst_entry *dst;
1429
81aded24
DM
1430 ip6_update_pmtu(skb, sock_net(sk), mtu,
1431 sk->sk_bound_dev_if, sk->sk_mark);
33c162a9
MKL
1432
1433 dst = __sk_dst_get(sk);
1434 if (!dst || !dst->obsolete ||
1435 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1436 return;
1437
1438 bh_lock_sock(sk);
1439 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1440 ip6_datagram_dst_update(sk, false);
1441 bh_unlock_sock(sk);
81aded24
DM
1442}
1443EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1444
b55b76b2
DJ
1445/* Handle redirects */
/*
 * Flow key used for redirect lookups: a regular flowi6 extended with the
 * address of the router that sent the redirect.
 *
 * fl6 has to stay the first member: ip6_route_redirect() hands &rdfl.fl6
 * to fib6_rule_lookup(), and __ip6_route_redirect() casts the
 * struct flowi6 pointer it receives back to struct ip6rd_flowi.
 */
1446struct ip6rd_flowi {
1447 struct flowi6 fl6;
1448 struct in6_addr gateway;
1449};
1450
1451static struct rt6_info *__ip6_route_redirect(struct net *net,
1452 struct fib6_table *table,
1453 struct flowi6 *fl6,
1454 int flags)
1455{
1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1457 struct rt6_info *rt;
1458 struct fib6_node *fn;
1459
1460 /* Get the "current" route for this destination and
1461 * check if the redirect has come from approriate router.
1462 *
1463 * RFC 4861 specifies that redirects should only be
1464 * accepted if they come from the nexthop to the target.
1465 * Due to the way the routes are chosen, this notion
1466 * is a bit fuzzy and one might need to check all possible
1467 * routes.
1468 */
1469
1470 read_lock_bh(&table->tb6_lock);
1471 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1472restart:
1473 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1474 if (rt6_check_expired(rt))
1475 continue;
1476 if (rt->dst.error)
1477 break;
1478 if (!(rt->rt6i_flags & RTF_GATEWAY))
1479 continue;
1480 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1481 continue;
1482 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1483 continue;
1484 break;
1485 }
1486
1487 if (!rt)
1488 rt = net->ipv6.ip6_null_entry;
1489 else if (rt->dst.error) {
1490 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1491 goto out;
1492 }
1493
1494 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1495 fn = fib6_backtrack(fn, &fl6->saddr);
1496 if (fn)
1497 goto restart;
b55b76b2 1498 }
a3c00e46 1499
b0a1ba59 1500out:
b55b76b2
DJ
1501 dst_hold(&rt->dst);
1502
1503 read_unlock_bh(&table->tb6_lock);
1504
b811580d 1505 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1506 return rt;
1507};
1508
1509static struct dst_entry *ip6_route_redirect(struct net *net,
1510 const struct flowi6 *fl6,
1511 const struct in6_addr *gateway)
1512{
1513 int flags = RT6_LOOKUP_F_HAS_SADDR;
1514 struct ip6rd_flowi rdfl;
1515
1516 rdfl.fl6 = *fl6;
1517 rdfl.gateway = *gateway;
1518
1519 return fib6_rule_lookup(net, &rdfl.fl6,
1520 flags, __ip6_route_redirect);
1521}
1522
3a5ad2ee
DM
1523void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1524{
1525 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1526 struct dst_entry *dst;
1527 struct flowi6 fl6;
1528
1529 memset(&fl6, 0, sizeof(fl6));
e374c618 1530 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1531 fl6.flowi6_oif = oif;
1532 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1533 fl6.daddr = iph->daddr;
1534 fl6.saddr = iph->saddr;
6502ca52 1535 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1536
b55b76b2
DJ
1537 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1538 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1539 dst_release(dst);
1540}
1541EXPORT_SYMBOL_GPL(ip6_redirect);
1542
c92a59ec
DJ
1543void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1544 u32 mark)
1545{
1546 const struct ipv6hdr *iph = ipv6_hdr(skb);
1547 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1548 struct dst_entry *dst;
1549 struct flowi6 fl6;
1550
1551 memset(&fl6, 0, sizeof(fl6));
e374c618 1552 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1553 fl6.flowi6_oif = oif;
1554 fl6.flowi6_mark = mark;
c92a59ec
DJ
1555 fl6.daddr = msg->dest;
1556 fl6.saddr = iph->daddr;
1557
b55b76b2
DJ
1558 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1559 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1560 dst_release(dst);
1561}
1562
3a5ad2ee
DM
1563void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1564{
1565 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1566}
1567EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1568
0dbaee3b 1569static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1570{
0dbaee3b
DM
1571 struct net_device *dev = dst->dev;
1572 unsigned int mtu = dst_mtu(dst);
1573 struct net *net = dev_net(dev);
1574
1da177e4
LT
1575 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1576
5578689a
DL
1577 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1578 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1579
1580 /*
1ab1457c
YH
1581 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1582 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1583 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1584 * rely only on pmtu discovery"
1585 */
1586 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1587 mtu = IPV6_MAXPLEN;
1588 return mtu;
1589}
1590
ebb762f2 1591static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1592{
4b32b5ad
MKL
1593 const struct rt6_info *rt = (const struct rt6_info *)dst;
1594 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1595 struct inet6_dev *idev;
618f9bc7 1596
4b32b5ad
MKL
1597 if (mtu)
1598 goto out;
1599
1600 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1601 if (mtu)
30f78d8e 1602 goto out;
618f9bc7
SK
1603
1604 mtu = IPV6_MIN_MTU;
d33e4553
DM
1605
1606 rcu_read_lock();
1607 idev = __in6_dev_get(dst->dev);
1608 if (idev)
1609 mtu = idev->cnf.mtu6;
1610 rcu_read_unlock();
1611
30f78d8e 1612out:
14972cbd
RP
1613 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1614
1615 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1616}
1617
3b00944c
YH
/*
 * Head of the list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() unlinks and frees the
 * entries whose reference count has dropped to zero.
 */
1618static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every access to icmp6_dst_gc_list below. */
1619static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1620
3b00944c 1621struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1622 struct flowi6 *fl6)
1da177e4 1623{
87a11578 1624 struct dst_entry *dst;
1da177e4
LT
1625 struct rt6_info *rt;
1626 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1627 struct net *net = dev_net(dev);
1da177e4 1628
38308473 1629 if (unlikely(!idev))
122bdf67 1630 return ERR_PTR(-ENODEV);
1da177e4 1631
ad706862 1632 rt = ip6_dst_alloc(net, dev, 0);
38308473 1633 if (unlikely(!rt)) {
1da177e4 1634 in6_dev_put(idev);
87a11578 1635 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1636 goto out;
1637 }
1638
8e2ec639
YZ
1639 rt->dst.flags |= DST_HOST;
1640 rt->dst.output = ip6_output;
d8d1f30b 1641 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1642 rt->rt6i_gateway = fl6->daddr;
87a11578 1643 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1644 rt->rt6i_dst.plen = 128;
1645 rt->rt6i_idev = idev;
14edd87d 1646 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1647
3b00944c 1648 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1649 rt->dst.next = icmp6_dst_gc_list;
1650 icmp6_dst_gc_list = &rt->dst;
3b00944c 1651 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1652
5578689a 1653 fib6_force_start_gc(net);
1da177e4 1654
87a11578
DM
1655 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1656
1da177e4 1657out:
87a11578 1658 return dst;
1da177e4
LT
1659}
1660
3d0f24a7 1661int icmp6_dst_gc(void)
1da177e4 1662{
e9476e95 1663 struct dst_entry *dst, **pprev;
3d0f24a7 1664 int more = 0;
1da177e4 1665
3b00944c
YH
1666 spin_lock_bh(&icmp6_dst_lock);
1667 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1668
1da177e4
LT
1669 while ((dst = *pprev) != NULL) {
1670 if (!atomic_read(&dst->__refcnt)) {
1671 *pprev = dst->next;
1672 dst_free(dst);
1da177e4
LT
1673 } else {
1674 pprev = &dst->next;
3d0f24a7 1675 ++more;
1da177e4
LT
1676 }
1677 }
1678
3b00944c 1679 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1680
3d0f24a7 1681 return more;
1da177e4
LT
1682}
1683
1e493d19
DM
1684static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1685 void *arg)
1686{
1687 struct dst_entry *dst, **pprev;
1688
1689 spin_lock_bh(&icmp6_dst_lock);
1690 pprev = &icmp6_dst_gc_list;
1691 while ((dst = *pprev) != NULL) {
1692 struct rt6_info *rt = (struct rt6_info *) dst;
1693 if (func(rt, arg)) {
1694 *pprev = dst->next;
1695 dst_free(dst);
1696 } else {
1697 pprev = &dst->next;
1698 }
1699 }
1700 spin_unlock_bh(&icmp6_dst_lock);
1701}
1702
569d3645 1703static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1704{
86393e52 1705 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1706 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1707 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1708 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1709 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1710 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1711 int entries;
7019b78e 1712
fc66f95c 1713 entries = dst_entries_get_fast(ops);
49a18d86 1714 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1715 entries <= rt_max_size)
1da177e4
LT
1716 goto out;
1717
6891a346 1718 net->ipv6.ip6_rt_gc_expire++;
14956643 1719 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1720 entries = dst_entries_get_slow(ops);
1721 if (entries < ops->gc_thresh)
7019b78e 1722 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1723out:
7019b78e 1724 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1725 return entries > rt_max_size;
1da177e4
LT
1726}
1727
e715b6d3
FW
1728static int ip6_convert_metrics(struct mx6_config *mxc,
1729 const struct fib6_config *cfg)
1730{
c3a8d947 1731 bool ecn_ca = false;
e715b6d3
FW
1732 struct nlattr *nla;
1733 int remaining;
1734 u32 *mp;
1735
63159f29 1736 if (!cfg->fc_mx)
e715b6d3
FW
1737 return 0;
1738
1739 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1740 if (unlikely(!mp))
1741 return -ENOMEM;
1742
1743 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1744 int type = nla_type(nla);
1bb14807 1745 u32 val;
e715b6d3 1746
1bb14807
DB
1747 if (!type)
1748 continue;
1749 if (unlikely(type > RTAX_MAX))
1750 goto err;
ea697639 1751
1bb14807
DB
1752 if (type == RTAX_CC_ALGO) {
1753 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1754
1bb14807 1755 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1756 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1757 if (val == TCP_CA_UNSPEC)
1758 goto err;
1759 } else {
1760 val = nla_get_u32(nla);
e715b6d3 1761 }
626abd59
PA
1762 if (type == RTAX_HOPLIMIT && val > 255)
1763 val = 255;
b8d3e416
DB
1764 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1765 goto err;
1bb14807
DB
1766
1767 mp[type - 1] = val;
1768 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1769 }
1770
c3a8d947
DB
1771 if (ecn_ca) {
1772 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1773 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1774 }
e715b6d3 1775
c3a8d947 1776 mxc->mx = mp;
e715b6d3
FW
1777 return 0;
1778 err:
1779 kfree(mp);
1780 return -EINVAL;
1781}
1da177e4 1782
8c14586f
DA
1783static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1784 struct fib6_config *cfg,
1785 const struct in6_addr *gw_addr)
1786{
1787 struct flowi6 fl6 = {
1788 .flowi6_oif = cfg->fc_ifindex,
1789 .daddr = *gw_addr,
1790 .saddr = cfg->fc_prefsrc,
1791 };
1792 struct fib6_table *table;
1793 struct rt6_info *rt;
48f1dcb5 1794 int flags = RT6_LOOKUP_F_IFACE;
8c14586f
DA
1795
1796 table = fib6_get_table(net, cfg->fc_table);
1797 if (!table)
1798 return NULL;
1799
1800 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1801 flags |= RT6_LOOKUP_F_HAS_SADDR;
1802
1803 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1804
1805 /* if table lookup failed, fall back to full lookup */
1806 if (rt == net->ipv6.ip6_null_entry) {
1807 ip6_rt_put(rt);
1808 rt = NULL;
1809 }
1810
1811 return rt;
1812}
1813
8c5b83f0 1814static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1815{
5578689a 1816 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1817 struct rt6_info *rt = NULL;
1818 struct net_device *dev = NULL;
1819 struct inet6_dev *idev = NULL;
c71099ac 1820 struct fib6_table *table;
1da177e4 1821 int addr_type;
8c5b83f0 1822 int err = -EINVAL;
1da177e4 1823
86872cb5 1824 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1825 goto out;
1da177e4 1826#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1827 if (cfg->fc_src_len)
8c5b83f0 1828 goto out;
1da177e4 1829#endif
86872cb5 1830 if (cfg->fc_ifindex) {
1da177e4 1831 err = -ENODEV;
5578689a 1832 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1833 if (!dev)
1834 goto out;
1835 idev = in6_dev_get(dev);
1836 if (!idev)
1837 goto out;
1838 }
1839
86872cb5
TG
1840 if (cfg->fc_metric == 0)
1841 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1842
d71314b4 1843 err = -ENOBUFS;
38308473
DM
1844 if (cfg->fc_nlinfo.nlh &&
1845 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1846 table = fib6_get_table(net, cfg->fc_table);
38308473 1847 if (!table) {
f3213831 1848 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1849 table = fib6_new_table(net, cfg->fc_table);
1850 }
1851 } else {
1852 table = fib6_new_table(net, cfg->fc_table);
1853 }
38308473
DM
1854
1855 if (!table)
c71099ac 1856 goto out;
c71099ac 1857
ad706862
MKL
1858 rt = ip6_dst_alloc(net, NULL,
1859 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1860
38308473 1861 if (!rt) {
1da177e4
LT
1862 err = -ENOMEM;
1863 goto out;
1864 }
1865
1716a961
G
1866 if (cfg->fc_flags & RTF_EXPIRES)
1867 rt6_set_expires(rt, jiffies +
1868 clock_t_to_jiffies(cfg->fc_expires));
1869 else
1870 rt6_clean_expires(rt);
1da177e4 1871
86872cb5
TG
1872 if (cfg->fc_protocol == RTPROT_UNSPEC)
1873 cfg->fc_protocol = RTPROT_BOOT;
1874 rt->rt6i_protocol = cfg->fc_protocol;
1875
1876 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1877
1878 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1879 rt->dst.input = ip6_mc_input;
ab79ad14
1880 else if (cfg->fc_flags & RTF_LOCAL)
1881 rt->dst.input = ip6_input;
1da177e4 1882 else
d8d1f30b 1883 rt->dst.input = ip6_forward;
1da177e4 1884
d8d1f30b 1885 rt->dst.output = ip6_output;
1da177e4 1886
19e42e45
RP
1887 if (cfg->fc_encap) {
1888 struct lwtunnel_state *lwtstate;
1889
1890 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1891 cfg->fc_encap, AF_INET6, cfg,
1892 &lwtstate);
19e42e45
RP
1893 if (err)
1894 goto out;
61adedf3
JB
1895 rt->dst.lwtstate = lwtstate_get(lwtstate);
1896 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1897 rt->dst.lwtstate->orig_output = rt->dst.output;
1898 rt->dst.output = lwtunnel_output;
25368623 1899 }
61adedf3
JB
1900 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1901 rt->dst.lwtstate->orig_input = rt->dst.input;
1902 rt->dst.input = lwtunnel_input;
25368623 1903 }
19e42e45
RP
1904 }
1905
86872cb5
TG
1906 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1907 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1908 if (rt->rt6i_dst.plen == 128)
e5fd387a 1909 rt->dst.flags |= DST_HOST;
e5fd387a 1910
1da177e4 1911#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1912 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1913 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1914#endif
1915
86872cb5 1916 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1917
1918 /* We cannot add true routes via loopback here,
1919 they would result in kernel looping; promote them to reject routes
1920 */
86872cb5 1921 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1922 (dev && (dev->flags & IFF_LOOPBACK) &&
1923 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1924 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1925 /* hold loopback dev/idev if we haven't done so. */
5578689a 1926 if (dev != net->loopback_dev) {
1da177e4
LT
1927 if (dev) {
1928 dev_put(dev);
1929 in6_dev_put(idev);
1930 }
5578689a 1931 dev = net->loopback_dev;
1da177e4
LT
1932 dev_hold(dev);
1933 idev = in6_dev_get(dev);
1934 if (!idev) {
1935 err = -ENODEV;
1936 goto out;
1937 }
1938 }
1da177e4 1939 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1940 switch (cfg->fc_type) {
1941 case RTN_BLACKHOLE:
1942 rt->dst.error = -EINVAL;
ede2059d 1943 rt->dst.output = dst_discard_out;
7150aede 1944 rt->dst.input = dst_discard;
ef2c7d7b
ND
1945 break;
1946 case RTN_PROHIBIT:
1947 rt->dst.error = -EACCES;
7150aede
K
1948 rt->dst.output = ip6_pkt_prohibit_out;
1949 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1950 break;
b4949ab2 1951 case RTN_THROW:
0315e382 1952 case RTN_UNREACHABLE:
ef2c7d7b 1953 default:
7150aede 1954 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1955 : (cfg->fc_type == RTN_UNREACHABLE)
1956 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1957 rt->dst.output = ip6_pkt_discard_out;
1958 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1959 break;
1960 }
1da177e4
LT
1961 goto install_route;
1962 }
1963
86872cb5 1964 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1965 const struct in6_addr *gw_addr;
1da177e4
LT
1966 int gwa_type;
1967
86872cb5 1968 gw_addr = &cfg->fc_gateway;
330567b7 1969 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1970
1971 /* if gw_addr is local we will fail to detect this in case
1972 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1973 * will return already-added prefix route via interface that
1974 * prefix route was assigned to, which might be non-loopback.
1975 */
1976 err = -EINVAL;
330567b7
FW
1977 if (ipv6_chk_addr_and_flags(net, gw_addr,
1978 gwa_type & IPV6_ADDR_LINKLOCAL ?
1979 dev : NULL, 0, 0))
48ed7b26
FW
1980 goto out;
1981
4e3fd7a0 1982 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1983
1984 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 1985 struct rt6_info *grt = NULL;
1da177e4
LT
1986
1987 /* IPv6 strictly inhibits using not link-local
1988 addresses as nexthop address.
1989 Otherwise, router will not able to send redirects.
1990 It is very good, but in some (rare!) circumstances
1991 (SIT, PtP, NBMA NOARP links) it is handy to allow
1992 some exceptions. --ANK
1993 */
38308473 1994 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1995 goto out;
1996
a435a07f 1997 if (cfg->fc_table) {
8c14586f
DA
1998 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1999
a435a07f
VB
2000 if (grt) {
2001 if (grt->rt6i_flags & RTF_GATEWAY ||
2002 (dev && dev != grt->dst.dev)) {
2003 ip6_rt_put(grt);
2004 grt = NULL;
2005 }
2006 }
2007 }
2008
8c14586f
DA
2009 if (!grt)
2010 grt = rt6_lookup(net, gw_addr, NULL,
2011 cfg->fc_ifindex, 1);
1da177e4
LT
2012
2013 err = -EHOSTUNREACH;
38308473 2014 if (!grt)
1da177e4
LT
2015 goto out;
2016 if (dev) {
d1918542 2017 if (dev != grt->dst.dev) {
94e187c0 2018 ip6_rt_put(grt);
1da177e4
LT
2019 goto out;
2020 }
2021 } else {
d1918542 2022 dev = grt->dst.dev;
1da177e4
LT
2023 idev = grt->rt6i_idev;
2024 dev_hold(dev);
2025 in6_dev_hold(grt->rt6i_idev);
2026 }
38308473 2027 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2028 err = 0;
94e187c0 2029 ip6_rt_put(grt);
1da177e4
LT
2030
2031 if (err)
2032 goto out;
2033 }
2034 err = -EINVAL;
38308473 2035 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
2036 goto out;
2037 }
2038
2039 err = -ENODEV;
38308473 2040 if (!dev)
1da177e4
LT
2041 goto out;
2042
c3968a85
DW
2043 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2044 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2045 err = -EINVAL;
2046 goto out;
2047 }
4e3fd7a0 2048 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2049 rt->rt6i_prefsrc.plen = 128;
2050 } else
2051 rt->rt6i_prefsrc.plen = 0;
2052
86872cb5 2053 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2054
2055install_route:
d8d1f30b 2056 rt->dst.dev = dev;
1da177e4 2057 rt->rt6i_idev = idev;
c71099ac 2058 rt->rt6i_table = table;
63152fc0 2059
c346dca1 2060 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2061
8c5b83f0 2062 return rt;
6b9ea5a6
RP
2063out:
2064 if (dev)
2065 dev_put(dev);
2066 if (idev)
2067 in6_dev_put(idev);
2068 if (rt)
2069 dst_free(&rt->dst);
2070
8c5b83f0 2071 return ERR_PTR(err);
6b9ea5a6
RP
2072}
2073
2074int ip6_route_add(struct fib6_config *cfg)
2075{
2076 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2077 struct rt6_info *rt;
6b9ea5a6
RP
2078 int err;
2079
8c5b83f0
RP
2080 rt = ip6_route_info_create(cfg);
2081 if (IS_ERR(rt)) {
2082 err = PTR_ERR(rt);
2083 rt = NULL;
6b9ea5a6 2084 goto out;
8c5b83f0 2085 }
6b9ea5a6 2086
e715b6d3
FW
2087 err = ip6_convert_metrics(&mxc, cfg);
2088 if (err)
2089 goto out;
1da177e4 2090
e715b6d3
FW
2091 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2092
2093 kfree(mxc.mx);
6b9ea5a6 2094
e715b6d3 2095 return err;
1da177e4 2096out:
1da177e4 2097 if (rt)
d8d1f30b 2098 dst_free(&rt->dst);
6b9ea5a6 2099
1da177e4
LT
2100 return err;
2101}
2102
86872cb5 2103static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2104{
2105 int err;
c71099ac 2106 struct fib6_table *table;
d1918542 2107 struct net *net = dev_net(rt->dst.dev);
1da177e4 2108
8e3d5be7
MKL
2109 if (rt == net->ipv6.ip6_null_entry ||
2110 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2111 err = -ENOENT;
2112 goto out;
2113 }
6c813a72 2114
c71099ac
TG
2115 table = rt->rt6i_table;
2116 write_lock_bh(&table->tb6_lock);
86872cb5 2117 err = fib6_del(rt, info);
c71099ac 2118 write_unlock_bh(&table->tb6_lock);
1da177e4 2119
6825a26c 2120out:
94e187c0 2121 ip6_rt_put(rt);
1da177e4
LT
2122 return err;
2123}
2124
e0a1ad73
TG
2125int ip6_del_rt(struct rt6_info *rt)
2126{
4d1169c1 2127 struct nl_info info = {
d1918542 2128 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2129 };
528c4ceb 2130 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2131}
2132
86872cb5 2133static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2134{
c71099ac 2135 struct fib6_table *table;
1da177e4
LT
2136 struct fib6_node *fn;
2137 struct rt6_info *rt;
2138 int err = -ESRCH;
2139
5578689a 2140 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2141 if (!table)
c71099ac
TG
2142 return err;
2143
2144 read_lock_bh(&table->tb6_lock);
1da177e4 2145
c71099ac 2146 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2147 &cfg->fc_dst, cfg->fc_dst_len,
2148 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2149
1da177e4 2150 if (fn) {
d8d1f30b 2151 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2152 if ((rt->rt6i_flags & RTF_CACHE) &&
2153 !(cfg->fc_flags & RTF_CACHE))
2154 continue;
86872cb5 2155 if (cfg->fc_ifindex &&
d1918542
DM
2156 (!rt->dst.dev ||
2157 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2158 continue;
86872cb5
TG
2159 if (cfg->fc_flags & RTF_GATEWAY &&
2160 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2161 continue;
86872cb5 2162 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2163 continue;
d8d1f30b 2164 dst_hold(&rt->dst);
c71099ac 2165 read_unlock_bh(&table->tb6_lock);
1da177e4 2166
86872cb5 2167 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2168 }
2169 }
c71099ac 2170 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2171
2172 return err;
2173}
2174
6700c270 2175static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2176{
a6279458 2177 struct netevent_redirect netevent;
e8599ff4 2178 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2179 struct ndisc_options ndopts;
2180 struct inet6_dev *in6_dev;
2181 struct neighbour *neigh;
71bcdba0 2182 struct rd_msg *msg;
6e157b6a
DM
2183 int optlen, on_link;
2184 u8 *lladdr;
e8599ff4 2185
29a3cad5 2186 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2187 optlen -= sizeof(*msg);
e8599ff4
DM
2188
2189 if (optlen < 0) {
6e157b6a 2190 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2191 return;
2192 }
2193
71bcdba0 2194 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2195
71bcdba0 2196 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2197 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2198 return;
2199 }
2200
6e157b6a 2201 on_link = 0;
71bcdba0 2202 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2203 on_link = 1;
71bcdba0 2204 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2205 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2206 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2207 return;
2208 }
2209
2210 in6_dev = __in6_dev_get(skb->dev);
2211 if (!in6_dev)
2212 return;
2213 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2214 return;
2215
2216 /* RFC2461 8.1:
2217 * The IP source address of the Redirect MUST be the same as the current
2218 * first-hop router for the specified ICMP Destination Address.
2219 */
2220
f997c55c 2221 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2222 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2223 return;
2224 }
6e157b6a
DM
2225
2226 lladdr = NULL;
e8599ff4
DM
2227 if (ndopts.nd_opts_tgt_lladdr) {
2228 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2229 skb->dev);
2230 if (!lladdr) {
2231 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2232 return;
2233 }
2234 }
2235
6e157b6a 2236 rt = (struct rt6_info *) dst;
ec13ad1d 2237 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2238 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2239 return;
6e157b6a 2240 }
e8599ff4 2241
6e157b6a
DM
2242 /* Redirect received -> path was valid.
2243 * Look, redirects are sent only in response to data packets,
2244 * so that this nexthop apparently is reachable. --ANK
2245 */
2246 dst_confirm(&rt->dst);
a6279458 2247
71bcdba0 2248 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2249 if (!neigh)
2250 return;
a6279458 2251
1da177e4
LT
2252 /*
2253 * We have finally decided to accept it.
2254 */
2255
f997c55c 2256 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2257 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2258 NEIGH_UPDATE_F_OVERRIDE|
2259 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2260 NEIGH_UPDATE_F_ISROUTER)),
2261 NDISC_REDIRECT, &ndopts);
1da177e4 2262
83a09abd 2263 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2264 if (!nrt)
1da177e4
LT
2265 goto out;
2266
2267 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2268 if (on_link)
2269 nrt->rt6i_flags &= ~RTF_GATEWAY;
2270
4e3fd7a0 2271 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2272
40e22e8f 2273 if (ip6_ins_rt(nrt))
1da177e4
LT
2274 goto out;
2275
d8d1f30b
CG
2276 netevent.old = &rt->dst;
2277 netevent.new = &nrt->dst;
71bcdba0 2278 netevent.daddr = &msg->dest;
60592833 2279 netevent.neigh = neigh;
8d71740c
TT
2280 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2281
38308473 2282 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2283 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2284 ip6_del_rt(rt);
1da177e4
LT
2285 }
2286
2287out:
e8599ff4 2288 neigh_release(neigh);
6e157b6a
DM
2289}
2290
1da177e4
LT
2291/*
2292 * Misc support functions
2293 */
2294
4b32b5ad
MKL
2295static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2296{
2297 BUG_ON(from->dst.from);
2298
2299 rt->rt6i_flags &= ~RTF_EXPIRES;
2300 dst_hold(&from->dst);
2301 rt->dst.from = &from->dst;
2302 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2303}
2304
83a09abd
MKL
2305static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2306{
2307 rt->dst.input = ort->dst.input;
2308 rt->dst.output = ort->dst.output;
2309 rt->rt6i_dst = ort->rt6i_dst;
2310 rt->dst.error = ort->dst.error;
2311 rt->rt6i_idev = ort->rt6i_idev;
2312 if (rt->rt6i_idev)
2313 in6_dev_hold(rt->rt6i_idev);
2314 rt->dst.lastuse = jiffies;
2315 rt->rt6i_gateway = ort->rt6i_gateway;
2316 rt->rt6i_flags = ort->rt6i_flags;
2317 rt6_set_from(rt, ort);
2318 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2319#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2320 rt->rt6i_src = ort->rt6i_src;
1da177e4 2321#endif
83a09abd
MKL
2322 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2323 rt->rt6i_table = ort->rt6i_table;
61adedf3 2324 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2325}
2326
70ceb4f5 2327#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2328static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2329 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2330 const struct in6_addr *gwaddr,
2331 struct net_device *dev)
70ceb4f5 2332{
830218c1
DA
2333 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2334 int ifindex = dev->ifindex;
70ceb4f5
YH
2335 struct fib6_node *fn;
2336 struct rt6_info *rt = NULL;
c71099ac
TG
2337 struct fib6_table *table;
2338
830218c1 2339 table = fib6_get_table(net, tb_id);
38308473 2340 if (!table)
c71099ac 2341 return NULL;
70ceb4f5 2342
5744dd9b 2343 read_lock_bh(&table->tb6_lock);
67ba4152 2344 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2345 if (!fn)
2346 goto out;
2347
d8d1f30b 2348 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2349 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2350 continue;
2351 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2352 continue;
2353 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2354 continue;
d8d1f30b 2355 dst_hold(&rt->dst);
70ceb4f5
YH
2356 break;
2357 }
2358out:
5744dd9b 2359 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2360 return rt;
2361}
2362
efa2cea0 2363static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2364 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2365 const struct in6_addr *gwaddr,
2366 struct net_device *dev,
95c96174 2367 unsigned int pref)
70ceb4f5 2368{
86872cb5 2369 struct fib6_config cfg = {
238fc7ea 2370 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2371 .fc_ifindex = dev->ifindex,
86872cb5
TG
2372 .fc_dst_len = prefixlen,
2373 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2374 RTF_UP | RTF_PREF(pref),
15e47304 2375 .fc_nlinfo.portid = 0,
efa2cea0
DL
2376 .fc_nlinfo.nlh = NULL,
2377 .fc_nlinfo.nl_net = net,
86872cb5
TG
2378 };
2379
830218c1 2380 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2381 cfg.fc_dst = *prefix;
2382 cfg.fc_gateway = *gwaddr;
70ceb4f5 2383
e317da96
YH
2384 /* We should treat it as a default route if prefix length is 0. */
2385 if (!prefixlen)
86872cb5 2386 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2387
86872cb5 2388 ip6_route_add(&cfg);
70ceb4f5 2389
830218c1 2390 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2391}
2392#endif
2393
b71d1d42 2394struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2395{
830218c1 2396 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2397 struct rt6_info *rt;
c71099ac 2398 struct fib6_table *table;
1da177e4 2399
830218c1 2400 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2401 if (!table)
c71099ac 2402 return NULL;
1da177e4 2403
5744dd9b 2404 read_lock_bh(&table->tb6_lock);
67ba4152 2405 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2406 if (dev == rt->dst.dev &&
045927ff 2407 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2408 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2409 break;
2410 }
2411 if (rt)
d8d1f30b 2412 dst_hold(&rt->dst);
5744dd9b 2413 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2414 return rt;
2415}
2416
b71d1d42 2417struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2418 struct net_device *dev,
2419 unsigned int pref)
1da177e4 2420{
86872cb5 2421 struct fib6_config cfg = {
ca254490 2422 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2423 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2424 .fc_ifindex = dev->ifindex,
2425 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2426 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2427 .fc_nlinfo.portid = 0,
5578689a 2428 .fc_nlinfo.nlh = NULL,
c346dca1 2429 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2430 };
1da177e4 2431
4e3fd7a0 2432 cfg.fc_gateway = *gwaddr;
1da177e4 2433
830218c1
DA
2434 if (!ip6_route_add(&cfg)) {
2435 struct fib6_table *table;
2436
2437 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2438 if (table)
2439 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2440 }
1da177e4 2441
1da177e4
LT
2442 return rt6_get_dflt_router(gwaddr, dev);
2443}
2444
830218c1 2445static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2446{
2447 struct rt6_info *rt;
2448
2449restart:
c71099ac 2450 read_lock_bh(&table->tb6_lock);
d8d1f30b 2451 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2452 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2453 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2454 dst_hold(&rt->dst);
c71099ac 2455 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2456 ip6_del_rt(rt);
1da177e4
LT
2457 goto restart;
2458 }
2459 }
c71099ac 2460 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2461
2462 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2463}
2464
2465void rt6_purge_dflt_routers(struct net *net)
2466{
2467 struct fib6_table *table;
2468 struct hlist_head *head;
2469 unsigned int h;
2470
2471 rcu_read_lock();
2472
2473 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2474 head = &net->ipv6.fib_table_hash[h];
2475 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2476 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2477 __rt6_purge_dflt_routers(table);
2478 }
2479 }
2480
2481 rcu_read_unlock();
1da177e4
LT
2482}
2483
5578689a
DL
2484static void rtmsg_to_fib6_config(struct net *net,
2485 struct in6_rtmsg *rtmsg,
86872cb5
TG
2486 struct fib6_config *cfg)
2487{
2488 memset(cfg, 0, sizeof(*cfg));
2489
ca254490
DA
2490 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2491 : RT6_TABLE_MAIN;
86872cb5
TG
2492 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2493 cfg->fc_metric = rtmsg->rtmsg_metric;
2494 cfg->fc_expires = rtmsg->rtmsg_info;
2495 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2496 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2497 cfg->fc_flags = rtmsg->rtmsg_flags;
2498
5578689a 2499 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2500
4e3fd7a0
AD
2501 cfg->fc_dst = rtmsg->rtmsg_dst;
2502 cfg->fc_src = rtmsg->rtmsg_src;
2503 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2504}
2505
5578689a 2506int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2507{
86872cb5 2508 struct fib6_config cfg;
1da177e4
LT
2509 struct in6_rtmsg rtmsg;
2510 int err;
2511
67ba4152 2512 switch (cmd) {
1da177e4
LT
2513 case SIOCADDRT: /* Add a route */
2514 case SIOCDELRT: /* Delete a route */
af31f412 2515 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2516 return -EPERM;
2517 err = copy_from_user(&rtmsg, arg,
2518 sizeof(struct in6_rtmsg));
2519 if (err)
2520 return -EFAULT;
86872cb5 2521
5578689a 2522 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2523
1da177e4
LT
2524 rtnl_lock();
2525 switch (cmd) {
2526 case SIOCADDRT:
86872cb5 2527 err = ip6_route_add(&cfg);
1da177e4
LT
2528 break;
2529 case SIOCDELRT:
86872cb5 2530 err = ip6_route_del(&cfg);
1da177e4
LT
2531 break;
2532 default:
2533 err = -EINVAL;
2534 }
2535 rtnl_unlock();
2536
2537 return err;
3ff50b79 2538 }
1da177e4
LT
2539
2540 return -EINVAL;
2541}
2542
2543/*
2544 * Drop the packet on the floor
2545 */
2546
d5fdd6ba 2547static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2548{
612f09e8 2549 int type;
adf30907 2550 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2551 switch (ipstats_mib_noroutes) {
2552 case IPSTATS_MIB_INNOROUTES:
0660e03f 2553 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2554 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2555 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2556 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2557 break;
2558 }
2559 /* FALLTHROUGH */
2560 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2561 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2562 ipstats_mib_noroutes);
612f09e8
YH
2563 break;
2564 }
3ffe533c 2565 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2566 kfree_skb(skb);
2567 return 0;
2568}
2569
9ce8ade0
TG
2570static int ip6_pkt_discard(struct sk_buff *skb)
2571{
612f09e8 2572 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2573}
2574
ede2059d 2575static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2576{
adf30907 2577 skb->dev = skb_dst(skb)->dev;
612f09e8 2578 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2579}
2580
9ce8ade0
TG
2581static int ip6_pkt_prohibit(struct sk_buff *skb)
2582{
612f09e8 2583 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2584}
2585
ede2059d 2586static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2587{
adf30907 2588 skb->dev = skb_dst(skb)->dev;
612f09e8 2589 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2590}
2591
1da177e4
LT
2592/*
2593 * Allocate a dst for local (unicast / anycast) address.
2594 */
2595
2596struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2597 const struct in6_addr *addr,
8f031519 2598 bool anycast)
1da177e4 2599{
ca254490 2600 u32 tb_id;
c346dca1 2601 struct net *net = dev_net(idev->dev);
5f02ce24
DA
2602 struct net_device *dev = net->loopback_dev;
2603 struct rt6_info *rt;
2604
2605 /* use L3 Master device as loopback for host routes if device
2606 * is enslaved and address is not link local or multicast
2607 */
2608 if (!rt6_need_strict(addr))
2609 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2610
2611 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2612 if (!rt)
1da177e4
LT
2613 return ERR_PTR(-ENOMEM);
2614
1da177e4
LT
2615 in6_dev_hold(idev);
2616
11d53b49 2617 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2618 rt->dst.input = ip6_input;
2619 rt->dst.output = ip6_output;
1da177e4 2620 rt->rt6i_idev = idev;
1da177e4
LT
2621
2622 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2623 if (anycast)
2624 rt->rt6i_flags |= RTF_ANYCAST;
2625 else
1da177e4 2626 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2627
550bab42 2628 rt->rt6i_gateway = *addr;
4e3fd7a0 2629 rt->rt6i_dst.addr = *addr;
1da177e4 2630 rt->rt6i_dst.plen = 128;
ca254490
DA
2631 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2632 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2633 rt->dst.flags |= DST_NOCACHE;
1da177e4 2634
d8d1f30b 2635 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2636
2637 return rt;
2638}
2639
c3968a85
DW
/*
 * Removal of a deleted address from the prefsrc of routes.
 *
 * Walker argument for fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared with each prefsrc */
};
2646
2647static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2648{
2649 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2650 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2651 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2652
d1918542 2653 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2654 rt != net->ipv6.ip6_null_entry &&
2655 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2656 /* remove prefsrc entry */
2657 rt->rt6i_prefsrc.plen = 0;
2658 }
2659 return 0;
2660}
2661
2662void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2663{
2664 struct net *net = dev_net(ifp->idev->dev);
2665 struct arg_dev_net_ip adni = {
2666 .dev = ifp->idev->dev,
2667 .net = net,
2668 .addr = &ifp->addr,
2669 };
0c3584d5 2670 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2671}
2672
be7a010d
DJ
2673#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2674#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2675
2676/* Remove routers and update dst entries when gateway turn into host. */
2677static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2678{
2679 struct in6_addr *gateway = (struct in6_addr *)arg;
2680
2681 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2682 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2683 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2684 return -1;
2685 }
2686 return 0;
2687}
2688
2689void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2690{
2691 fib6_clean_all(net, fib6_clean_tohost, gateway);
2692}
2693
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2698
1da177e4
LT
2699static int fib6_ifdown(struct rt6_info *rt, void *arg)
2700{
bc3ef660 2701 const struct arg_dev_net *adn = arg;
2702 const struct net_device *dev = adn->dev;
8ed67789 2703
d1918542 2704 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2705 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2706 return -1;
c159d30c 2707
1da177e4
LT
2708 return 0;
2709}
2710
f3db4851 2711void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2712{
8ed67789
DL
2713 struct arg_dev_net adn = {
2714 .dev = dev,
2715 .net = net,
2716 };
2717
0c3584d5 2718 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2719 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2720 if (dev)
2721 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2722}
2723
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2728
2729static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2730{
2731 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2732 struct inet6_dev *idev;
2733
2734 /* In IPv6 pmtu discovery is not optional,
2735 so that RTAX_MTU lock cannot disable it.
2736 We still use this lock to block changes
2737 caused by addrconf/ndisc.
2738 */
2739
2740 idev = __in6_dev_get(arg->dev);
38308473 2741 if (!idev)
1da177e4
LT
2742 return 0;
2743
2744 /* For administrative MTU increase, there is no way to discover
2745 IPv6 PMTU increase, so PMTU increase should be updated here.
2746 Since RFC 1981 doesn't include administrative MTU increase
2747 update PMTU increase is a MUST. (i.e. jumbo frame)
2748 */
2749 /*
2750 If new MTU is less than route PMTU, this new MTU will be the
2751 lowest MTU in the path, update the route PMTU to reflect PMTU
2752 decreases; if new MTU is greater than route PMTU, and the
2753 old MTU is the lowest MTU in the path, update the route PMTU
2754 to reflect the increase. In this case if the other nodes' MTU
2755 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2756 PMTU discouvery.
2757 */
d1918542 2758 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2759 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2760 if (rt->rt6i_flags & RTF_CACHE) {
2761 /* For RTF_CACHE with rt6i_pmtu == 0
2762 * (i.e. a redirected route),
2763 * the metrics of its rt->dst.from has already
2764 * been updated.
2765 */
2766 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2767 rt->rt6i_pmtu = arg->mtu;
2768 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2769 (dst_mtu(&rt->dst) < arg->mtu &&
2770 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2771 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2772 }
566cfd8f 2773 }
1da177e4
LT
2774 return 0;
2775}
2776
95c96174 2777void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2778{
c71099ac
TG
2779 struct rt6_mtu_change_arg arg = {
2780 .dev = dev,
2781 .mtu = mtu,
2782 };
1da177e4 2783
0c3584d5 2784 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2785}
2786
ef7c79ed 2787static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2788 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2789 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2790 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2791 [RTA_PRIORITY] = { .type = NLA_U32 },
2792 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2793 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2794 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2795 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2796 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2797 [RTA_EXPIRES] = { .type = NLA_U32 },
86872cb5
TG
2798};
2799
2800static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2801 struct fib6_config *cfg)
1da177e4 2802{
86872cb5
TG
2803 struct rtmsg *rtm;
2804 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2805 unsigned int pref;
86872cb5 2806 int err;
1da177e4 2807
86872cb5
TG
2808 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2809 if (err < 0)
2810 goto errout;
1da177e4 2811
86872cb5
TG
2812 err = -EINVAL;
2813 rtm = nlmsg_data(nlh);
2814 memset(cfg, 0, sizeof(*cfg));
2815
2816 cfg->fc_table = rtm->rtm_table;
2817 cfg->fc_dst_len = rtm->rtm_dst_len;
2818 cfg->fc_src_len = rtm->rtm_src_len;
2819 cfg->fc_flags = RTF_UP;
2820 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2821 cfg->fc_type = rtm->rtm_type;
86872cb5 2822
ef2c7d7b
ND
2823 if (rtm->rtm_type == RTN_UNREACHABLE ||
2824 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2825 rtm->rtm_type == RTN_PROHIBIT ||
2826 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2827 cfg->fc_flags |= RTF_REJECT;
2828
ab79ad14
2829 if (rtm->rtm_type == RTN_LOCAL)
2830 cfg->fc_flags |= RTF_LOCAL;
2831
1f56a01f
MKL
2832 if (rtm->rtm_flags & RTM_F_CLONED)
2833 cfg->fc_flags |= RTF_CACHE;
2834
15e47304 2835 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2836 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2837 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2838
2839 if (tb[RTA_GATEWAY]) {
67b61f6c 2840 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2841 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2842 }
86872cb5
TG
2843
2844 if (tb[RTA_DST]) {
2845 int plen = (rtm->rtm_dst_len + 7) >> 3;
2846
2847 if (nla_len(tb[RTA_DST]) < plen)
2848 goto errout;
2849
2850 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2851 }
86872cb5
TG
2852
2853 if (tb[RTA_SRC]) {
2854 int plen = (rtm->rtm_src_len + 7) >> 3;
2855
2856 if (nla_len(tb[RTA_SRC]) < plen)
2857 goto errout;
2858
2859 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2860 }
86872cb5 2861
c3968a85 2862 if (tb[RTA_PREFSRC])
67b61f6c 2863 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2864
86872cb5
TG
2865 if (tb[RTA_OIF])
2866 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2867
2868 if (tb[RTA_PRIORITY])
2869 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2870
2871 if (tb[RTA_METRICS]) {
2872 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2873 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2874 }
86872cb5
TG
2875
2876 if (tb[RTA_TABLE])
2877 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2878
51ebd318
ND
2879 if (tb[RTA_MULTIPATH]) {
2880 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2881 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2882 }
2883
c78ba6d6
LR
2884 if (tb[RTA_PREF]) {
2885 pref = nla_get_u8(tb[RTA_PREF]);
2886 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2887 pref != ICMPV6_ROUTER_PREF_HIGH)
2888 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2889 cfg->fc_flags |= RTF_PREF(pref);
2890 }
2891
19e42e45
RP
2892 if (tb[RTA_ENCAP])
2893 cfg->fc_encap = tb[RTA_ENCAP];
2894
2895 if (tb[RTA_ENCAP_TYPE])
2896 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2897
32bc201e
XL
2898 if (tb[RTA_EXPIRES]) {
2899 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2900
2901 if (addrconf_finite_timeout(timeout)) {
2902 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2903 cfg->fc_flags |= RTF_EXPIRES;
2904 }
2905 }
2906
86872cb5
TG
2907 err = 0;
2908errout:
2909 return err;
1da177e4
LT
2910}
2911
6b9ea5a6
RP
2912struct rt6_nh {
2913 struct rt6_info *rt6_info;
2914 struct fib6_config r_cfg;
2915 struct mx6_config mxc;
2916 struct list_head next;
2917};
2918
2919static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2920{
2921 struct rt6_nh *nh;
2922
2923 list_for_each_entry(nh, rt6_nh_list, next) {
2924 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2925 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2926 nh->r_cfg.fc_ifindex);
2927 }
2928}
2929
2930static int ip6_route_info_append(struct list_head *rt6_nh_list,
2931 struct rt6_info *rt, struct fib6_config *r_cfg)
2932{
2933 struct rt6_nh *nh;
2934 struct rt6_info *rtnh;
2935 int err = -EEXIST;
2936
2937 list_for_each_entry(nh, rt6_nh_list, next) {
2938 /* check if rt6_info already exists */
2939 rtnh = nh->rt6_info;
2940
2941 if (rtnh->dst.dev == rt->dst.dev &&
2942 rtnh->rt6i_idev == rt->rt6i_idev &&
2943 ipv6_addr_equal(&rtnh->rt6i_gateway,
2944 &rt->rt6i_gateway))
2945 return err;
2946 }
2947
2948 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2949 if (!nh)
2950 return -ENOMEM;
2951 nh->rt6_info = rt;
2952 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2953 if (err) {
2954 kfree(nh);
2955 return err;
2956 }
2957 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2958 list_add_tail(&nh->next, rt6_nh_list);
2959
2960 return 0;
2961}
2962
2963static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2964{
2965 struct fib6_config r_cfg;
2966 struct rtnexthop *rtnh;
6b9ea5a6
RP
2967 struct rt6_info *rt;
2968 struct rt6_nh *err_nh;
2969 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2970 int remaining;
2971 int attrlen;
6b9ea5a6
RP
2972 int err = 1;
2973 int nhn = 0;
2974 int replace = (cfg->fc_nlinfo.nlh &&
2975 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2976 LIST_HEAD(rt6_nh_list);
51ebd318 2977
35f1b4e9 2978 remaining = cfg->fc_mp_len;
51ebd318 2979 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2980
6b9ea5a6
RP
2981 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2982 * rt6_info structs per nexthop
2983 */
51ebd318
ND
2984 while (rtnh_ok(rtnh, remaining)) {
2985 memcpy(&r_cfg, cfg, sizeof(*cfg));
2986 if (rtnh->rtnh_ifindex)
2987 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2988
2989 attrlen = rtnh_attrlen(rtnh);
2990 if (attrlen > 0) {
2991 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2992
2993 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2994 if (nla) {
67b61f6c 2995 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2996 r_cfg.fc_flags |= RTF_GATEWAY;
2997 }
19e42e45
RP
2998 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2999 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3000 if (nla)
3001 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3002 }
6b9ea5a6 3003
8c5b83f0
RP
3004 rt = ip6_route_info_create(&r_cfg);
3005 if (IS_ERR(rt)) {
3006 err = PTR_ERR(rt);
3007 rt = NULL;
6b9ea5a6 3008 goto cleanup;
8c5b83f0 3009 }
6b9ea5a6
RP
3010
3011 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3012 if (err) {
6b9ea5a6
RP
3013 dst_free(&rt->dst);
3014 goto cleanup;
3015 }
3016
3017 rtnh = rtnh_next(rtnh, &remaining);
3018 }
3019
3020 err_nh = NULL;
3021 list_for_each_entry(nh, &rt6_nh_list, next) {
3022 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3023 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3024 nh->rt6_info = NULL;
3025 if (err) {
3026 if (replace && nhn)
3027 ip6_print_replace_route_err(&rt6_nh_list);
3028 err_nh = nh;
3029 goto add_errout;
51ebd318 3030 }
6b9ea5a6 3031
1a72418b 3032 /* Because each route is added like a single route we remove
27596472
MK
3033 * these flags after the first nexthop: if there is a collision,
3034 * we have already failed to add the first nexthop:
3035 * fib6_add_rt2node() has rejected it; when replacing, old
3036 * nexthops have been replaced by first new, the rest should
3037 * be added to it.
1a72418b 3038 */
27596472
MK
3039 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3040 NLM_F_REPLACE);
6b9ea5a6
RP
3041 nhn++;
3042 }
3043
3044 goto cleanup;
3045
3046add_errout:
3047 /* Delete routes that were already added */
3048 list_for_each_entry(nh, &rt6_nh_list, next) {
3049 if (err_nh == nh)
3050 break;
3051 ip6_route_del(&nh->r_cfg);
3052 }
3053
3054cleanup:
3055 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3056 if (nh->rt6_info)
3057 dst_free(&nh->rt6_info->dst);
52fe51f8 3058 kfree(nh->mxc.mx);
6b9ea5a6
RP
3059 list_del(&nh->next);
3060 kfree(nh);
3061 }
3062
3063 return err;
3064}
3065
3066static int ip6_route_multipath_del(struct fib6_config *cfg)
3067{
3068 struct fib6_config r_cfg;
3069 struct rtnexthop *rtnh;
3070 int remaining;
3071 int attrlen;
3072 int err = 1, last_err = 0;
3073
3074 remaining = cfg->fc_mp_len;
3075 rtnh = (struct rtnexthop *)cfg->fc_mp;
3076
3077 /* Parse a Multipath Entry */
3078 while (rtnh_ok(rtnh, remaining)) {
3079 memcpy(&r_cfg, cfg, sizeof(*cfg));
3080 if (rtnh->rtnh_ifindex)
3081 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3082
3083 attrlen = rtnh_attrlen(rtnh);
3084 if (attrlen > 0) {
3085 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3086
3087 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3088 if (nla) {
3089 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3090 r_cfg.fc_flags |= RTF_GATEWAY;
3091 }
3092 }
3093 err = ip6_route_del(&r_cfg);
3094 if (err)
3095 last_err = err;
3096
51ebd318
ND
3097 rtnh = rtnh_next(rtnh, &remaining);
3098 }
3099
3100 return last_err;
3101}
3102
67ba4152 3103static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3104{
86872cb5
TG
3105 struct fib6_config cfg;
3106 int err;
1da177e4 3107
86872cb5
TG
3108 err = rtm_to_fib6_config(skb, nlh, &cfg);
3109 if (err < 0)
3110 return err;
3111
51ebd318 3112 if (cfg.fc_mp)
6b9ea5a6 3113 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3114 else
3115 return ip6_route_del(&cfg);
1da177e4
LT
3116}
3117
67ba4152 3118static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3119{
86872cb5
TG
3120 struct fib6_config cfg;
3121 int err;
1da177e4 3122
86872cb5
TG
3123 err = rtm_to_fib6_config(skb, nlh, &cfg);
3124 if (err < 0)
3125 return err;
3126
51ebd318 3127 if (cfg.fc_mp)
6b9ea5a6 3128 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3129 else
3130 return ip6_route_add(&cfg);
1da177e4
LT
3131}
3132
19e42e45 3133static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3134{
3135 return NLMSG_ALIGN(sizeof(struct rtmsg))
3136 + nla_total_size(16) /* RTA_SRC */
3137 + nla_total_size(16) /* RTA_DST */
3138 + nla_total_size(16) /* RTA_GATEWAY */
3139 + nla_total_size(16) /* RTA_PREFSRC */
3140 + nla_total_size(4) /* RTA_TABLE */
3141 + nla_total_size(4) /* RTA_IIF */
3142 + nla_total_size(4) /* RTA_OIF */
3143 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3144 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3145 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3146 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3147 + nla_total_size(1) /* RTA_PREF */
61adedf3 3148 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3149}
3150
191cd582
BH
3151static int rt6_fill_node(struct net *net,
3152 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3153 struct in6_addr *dst, struct in6_addr *src,
15e47304 3154 int iif, int type, u32 portid, u32 seq,
7bc570c8 3155 int prefix, int nowait, unsigned int flags)
1da177e4 3156{
4b32b5ad 3157 u32 metrics[RTAX_MAX];
1da177e4 3158 struct rtmsg *rtm;
2d7202bf 3159 struct nlmsghdr *nlh;
e3703b3d 3160 long expires;
9e762a4a 3161 u32 table;
1da177e4
LT
3162
3163 if (prefix) { /* user wants prefix routes only */
3164 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3165 /* success since this is not a prefix route */
3166 return 1;
3167 }
3168 }
3169
15e47304 3170 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3171 if (!nlh)
26932566 3172 return -EMSGSIZE;
2d7202bf
TG
3173
3174 rtm = nlmsg_data(nlh);
1da177e4
LT
3175 rtm->rtm_family = AF_INET6;
3176 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3177 rtm->rtm_src_len = rt->rt6i_src.plen;
3178 rtm->rtm_tos = 0;
c71099ac 3179 if (rt->rt6i_table)
9e762a4a 3180 table = rt->rt6i_table->tb6_id;
c71099ac 3181 else
9e762a4a
PM
3182 table = RT6_TABLE_UNSPEC;
3183 rtm->rtm_table = table;
c78679e8
DM
3184 if (nla_put_u32(skb, RTA_TABLE, table))
3185 goto nla_put_failure;
ef2c7d7b
ND
3186 if (rt->rt6i_flags & RTF_REJECT) {
3187 switch (rt->dst.error) {
3188 case -EINVAL:
3189 rtm->rtm_type = RTN_BLACKHOLE;
3190 break;
3191 case -EACCES:
3192 rtm->rtm_type = RTN_PROHIBIT;
3193 break;
b4949ab2
ND
3194 case -EAGAIN:
3195 rtm->rtm_type = RTN_THROW;
3196 break;
ef2c7d7b
ND
3197 default:
3198 rtm->rtm_type = RTN_UNREACHABLE;
3199 break;
3200 }
3201 }
38308473 3202 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3203 rtm->rtm_type = RTN_LOCAL;
d1918542 3204 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3205 rtm->rtm_type = RTN_LOCAL;
3206 else
3207 rtm->rtm_type = RTN_UNICAST;
3208 rtm->rtm_flags = 0;
35103d11 3209 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3210 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3211 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3212 rtm->rtm_flags |= RTNH_F_DEAD;
3213 }
1da177e4
LT
3214 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3215 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3216 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3217 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3218 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3219 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3220 rtm->rtm_protocol = RTPROT_RA;
3221 else
3222 rtm->rtm_protocol = RTPROT_KERNEL;
3223 }
1da177e4 3224
38308473 3225 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3226 rtm->rtm_flags |= RTM_F_CLONED;
3227
3228 if (dst) {
930345ea 3229 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3230 goto nla_put_failure;
1ab1457c 3231 rtm->rtm_dst_len = 128;
1da177e4 3232 } else if (rtm->rtm_dst_len)
930345ea 3233 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3234 goto nla_put_failure;
1da177e4
LT
3235#ifdef CONFIG_IPV6_SUBTREES
3236 if (src) {
930345ea 3237 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3238 goto nla_put_failure;
1ab1457c 3239 rtm->rtm_src_len = 128;
c78679e8 3240 } else if (rtm->rtm_src_len &&
930345ea 3241 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3242 goto nla_put_failure;
1da177e4 3243#endif
7bc570c8
YH
3244 if (iif) {
3245#ifdef CONFIG_IPV6_MROUTE
3246 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2cf75070
NA
3247 int err = ip6mr_get_route(net, skb, rtm, nowait,
3248 portid);
3249
7bc570c8
YH
3250 if (err <= 0) {
3251 if (!nowait) {
3252 if (err == 0)
3253 return 0;
3254 goto nla_put_failure;
3255 } else {
3256 if (err == -EMSGSIZE)
3257 goto nla_put_failure;
3258 }
3259 }
3260 } else
3261#endif
c78679e8
DM
3262 if (nla_put_u32(skb, RTA_IIF, iif))
3263 goto nla_put_failure;
7bc570c8 3264 } else if (dst) {
1da177e4 3265 struct in6_addr saddr_buf;
c78679e8 3266 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3267 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3268 goto nla_put_failure;
1da177e4 3269 }
2d7202bf 3270
c3968a85
DW
3271 if (rt->rt6i_prefsrc.plen) {
3272 struct in6_addr saddr_buf;
4e3fd7a0 3273 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3274 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3275 goto nla_put_failure;
c3968a85
DW
3276 }
3277
4b32b5ad
MKL
3278 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3279 if (rt->rt6i_pmtu)
3280 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3281 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3282 goto nla_put_failure;
3283
dd0cbf29 3284 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3285 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3286 goto nla_put_failure;
94f826b8 3287 }
2d7202bf 3288
c78679e8
DM
3289 if (rt->dst.dev &&
3290 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3291 goto nla_put_failure;
3292 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3293 goto nla_put_failure;
8253947e
LW
3294
3295 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3296
87a50699 3297 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3298 goto nla_put_failure;
2d7202bf 3299
c78ba6d6
LR
3300 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3301 goto nla_put_failure;
3302
61adedf3 3303 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3304
053c095a
JB
3305 nlmsg_end(skb, nlh);
3306 return 0;
2d7202bf
TG
3307
3308nla_put_failure:
26932566
PM
3309 nlmsg_cancel(skb, nlh);
3310 return -EMSGSIZE;
1da177e4
LT
3311}
3312
1b43af54 3313int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3314{
3315 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3316 int prefix;
3317
2d7202bf
TG
3318 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3319 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3320 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3321 } else
3322 prefix = 0;
3323
191cd582
BH
3324 return rt6_fill_node(arg->net,
3325 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3326 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3327 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3328}
3329
67ba4152 3330static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3331{
3b1e0a65 3332 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3333 struct nlattr *tb[RTA_MAX+1];
3334 struct rt6_info *rt;
1da177e4 3335 struct sk_buff *skb;
ab364a6f 3336 struct rtmsg *rtm;
4c9483b2 3337 struct flowi6 fl6;
72331bc0 3338 int err, iif = 0, oif = 0;
1da177e4 3339
ab364a6f
TG
3340 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3341 if (err < 0)
3342 goto errout;
1da177e4 3343
ab364a6f 3344 err = -EINVAL;
4c9483b2 3345 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3346 rtm = nlmsg_data(nlh);
3347 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
1da177e4 3348
ab364a6f
TG
3349 if (tb[RTA_SRC]) {
3350 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3351 goto errout;
3352
4e3fd7a0 3353 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3354 }
3355
3356 if (tb[RTA_DST]) {
3357 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3358 goto errout;
3359
4e3fd7a0 3360 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3361 }
3362
3363 if (tb[RTA_IIF])
3364 iif = nla_get_u32(tb[RTA_IIF]);
3365
3366 if (tb[RTA_OIF])
72331bc0 3367 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3368
2e47b291
LC
3369 if (tb[RTA_MARK])
3370 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3371
1da177e4
LT
3372 if (iif) {
3373 struct net_device *dev;
72331bc0
SL
3374 int flags = 0;
3375
5578689a 3376 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3377 if (!dev) {
3378 err = -ENODEV;
ab364a6f 3379 goto errout;
1da177e4 3380 }
72331bc0
SL
3381
3382 fl6.flowi6_iif = iif;
3383
3384 if (!ipv6_addr_any(&fl6.saddr))
3385 flags |= RT6_LOOKUP_F_HAS_SADDR;
3386
3387 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3388 flags);
3389 } else {
3390 fl6.flowi6_oif = oif;
3391
3392 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3393 }
3394
ab364a6f 3395 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3396 if (!skb) {
94e187c0 3397 ip6_rt_put(rt);
ab364a6f
TG
3398 err = -ENOBUFS;
3399 goto errout;
3400 }
1da177e4 3401
ab364a6f
TG
3402 /* Reserve room for dummy headers, this skb can pass
3403 through good chunk of routing engine.
3404 */
459a98ed 3405 skb_reset_mac_header(skb);
ab364a6f 3406 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3407
d8d1f30b 3408 skb_dst_set(skb, &rt->dst);
1da177e4 3409
4c9483b2 3410 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3411 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3412 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3413 if (err < 0) {
ab364a6f
TG
3414 kfree_skb(skb);
3415 goto errout;
1da177e4
LT
3416 }
3417
15e47304 3418 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3419errout:
1da177e4 3420 return err;
1da177e4
LT
3421}
3422
37a1d361
RP
3423void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3424 unsigned int nlm_flags)
1da177e4
LT
3425{
3426 struct sk_buff *skb;
5578689a 3427 struct net *net = info->nl_net;
528c4ceb
DL
3428 u32 seq;
3429 int err;
3430
3431 err = -ENOBUFS;
38308473 3432 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3433
19e42e45 3434 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3435 if (!skb)
21713ebc
TG
3436 goto errout;
3437
191cd582 3438 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3439 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3440 if (err < 0) {
3441 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3442 WARN_ON(err == -EMSGSIZE);
3443 kfree_skb(skb);
3444 goto errout;
3445 }
15e47304 3446 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3447 info->nlh, gfp_any());
3448 return;
21713ebc
TG
3449errout:
3450 if (err < 0)
5578689a 3451 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3452}
3453
8ed67789 3454static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3455 unsigned long event, void *ptr)
8ed67789 3456{
351638e7 3457 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3458 struct net *net = dev_net(dev);
8ed67789
DL
3459
3460 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3461 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3462 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3463#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3464 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3465 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3466 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3467 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3468#endif
3469 }
3470
3471 return NOTIFY_OK;
3472}
3473
1da177e4
LT
3474/*
3475 * /proc
3476 */
3477
3478#ifdef CONFIG_PROC_FS
3479
33120b30
AD
3480static const struct file_operations ipv6_route_proc_fops = {
3481 .owner = THIS_MODULE,
3482 .open = ipv6_route_open,
3483 .read = seq_read,
3484 .llseek = seq_lseek,
8d2ca1d7 3485 .release = seq_release_net,
33120b30
AD
3486};
3487
1da177e4
LT
3488static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3489{
69ddb805 3490 struct net *net = (struct net *)seq->private;
1da177e4 3491 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3492 net->ipv6.rt6_stats->fib_nodes,
3493 net->ipv6.rt6_stats->fib_route_nodes,
3494 net->ipv6.rt6_stats->fib_rt_alloc,
3495 net->ipv6.rt6_stats->fib_rt_entries,
3496 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3497 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3498 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3499
3500 return 0;
3501}
3502
3503static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3504{
de05c557 3505 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3506}
3507
9a32144e 3508static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3509 .owner = THIS_MODULE,
3510 .open = rt6_stats_seq_open,
3511 .read = seq_read,
3512 .llseek = seq_lseek,
b6fcbdb4 3513 .release = single_release_net,
1da177e4
LT
3514};
3515#endif /* CONFIG_PROC_FS */
3516
3517#ifdef CONFIG_SYSCTL
3518
1da177e4 3519static
fe2c6338 3520int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3521 void __user *buffer, size_t *lenp, loff_t *ppos)
3522{
c486da34
LAG
3523 struct net *net;
3524 int delay;
3525 if (!write)
1da177e4 3526 return -EINVAL;
c486da34
LAG
3527
3528 net = (struct net *)ctl->extra1;
3529 delay = net->ipv6.sysctl.flush_delay;
3530 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3531 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3532 return 0;
1da177e4
LT
3533}
3534
fe2c6338 3535struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3536 {
1da177e4 3537 .procname = "flush",
4990509f 3538 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3539 .maxlen = sizeof(int),
89c8b3a1 3540 .mode = 0200,
6d9f239a 3541 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3542 },
3543 {
1da177e4 3544 .procname = "gc_thresh",
9a7ec3a9 3545 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3546 .maxlen = sizeof(int),
3547 .mode = 0644,
6d9f239a 3548 .proc_handler = proc_dointvec,
1da177e4
LT
3549 },
3550 {
1da177e4 3551 .procname = "max_size",
4990509f 3552 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3553 .maxlen = sizeof(int),
3554 .mode = 0644,
6d9f239a 3555 .proc_handler = proc_dointvec,
1da177e4
LT
3556 },
3557 {
1da177e4 3558 .procname = "gc_min_interval",
4990509f 3559 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3560 .maxlen = sizeof(int),
3561 .mode = 0644,
6d9f239a 3562 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3563 },
3564 {
1da177e4 3565 .procname = "gc_timeout",
4990509f 3566 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3567 .maxlen = sizeof(int),
3568 .mode = 0644,
6d9f239a 3569 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3570 },
3571 {
1da177e4 3572 .procname = "gc_interval",
4990509f 3573 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3574 .maxlen = sizeof(int),
3575 .mode = 0644,
6d9f239a 3576 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3577 },
3578 {
1da177e4 3579 .procname = "gc_elasticity",
4990509f 3580 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3581 .maxlen = sizeof(int),
3582 .mode = 0644,
f3d3f616 3583 .proc_handler = proc_dointvec,
1da177e4
LT
3584 },
3585 {
1da177e4 3586 .procname = "mtu_expires",
4990509f 3587 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3588 .maxlen = sizeof(int),
3589 .mode = 0644,
6d9f239a 3590 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3591 },
3592 {
1da177e4 3593 .procname = "min_adv_mss",
4990509f 3594 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3595 .maxlen = sizeof(int),
3596 .mode = 0644,
f3d3f616 3597 .proc_handler = proc_dointvec,
1da177e4
LT
3598 },
3599 {
1da177e4 3600 .procname = "gc_min_interval_ms",
4990509f 3601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3602 .maxlen = sizeof(int),
3603 .mode = 0644,
6d9f239a 3604 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3605 },
f8572d8f 3606 { }
1da177e4
LT
3607};
3608
2c8c1e72 3609struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3610{
3611 struct ctl_table *table;
3612
3613 table = kmemdup(ipv6_route_table_template,
3614 sizeof(ipv6_route_table_template),
3615 GFP_KERNEL);
5ee09105
YH
3616
3617 if (table) {
3618 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3619 table[0].extra1 = net;
86393e52 3620 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3621 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3622 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3623 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3624 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3625 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3626 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3627 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3628 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3629
3630 /* Don't export sysctls to unprivileged users */
3631 if (net->user_ns != &init_user_ns)
3632 table[0].procname = NULL;
5ee09105
YH
3633 }
3634
760f2d01
DL
3635 return table;
3636}
1da177e4
LT
3637#endif
3638
2c8c1e72 3639static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3640{
633d424b 3641 int ret = -ENOMEM;
8ed67789 3642
86393e52
AD
3643 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3644 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3645
fc66f95c
ED
3646 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3647 goto out_ip6_dst_ops;
3648
8ed67789
DL
3649 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3650 sizeof(*net->ipv6.ip6_null_entry),
3651 GFP_KERNEL);
3652 if (!net->ipv6.ip6_null_entry)
fc66f95c 3653 goto out_ip6_dst_entries;
d8d1f30b 3654 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3655 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3656 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3657 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3658 ip6_template_metrics, true);
8ed67789
DL
3659
3660#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3661 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3662 sizeof(*net->ipv6.ip6_prohibit_entry),
3663 GFP_KERNEL);
68fffc67
PZ
3664 if (!net->ipv6.ip6_prohibit_entry)
3665 goto out_ip6_null_entry;
d8d1f30b 3666 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3667 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3668 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3669 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3670 ip6_template_metrics, true);
8ed67789
DL
3671
3672 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3673 sizeof(*net->ipv6.ip6_blk_hole_entry),
3674 GFP_KERNEL);
68fffc67
PZ
3675 if (!net->ipv6.ip6_blk_hole_entry)
3676 goto out_ip6_prohibit_entry;
d8d1f30b 3677 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3678 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3679 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3680 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3681 ip6_template_metrics, true);
8ed67789
DL
3682#endif
3683
b339a47c
PZ
3684 net->ipv6.sysctl.flush_delay = 0;
3685 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3686 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3687 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3688 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3689 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3690 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3691 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3692
6891a346
BT
3693 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3694
8ed67789
DL
3695 ret = 0;
3696out:
3697 return ret;
f2fc6a54 3698
68fffc67
PZ
3699#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3700out_ip6_prohibit_entry:
3701 kfree(net->ipv6.ip6_prohibit_entry);
3702out_ip6_null_entry:
3703 kfree(net->ipv6.ip6_null_entry);
3704#endif
fc66f95c
ED
3705out_ip6_dst_entries:
3706 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3707out_ip6_dst_ops:
f2fc6a54 3708 goto out;
cdb18761
DL
3709}
3710
2c8c1e72 3711static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3712{
8ed67789
DL
3713 kfree(net->ipv6.ip6_null_entry);
3714#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3715 kfree(net->ipv6.ip6_prohibit_entry);
3716 kfree(net->ipv6.ip6_blk_hole_entry);
3717#endif
41bb78b4 3718 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3719}
3720
d189634e
TG
3721static int __net_init ip6_route_net_init_late(struct net *net)
3722{
3723#ifdef CONFIG_PROC_FS
d4beaa66
G
3724 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3725 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3726#endif
3727 return 0;
3728}
3729
3730static void __net_exit ip6_route_net_exit_late(struct net *net)
3731{
3732#ifdef CONFIG_PROC_FS
ece31ffd
G
3733 remove_proc_entry("ipv6_route", net->proc_net);
3734 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3735#endif
3736}
3737
cdb18761
DL
3738static struct pernet_operations ip6_route_net_ops = {
3739 .init = ip6_route_net_init,
3740 .exit = ip6_route_net_exit,
3741};
3742
c3426b47
DM
3743static int __net_init ipv6_inetpeer_init(struct net *net)
3744{
3745 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3746
3747 if (!bp)
3748 return -ENOMEM;
3749 inet_peer_base_init(bp);
3750 net->ipv6.peers = bp;
3751 return 0;
3752}
3753
3754static void __net_exit ipv6_inetpeer_exit(struct net *net)
3755{
3756 struct inet_peer_base *bp = net->ipv6.peers;
3757
3758 net->ipv6.peers = NULL;
56a6b248 3759 inetpeer_invalidate_tree(bp);
c3426b47
DM
3760 kfree(bp);
3761}
3762
2b823f72 3763static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3764 .init = ipv6_inetpeer_init,
3765 .exit = ipv6_inetpeer_exit,
3766};
3767
d189634e
TG
3768static struct pernet_operations ip6_route_net_late_ops = {
3769 .init = ip6_route_net_init_late,
3770 .exit = ip6_route_net_exit_late,
3771};
3772
8ed67789
DL
3773static struct notifier_block ip6_route_dev_notifier = {
3774 .notifier_call = ip6_route_dev_notify,
3775 .priority = 0,
3776};
3777
433d49c3 3778int __init ip6_route_init(void)
1da177e4 3779{
433d49c3 3780 int ret;
8d0b94af 3781 int cpu;
433d49c3 3782
9a7ec3a9
DL
3783 ret = -ENOMEM;
3784 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3785 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3786 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3787 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3788 goto out;
14e50e57 3789
fc66f95c 3790 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3791 if (ret)
bdb3289f 3792 goto out_kmem_cache;
bdb3289f 3793
c3426b47
DM
3794 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3795 if (ret)
e8803b6c 3796 goto out_dst_entries;
2a0c451a 3797
7e52b33b
DM
3798 ret = register_pernet_subsys(&ip6_route_net_ops);
3799 if (ret)
3800 goto out_register_inetpeer;
c3426b47 3801
5dc121e9
AE
3802 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3803
8ed67789
DL
3804 /* Registering of the loopback is done before this portion of code,
3805 * the loopback reference in rt6_info will not be taken, do it
3806 * manually for init_net */
d8d1f30b 3807 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3808 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3809 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3810 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3811 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3812 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3813 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3814 #endif
e8803b6c 3815 ret = fib6_init();
433d49c3 3816 if (ret)
8ed67789 3817 goto out_register_subsys;
433d49c3 3818
433d49c3
DL
3819 ret = xfrm6_init();
3820 if (ret)
e8803b6c 3821 goto out_fib6_init;
c35b7e72 3822
433d49c3
DL
3823 ret = fib6_rules_init();
3824 if (ret)
3825 goto xfrm6_init;
7e5449c2 3826
d189634e
TG
3827 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3828 if (ret)
3829 goto fib6_rules_init;
3830
433d49c3 3831 ret = -ENOBUFS;
c7ac8679
GR
3832 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3833 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3834 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3835 goto out_register_late_subsys;
c127ea2c 3836
8ed67789 3837 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3838 if (ret)
d189634e 3839 goto out_register_late_subsys;
8ed67789 3840
8d0b94af
MKL
3841 for_each_possible_cpu(cpu) {
3842 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3843
3844 INIT_LIST_HEAD(&ul->head);
3845 spin_lock_init(&ul->lock);
3846 }
3847
433d49c3
DL
3848out:
3849 return ret;
3850
d189634e
TG
3851out_register_late_subsys:
3852 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3853fib6_rules_init:
433d49c3
DL
3854 fib6_rules_cleanup();
3855xfrm6_init:
433d49c3 3856 xfrm6_fini();
2a0c451a
TG
3857out_fib6_init:
3858 fib6_gc_cleanup();
8ed67789
DL
3859out_register_subsys:
3860 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3861out_register_inetpeer:
3862 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3863out_dst_entries:
3864 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3865out_kmem_cache:
f2fc6a54 3866 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3867 goto out;
1da177e4
LT
3868}
3869
3870void ip6_route_cleanup(void)
3871{
8ed67789 3872 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3873 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3874 fib6_rules_cleanup();
1da177e4 3875 xfrm6_fini();
1da177e4 3876 fib6_gc_cleanup();
c3426b47 3877 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3878 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3879 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3880 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3881}