net: ipv6: Do not add multicast route for l3 master devices
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4
LT
66
67#include <asm/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/* Result of checking a route's next-hop neighbour while scoring it.
 * Every failure code is negative so that callers can test "< 0";
 * find_match() treats FAIL_HARD as "skip this route" and FAIL_DO_RR
 * as "usable with the lowest score, and request round-robin".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
70ceb4f5 102#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 103static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex,
95c96174 106 unsigned int pref);
efa2cea0 107static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
108 const struct in6_addr *prefix, int prefixlen,
109 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
110#endif
111
8d0b94af
MKL
112struct uncached_list {
113 spinlock_t lock;
114 struct list_head head;
115};
116
117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119static void rt6_uncached_list_add(struct rt6_info *rt)
120{
121 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122
123 rt->dst.flags |= DST_NOCACHE;
124 rt->rt6i_uncached_list = ul;
125
126 spin_lock_bh(&ul->lock);
127 list_add_tail(&rt->rt6i_uncached, &ul->head);
128 spin_unlock_bh(&ul->lock);
129}
130
131static void rt6_uncached_list_del(struct rt6_info *rt)
132{
133 if (!list_empty(&rt->rt6i_uncached)) {
134 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136 spin_lock_bh(&ul->lock);
137 list_del(&rt->rt6i_uncached);
138 spin_unlock_bh(&ul->lock);
139 }
140}
141
142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143{
144 struct net_device *loopback_dev = net->loopback_dev;
145 int cpu;
146
e332bc67
EB
147 if (dev == loopback_dev)
148 return;
149
8d0b94af
MKL
150 for_each_possible_cpu(cpu) {
151 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 struct rt6_info *rt;
153
154 spin_lock_bh(&ul->lock);
155 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 struct inet6_dev *rt_idev = rt->rt6i_idev;
157 struct net_device *rt_dev = rt->dst.dev;
158
e332bc67 159 if (rt_idev->dev == dev) {
8d0b94af
MKL
160 rt->rt6i_idev = in6_dev_get(loopback_dev);
161 in6_dev_put(rt_idev);
162 }
163
e332bc67 164 if (rt_dev == dev) {
8d0b94af
MKL
165 rt->dst.dev = loopback_dev;
166 dev_hold(rt->dst.dev);
167 dev_put(rt_dev);
168 }
169 }
170 spin_unlock_bh(&ul->lock);
171 }
172}
173
d52d3997
MKL
174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175{
176 return dst_metrics_write_ptr(rt->dst.from);
177}
178
06582540
DM
179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180{
4b32b5ad 181 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 182
d52d3997
MKL
183 if (rt->rt6i_flags & RTF_PCPU)
184 return rt6_pcpu_cow_metrics(rt);
185 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
186 return NULL;
187 else
3b471175 188 return dst_cow_metrics_generic(dst, old);
06582540
DM
189}
190
f894cbf8
DM
191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 struct sk_buff *skb,
193 const void *daddr)
39232973
DM
194{
195 struct in6_addr *p = &rt->rt6i_gateway;
196
a7563f34 197 if (!ipv6_addr_any(p))
39232973 198 return (const void *) p;
f894cbf8
DM
199 else if (skb)
200 return &ipv6_hdr(skb)->daddr;
39232973
DM
201 return daddr;
202}
203
f894cbf8
DM
204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct rt6_info *rt = (struct rt6_info *) dst;
209 struct neighbour *n;
210
f894cbf8 211 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 212 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
213 if (n)
214 return n;
215 return neigh_create(&nd_tbl, daddr, dst->dev);
216}
217
9a7ec3a9 218static struct dst_ops ip6_dst_ops_template = {
1da177e4 219 .family = AF_INET6,
1da177e4
LT
220 .gc = ip6_dst_gc,
221 .gc_thresh = 1024,
222 .check = ip6_dst_check,
0dbaee3b 223 .default_advmss = ip6_default_advmss,
ebb762f2 224 .mtu = ip6_mtu,
06582540 225 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
226 .destroy = ip6_dst_destroy,
227 .ifdown = ip6_dst_ifdown,
228 .negative_advice = ip6_negative_advice,
229 .link_failure = ip6_link_failure,
230 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 231 .redirect = rt6_do_redirect,
9f8955cc 232 .local_out = __ip6_local_out,
d3aaeb38 233 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
234};
235
ebb762f2 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 237{
618f9bc7
SK
238 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239
240 return mtu ? : dst->dev->mtu;
ec831ea7
RD
241}
242
6700c270
DM
243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 struct sk_buff *skb, u32 mtu)
14e50e57
DM
245{
246}
247
6700c270
DM
/* Redirects are deliberately ignored for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
252
14e50e57
DM
253static struct dst_ops ip6_dst_blackhole_ops = {
254 .family = AF_INET6,
14e50e57
DM
255 .destroy = ip6_dst_destroy,
256 .check = ip6_dst_check,
ebb762f2 257 .mtu = ip6_blackhole_mtu,
214f45c9 258 .default_advmss = ip6_default_advmss,
14e50e57 259 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 260 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 261 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 262 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
263};
264
62fa8a84 265static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 266 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
267};
268
fb0af4c7 269static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
2c20cbd7 273 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 274 .error = -ENETUNREACH,
d8d1f30b
CG
275 .input = ip6_pkt_discard,
276 .output = ip6_pkt_discard_out,
1da177e4
LT
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 279 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282};
283
101367c2
TG
284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
285
fb0af4c7 286static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
287 .dst = {
288 .__refcnt = ATOMIC_INIT(1),
289 .__use = 1,
2c20cbd7 290 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 291 .error = -EACCES,
d8d1f30b
CG
292 .input = ip6_pkt_prohibit,
293 .output = ip6_pkt_prohibit_out,
101367c2
TG
294 },
295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 296 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
297 .rt6i_metric = ~(u32) 0,
298 .rt6i_ref = ATOMIC_INIT(1),
299};
300
fb0af4c7 301static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -EINVAL,
d8d1f30b 307 .input = dst_discard,
ede2059d 308 .output = dst_discard_out,
101367c2
TG
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 311 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
312 .rt6i_metric = ~(u32) 0,
313 .rt6i_ref = ATOMIC_INIT(1),
314};
315
316#endif
317
ebfa45f0
MKL
318static void rt6_info_init(struct rt6_info *rt)
319{
320 struct dst_entry *dst = &rt->dst;
321
322 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 INIT_LIST_HEAD(&rt->rt6i_siblings);
324 INIT_LIST_HEAD(&rt->rt6i_uncached);
325}
326
1da177e4 327/* allocate dst with ip6_dst_ops */
d52d3997
MKL
328static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 struct net_device *dev,
ad706862 330 int flags)
1da177e4 331{
97bab73f 332 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 333 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 334
ebfa45f0
MKL
335 if (rt)
336 rt6_info_init(rt);
8104891b 337
cf911662 338 return rt;
1da177e4
LT
339}
340
9ab179d8
DA
341struct rt6_info *ip6_dst_alloc(struct net *net,
342 struct net_device *dev,
343 int flags)
d52d3997 344{
ad706862 345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
346
347 if (rt) {
348 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 if (rt->rt6i_pcpu) {
350 int cpu;
351
352 for_each_possible_cpu(cpu) {
353 struct rt6_info **p;
354
355 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 /* no one shares rt */
357 *p = NULL;
358 }
359 } else {
360 dst_destroy((struct dst_entry *)rt);
361 return NULL;
362 }
363 }
364
365 return rt;
366}
9ab179d8 367EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 368
1da177e4
LT
369static void ip6_dst_destroy(struct dst_entry *dst)
370{
371 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 372 struct dst_entry *from = dst->from;
8d0b94af 373 struct inet6_dev *idev;
1da177e4 374
4b32b5ad 375 dst_destroy_metrics_generic(dst);
87775312 376 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
377 rt6_uncached_list_del(rt);
378
379 idev = rt->rt6i_idev;
38308473 380 if (idev) {
1da177e4
LT
381 rt->rt6i_idev = NULL;
382 in6_dev_put(idev);
1ab1457c 383 }
1716a961 384
ecd98837
YH
385 dst->from = NULL;
386 dst_release(from);
b3419363
DM
387}
388
1da177e4
LT
389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390 int how)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 394 struct net_device *loopback_dev =
c346dca1 395 dev_net(dev)->loopback_dev;
1da177e4 396
97cac082
DM
397 if (dev != loopback_dev) {
398 if (idev && idev->dev == dev) {
399 struct inet6_dev *loopback_idev =
400 in6_dev_get(loopback_dev);
401 if (loopback_idev) {
402 rt->rt6i_idev = loopback_idev;
403 in6_dev_put(idev);
404 }
405 }
1da177e4
LT
406 }
407}
408
5973fb1e
MKL
409static bool __rt6_check_expired(const struct rt6_info *rt)
410{
411 if (rt->rt6i_flags & RTF_EXPIRES)
412 return time_after(jiffies, rt->dst.expires);
413 else
414 return false;
415}
416
a50feda5 417static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 418{
1716a961
G
419 if (rt->rt6i_flags & RTF_EXPIRES) {
420 if (time_after(jiffies, rt->dst.expires))
a50feda5 421 return true;
1716a961 422 } else if (rt->dst.from) {
3fd91fb3 423 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
51ebd318
ND
428/* Multipath route selection:
429 * Hash based function using packet header and flowlabel.
430 * Adapted from fib_info_hashfn()
431 */
432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
433 const struct flowi6 *fl6)
434{
644d0e65 435 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
436}
437
438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
439 struct flowi6 *fl6, int oif,
440 int strict)
51ebd318
ND
441{
442 struct rt6_info *sibling, *next_sibling;
443 int route_choosen;
444
445 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
446 /* Don't change the route, if route_choosen == 0
447 * (siblings does not include ourself)
448 */
449 if (route_choosen)
450 list_for_each_entry_safe(sibling, next_sibling,
451 &match->rt6i_siblings, rt6i_siblings) {
452 route_choosen--;
453 if (route_choosen == 0) {
52bd4c0c
ND
454 if (rt6_score_route(sibling, oif, strict) < 0)
455 break;
51ebd318
ND
456 match = sibling;
457 break;
458 }
459 }
460 return match;
461}
462
1da177e4 463/*
c71099ac 464 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
465 */
466
8ed67789
DL
467static inline struct rt6_info *rt6_device_match(struct net *net,
468 struct rt6_info *rt,
b71d1d42 469 const struct in6_addr *saddr,
1da177e4 470 int oif,
d420895e 471 int flags)
1da177e4
LT
472{
473 struct rt6_info *local = NULL;
474 struct rt6_info *sprt;
475
dd3abc4e
YH
476 if (!oif && ipv6_addr_any(saddr))
477 goto out;
478
d8d1f30b 479 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 480 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
481
482 if (oif) {
1da177e4
LT
483 if (dev->ifindex == oif)
484 return sprt;
485 if (dev->flags & IFF_LOOPBACK) {
38308473 486 if (!sprt->rt6i_idev ||
1da177e4 487 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 488 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 489 continue;
17fb0b2b
DA
490 if (local &&
491 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
492 continue;
493 }
494 local = sprt;
495 }
dd3abc4e
YH
496 } else {
497 if (ipv6_chk_addr(net, saddr, dev,
498 flags & RT6_LOOKUP_F_IFACE))
499 return sprt;
1da177e4 500 }
dd3abc4e 501 }
1da177e4 502
dd3abc4e 503 if (oif) {
1da177e4
LT
504 if (local)
505 return local;
506
d420895e 507 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 508 return net->ipv6.ip6_null_entry;
1da177e4 509 }
dd3abc4e 510out:
1da177e4
LT
511 return rt;
512}
513
27097255 514#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
515struct __rt6_probe_work {
516 struct work_struct work;
517 struct in6_addr target;
518 struct net_device *dev;
519};
520
521static void rt6_probe_deferred(struct work_struct *w)
522{
523 struct in6_addr mcaddr;
524 struct __rt6_probe_work *work =
525 container_of(w, struct __rt6_probe_work, work);
526
527 addrconf_addr_solict_mult(&work->target, &mcaddr);
304d888b 528 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
c2f17e82 529 dev_put(work->dev);
662f5533 530 kfree(work);
c2f17e82
HFS
531}
532
27097255
YH
533static void rt6_probe(struct rt6_info *rt)
534{
990edb42 535 struct __rt6_probe_work *work;
f2c31e32 536 struct neighbour *neigh;
27097255
YH
537 /*
538 * Okay, this does not seem to be appropriate
539 * for now, however, we need to check if it
540 * is really so; aka Router Reachability Probing.
541 *
542 * Router Reachability Probe MUST be rate-limited
543 * to no more than one per minute.
544 */
2152caea 545 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 546 return;
2152caea
YH
547 rcu_read_lock_bh();
548 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
549 if (neigh) {
8d6c31bf
MKL
550 if (neigh->nud_state & NUD_VALID)
551 goto out;
552
990edb42 553 work = NULL;
2152caea 554 write_lock(&neigh->lock);
990edb42
MKL
555 if (!(neigh->nud_state & NUD_VALID) &&
556 time_after(jiffies,
557 neigh->updated +
558 rt->rt6i_idev->cnf.rtr_probe_interval)) {
559 work = kmalloc(sizeof(*work), GFP_ATOMIC);
560 if (work)
561 __neigh_set_probe_once(neigh);
c2f17e82 562 }
2152caea 563 write_unlock(&neigh->lock);
990edb42
MKL
564 } else {
565 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 566 }
990edb42
MKL
567
568 if (work) {
569 INIT_WORK(&work->work, rt6_probe_deferred);
570 work->target = rt->rt6i_gateway;
571 dev_hold(rt->dst.dev);
572 work->dev = rt->dst.dev;
573 schedule_work(&work->work);
574 }
575
8d6c31bf 576out:
2152caea 577 rcu_read_unlock_bh();
27097255
YH
578}
579#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
583#endif
584
1da177e4 585/*
554cfb7e 586 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 587 */
b6f99a21 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 589{
d1918542 590 struct net_device *dev = rt->dst.dev;
161980f4 591 if (!oif || dev->ifindex == oif)
554cfb7e 592 return 2;
161980f4
DM
593 if ((dev->flags & IFF_LOOPBACK) &&
594 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
595 return 1;
596 return 0;
554cfb7e 597}
1da177e4 598
afc154e9 599static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 600{
f2c31e32 601 struct neighbour *neigh;
afc154e9 602 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 603
4d0c5911
YH
604 if (rt->rt6i_flags & RTF_NONEXTHOP ||
605 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 606 return RT6_NUD_SUCCEED;
145a3621
YH
607
608 rcu_read_lock_bh();
609 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
610 if (neigh) {
611 read_lock(&neigh->lock);
554cfb7e 612 if (neigh->nud_state & NUD_VALID)
afc154e9 613 ret = RT6_NUD_SUCCEED;
398bcbeb 614#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 615 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 616 ret = RT6_NUD_SUCCEED;
7e980569
JB
617 else
618 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 619#endif
145a3621 620 read_unlock(&neigh->lock);
afc154e9
HFS
621 } else {
622 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 623 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 624 }
145a3621
YH
625 rcu_read_unlock_bh();
626
a5a81f0b 627 return ret;
1da177e4
LT
628}
629
554cfb7e
YH
630static int rt6_score_route(struct rt6_info *rt, int oif,
631 int strict)
1da177e4 632{
a5a81f0b 633 int m;
1ab1457c 634
4d0c5911 635 m = rt6_check_dev(rt, oif);
77d16f45 636 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 637 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
638#ifdef CONFIG_IPV6_ROUTER_PREF
639 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
640#endif
afc154e9
HFS
641 if (strict & RT6_LOOKUP_F_REACHABLE) {
642 int n = rt6_check_neigh(rt);
643 if (n < 0)
644 return n;
645 }
554cfb7e
YH
646 return m;
647}
648
f11e6659 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
650 int *mpri, struct rt6_info *match,
651 bool *do_rr)
554cfb7e 652{
f11e6659 653 int m;
afc154e9 654 bool match_do_rr = false;
35103d11
AG
655 struct inet6_dev *idev = rt->rt6i_idev;
656 struct net_device *dev = rt->dst.dev;
657
658 if (dev && !netif_carrier_ok(dev) &&
659 idev->cnf.ignore_routes_with_linkdown)
660 goto out;
f11e6659
DM
661
662 if (rt6_check_expired(rt))
663 goto out;
664
665 m = rt6_score_route(rt, oif, strict);
7e980569 666 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
667 match_do_rr = true;
668 m = 0; /* lowest valid score */
7e980569 669 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 670 goto out;
afc154e9
HFS
671 }
672
673 if (strict & RT6_LOOKUP_F_REACHABLE)
674 rt6_probe(rt);
f11e6659 675
7e980569 676 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 677 if (m > *mpri) {
afc154e9 678 *do_rr = match_do_rr;
f11e6659
DM
679 *mpri = m;
680 match = rt;
f11e6659 681 }
f11e6659
DM
682out:
683 return match;
684}
685
686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
687 struct rt6_info *rr_head,
afc154e9
HFS
688 u32 metric, int oif, int strict,
689 bool *do_rr)
f11e6659 690{
9fbdcfaf 691 struct rt6_info *rt, *match, *cont;
554cfb7e 692 int mpri = -1;
1da177e4 693
f11e6659 694 match = NULL;
9fbdcfaf
SK
695 cont = NULL;
696 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
697 if (rt->rt6i_metric != metric) {
698 cont = rt;
699 break;
700 }
701
702 match = find_match(rt, oif, strict, &mpri, match, do_rr);
703 }
704
705 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
706 if (rt->rt6i_metric != metric) {
707 cont = rt;
708 break;
709 }
710
afc154e9 711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
712 }
713
714 if (match || !cont)
715 return match;
716
717 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 718 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 719
f11e6659
DM
720 return match;
721}
1da177e4 722
f11e6659
DM
723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
724{
725 struct rt6_info *match, *rt0;
8ed67789 726 struct net *net;
afc154e9 727 bool do_rr = false;
1da177e4 728
f11e6659
DM
729 rt0 = fn->rr_ptr;
730 if (!rt0)
731 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 732
afc154e9
HFS
733 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
734 &do_rr);
1da177e4 735
afc154e9 736 if (do_rr) {
d8d1f30b 737 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 738
554cfb7e 739 /* no entries matched; do round-robin */
f11e6659
DM
740 if (!next || next->rt6i_metric != rt0->rt6i_metric)
741 next = fn->leaf;
742
743 if (next != rt0)
744 fn->rr_ptr = next;
1da177e4 745 }
1da177e4 746
d1918542 747 net = dev_net(rt0->dst.dev);
a02cec21 748 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
749}
750
8b9df265
MKL
751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
752{
753 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
754}
755
70ceb4f5
YH
756#ifdef CONFIG_IPV6_ROUTE_INFO
757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 758 const struct in6_addr *gwaddr)
70ceb4f5 759{
c346dca1 760 struct net *net = dev_net(dev);
70ceb4f5
YH
761 struct route_info *rinfo = (struct route_info *) opt;
762 struct in6_addr prefix_buf, *prefix;
763 unsigned int pref;
4bed72e4 764 unsigned long lifetime;
70ceb4f5
YH
765 struct rt6_info *rt;
766
767 if (len < sizeof(struct route_info)) {
768 return -EINVAL;
769 }
770
771 /* Sanity check for prefix_len and length */
772 if (rinfo->length > 3) {
773 return -EINVAL;
774 } else if (rinfo->prefix_len > 128) {
775 return -EINVAL;
776 } else if (rinfo->prefix_len > 64) {
777 if (rinfo->length < 2) {
778 return -EINVAL;
779 }
780 } else if (rinfo->prefix_len > 0) {
781 if (rinfo->length < 1) {
782 return -EINVAL;
783 }
784 }
785
786 pref = rinfo->route_pref;
787 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 788 return -EINVAL;
70ceb4f5 789
4bed72e4 790 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
791
792 if (rinfo->length == 3)
793 prefix = (struct in6_addr *)rinfo->prefix;
794 else {
795 /* this function is safe */
796 ipv6_addr_prefix(&prefix_buf,
797 (struct in6_addr *)rinfo->prefix,
798 rinfo->prefix_len);
799 prefix = &prefix_buf;
800 }
801
f104a567
DJ
802 if (rinfo->prefix_len == 0)
803 rt = rt6_get_dflt_router(gwaddr, dev);
804 else
805 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
806 gwaddr, dev->ifindex);
70ceb4f5
YH
807
808 if (rt && !lifetime) {
e0a1ad73 809 ip6_del_rt(rt);
70ceb4f5
YH
810 rt = NULL;
811 }
812
813 if (!rt && lifetime)
efa2cea0 814 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
815 pref);
816 else if (rt)
817 rt->rt6i_flags = RTF_ROUTEINFO |
818 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
819
820 if (rt) {
1716a961
G
821 if (!addrconf_finite_timeout(lifetime))
822 rt6_clean_expires(rt);
823 else
824 rt6_set_expires(rt, jiffies + HZ * lifetime);
825
94e187c0 826 ip6_rt_put(rt);
70ceb4f5
YH
827 }
828 return 0;
829}
830#endif
831
a3c00e46
MKL
832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
833 struct in6_addr *saddr)
834{
835 struct fib6_node *pn;
836 while (1) {
837 if (fn->fn_flags & RTN_TL_ROOT)
838 return NULL;
839 pn = fn->parent;
840 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
841 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
842 else
843 fn = pn;
844 if (fn->fn_flags & RTN_RTINFO)
845 return fn;
846 }
847}
c71099ac 848
8ed67789
DL
849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
850 struct fib6_table *table,
4c9483b2 851 struct flowi6 *fl6, int flags)
1da177e4
LT
852{
853 struct fib6_node *fn;
854 struct rt6_info *rt;
855
c71099ac 856 read_lock_bh(&table->tb6_lock);
4c9483b2 857 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
858restart:
859 rt = fn->leaf;
4c9483b2 860 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 861 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 862 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
863 if (rt == net->ipv6.ip6_null_entry) {
864 fn = fib6_backtrack(fn, &fl6->saddr);
865 if (fn)
866 goto restart;
867 }
d8d1f30b 868 dst_use(&rt->dst, jiffies);
c71099ac 869 read_unlock_bh(&table->tb6_lock);
b811580d
DA
870
871 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
872
c71099ac
TG
873 return rt;
874
875}
876
67ba4152 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
878 int flags)
879{
880 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
881}
882EXPORT_SYMBOL_GPL(ip6_route_lookup);
883
9acd9f3a
YH
884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
885 const struct in6_addr *saddr, int oif, int strict)
c71099ac 886{
4c9483b2
DM
887 struct flowi6 fl6 = {
888 .flowi6_oif = oif,
889 .daddr = *daddr,
c71099ac
TG
890 };
891 struct dst_entry *dst;
77d16f45 892 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 893
adaa70bb 894 if (saddr) {
4c9483b2 895 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
896 flags |= RT6_LOOKUP_F_HAS_SADDR;
897 }
898
4c9483b2 899 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
900 if (dst->error == 0)
901 return (struct rt6_info *) dst;
902
903 dst_release(dst);
904
1da177e4
LT
905 return NULL;
906}
7159039a
YH
907EXPORT_SYMBOL(rt6_lookup);
908
c71099ac 909/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
910 It takes new route entry, the addition fails by any reason the
911 route is freed. In any case, if caller does not hold it, it may
912 be destroyed.
913 */
914
e5fd387a 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 916 struct mx6_config *mxc)
1da177e4
LT
917{
918 int err;
c71099ac 919 struct fib6_table *table;
1da177e4 920
c71099ac
TG
921 table = rt->rt6i_table;
922 write_lock_bh(&table->tb6_lock);
e715b6d3 923 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 924 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
925
926 return err;
927}
928
40e22e8f
TG
929int ip6_ins_rt(struct rt6_info *rt)
930{
e715b6d3
FW
931 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
932 struct mx6_config mxc = { .mx = NULL, };
933
934 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
935}
936
8b9df265
MKL
937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
938 const struct in6_addr *daddr,
939 const struct in6_addr *saddr)
1da177e4 940{
1da177e4
LT
941 struct rt6_info *rt;
942
943 /*
944 * Clone the route.
945 */
946
d52d3997 947 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 948 ort = (struct rt6_info *)ort->dst.from;
1da177e4 949
ad706862 950 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
951
952 if (!rt)
953 return NULL;
954
955 ip6_rt_copy_init(rt, ort);
956 rt->rt6i_flags |= RTF_CACHE;
957 rt->rt6i_metric = 0;
958 rt->dst.flags |= DST_HOST;
959 rt->rt6i_dst.addr = *daddr;
960 rt->rt6i_dst.plen = 128;
1da177e4 961
83a09abd
MKL
962 if (!rt6_is_gw_or_nonexthop(ort)) {
963 if (ort->rt6i_dst.plen != 128 &&
964 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
965 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 966#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
967 if (rt->rt6i_src.plen && saddr) {
968 rt->rt6i_src.addr = *saddr;
969 rt->rt6i_src.plen = 128;
8b9df265 970 }
83a09abd 971#endif
95a9a5ba 972 }
1da177e4 973
95a9a5ba
YH
974 return rt;
975}
1da177e4 976
d52d3997
MKL
977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
978{
979 struct rt6_info *pcpu_rt;
980
981 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 982 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
983
984 if (!pcpu_rt)
985 return NULL;
986 ip6_rt_copy_init(pcpu_rt, rt);
987 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
988 pcpu_rt->rt6i_flags |= RTF_PCPU;
989 return pcpu_rt;
990}
991
992/* It should be called with read_lock_bh(&tb6_lock) acquired */
993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
994{
a73e4195 995 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
996
997 p = this_cpu_ptr(rt->rt6i_pcpu);
998 pcpu_rt = *p;
999
a73e4195
MKL
1000 if (pcpu_rt) {
1001 dst_hold(&pcpu_rt->dst);
1002 rt6_dst_from_metrics_check(pcpu_rt);
1003 }
1004 return pcpu_rt;
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008{
9c7370a1 1009 struct fib6_table *table = rt->rt6i_table;
a73e4195 1010 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1011
1012 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013 if (!pcpu_rt) {
1014 struct net *net = dev_net(rt->dst.dev);
1015
9c7370a1
MKL
1016 dst_hold(&net->ipv6.ip6_null_entry->dst);
1017 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1018 }
1019
9c7370a1
MKL
1020 read_lock_bh(&table->tb6_lock);
1021 if (rt->rt6i_pcpu) {
1022 p = this_cpu_ptr(rt->rt6i_pcpu);
1023 prev = cmpxchg(p, NULL, pcpu_rt);
1024 if (prev) {
1025 /* If someone did it before us, return prev instead */
1026 dst_destroy(&pcpu_rt->dst);
1027 pcpu_rt = prev;
1028 }
1029 } else {
1030 /* rt has been removed from the fib6 tree
1031 * before we have a chance to acquire the read_lock.
1032 * In this case, don't brother to create a pcpu rt
1033 * since rt is going away anyway. The next
1034 * dst_check() will trigger a re-lookup.
1035 */
d52d3997 1036 dst_destroy(&pcpu_rt->dst);
9c7370a1 1037 pcpu_rt = rt;
d52d3997 1038 }
d52d3997
MKL
1039 dst_hold(&pcpu_rt->dst);
1040 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1041 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1042 return pcpu_rt;
1043}
1044
8ed67789 1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1046 struct flowi6 *fl6, int flags)
1da177e4 1047{
367efcb9 1048 struct fib6_node *fn, *saved_fn;
45e4fd26 1049 struct rt6_info *rt;
c71099ac 1050 int strict = 0;
1da177e4 1051
77d16f45 1052 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1053 if (net->ipv6.devconf_all->forwarding == 0)
1054 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1055
c71099ac 1056 read_lock_bh(&table->tb6_lock);
1da177e4 1057
4c9483b2 1058 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1059 saved_fn = fn;
1da177e4 1060
ca254490
DA
1061 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062 oif = 0;
1063
a3c00e46 1064redo_rt6_select:
367efcb9 1065 rt = rt6_select(fn, oif, strict);
52bd4c0c 1066 if (rt->rt6i_nsiblings)
367efcb9 1067 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1068 if (rt == net->ipv6.ip6_null_entry) {
1069 fn = fib6_backtrack(fn, &fl6->saddr);
1070 if (fn)
1071 goto redo_rt6_select;
367efcb9
MKL
1072 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073 /* also consider unreachable route */
1074 strict &= ~RT6_LOOKUP_F_REACHABLE;
1075 fn = saved_fn;
1076 goto redo_rt6_select;
367efcb9 1077 }
a3c00e46
MKL
1078 }
1079
fb9de91e 1080
3da59bd9 1081 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1082 dst_use(&rt->dst, jiffies);
1083 read_unlock_bh(&table->tb6_lock);
1084
1085 rt6_dst_from_metrics_check(rt);
b811580d
DA
1086
1087 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1088 return rt;
3da59bd9
MKL
1089 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090 !(rt->rt6i_flags & RTF_GATEWAY))) {
1091 /* Create a RTF_CACHE clone which will not be
1092 * owned by the fib6 tree. It is for the special case where
1093 * the daddr in the skb during the neighbor look-up is different
1094 * from the fl6->daddr used to look-up route here.
1095 */
1096
1097 struct rt6_info *uncached_rt;
1098
d52d3997
MKL
1099 dst_use(&rt->dst, jiffies);
1100 read_unlock_bh(&table->tb6_lock);
1101
3da59bd9
MKL
1102 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103 dst_release(&rt->dst);
c71099ac 1104
3da59bd9 1105 if (uncached_rt)
8d0b94af 1106 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1107 else
1108 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1109
3da59bd9 1110 dst_hold(&uncached_rt->dst);
b811580d
DA
1111
1112 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1113 return uncached_rt;
3da59bd9 1114
d52d3997
MKL
1115 } else {
1116 /* Get a percpu copy */
1117
1118 struct rt6_info *pcpu_rt;
1119
1120 rt->dst.lastuse = jiffies;
1121 rt->dst.__use++;
1122 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1123
9c7370a1
MKL
1124 if (pcpu_rt) {
1125 read_unlock_bh(&table->tb6_lock);
1126 } else {
1127 /* We have to do the read_unlock first
1128 * because rt6_make_pcpu_route() may trigger
1129 * ip6_dst_gc() which will take the write_lock.
1130 */
1131 dst_hold(&rt->dst);
1132 read_unlock_bh(&table->tb6_lock);
a73e4195 1133 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1134 dst_release(&rt->dst);
1135 }
d52d3997 1136
b811580d 1137 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1138 return pcpu_rt;
9c7370a1 1139
d52d3997 1140 }
1da177e4
LT
1141}
1142
8ed67789 1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1144 struct flowi6 *fl6, int flags)
4acad72d 1145{
4c9483b2 1146 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1147}
1148
72331bc0
SL
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150 struct net_device *dev,
1151 struct flowi6 *fl6, int flags)
1152{
1153 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154 flags |= RT6_LOOKUP_F_IFACE;
1155
1156 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
1158
c71099ac
TG
1159void ip6_route_input(struct sk_buff *skb)
1160{
b71d1d42 1161 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1162 struct net *net = dev_net(skb->dev);
adaa70bb 1163 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1164 struct ip_tunnel_info *tun_info;
4c9483b2 1165 struct flowi6 fl6 = {
ca254490 1166 .flowi6_iif = l3mdev_fib_oif(skb->dev),
4c9483b2
DM
1167 .daddr = iph->daddr,
1168 .saddr = iph->saddr,
6502ca52 1169 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1170 .flowi6_mark = skb->mark,
1171 .flowi6_proto = iph->nexthdr,
c71099ac 1172 };
adaa70bb 1173
904af04d 1174 tun_info = skb_tunnel_info(skb);
46fa062a 1175 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1176 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1177 skb_dst_drop(skb);
72331bc0 1178 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1179}
1180
8ed67789 1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1182 struct flowi6 *fl6, int flags)
1da177e4 1183{
4c9483b2 1184 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1185}
1186
6f21c96a
PA
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188 struct flowi6 *fl6, int flags)
c71099ac 1189{
ca254490 1190 struct dst_entry *dst;
d46a9d67 1191 bool any_src;
c71099ac 1192
4a65896f 1193 dst = l3mdev_get_rt6_dst(net, fl6);
ca254490
DA
1194 if (dst)
1195 return dst;
1196
1fb9489b 1197 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1198
d46a9d67 1199 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1200 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1201 (fl6->flowi6_oif && any_src))
77d16f45 1202 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1203
d46a9d67 1204 if (!any_src)
adaa70bb 1205 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1206 else if (sk)
1207 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1208
4c9483b2 1209 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1210}
6f21c96a 1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1212
2774c131 1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1214{
5c1e6aa3 1215 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1216 struct dst_entry *new = NULL;
1217
f5b0a874 1218 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1219 if (rt) {
0a1f5962 1220 rt6_info_init(rt);
8104891b 1221
0a1f5962 1222 new = &rt->dst;
14e50e57 1223 new->__use = 1;
352e512c 1224 new->input = dst_discard;
ede2059d 1225 new->output = dst_discard_out;
14e50e57 1226
0a1f5962 1227 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1228 rt->rt6i_idev = ort->rt6i_idev;
1229 if (rt->rt6i_idev)
1230 in6_dev_hold(rt->rt6i_idev);
14e50e57 1231
4e3fd7a0 1232 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1233 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1234 rt->rt6i_metric = 0;
1235
1236 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241 dst_free(new);
1242 }
1243
69ead7af
DM
1244 dst_release(dst_orig);
1245 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1246}
14e50e57 1247
1da177e4
LT
1248/*
1249 * Destination cache support functions
1250 */
1251
4b32b5ad
MKL
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254 if (rt->dst.from &&
1255 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
3da59bd9
MKL
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262 return NULL;
1263
1264 if (rt6_check_expired(rt))
1265 return NULL;
1266
1267 return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
5973fb1e
MKL
1272 if (!__rt6_check_expired(rt) &&
1273 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1274 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 return &rt->dst;
1276 else
1277 return NULL;
1278}
1279
1da177e4
LT
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282 struct rt6_info *rt;
1283
1284 rt = (struct rt6_info *) dst;
1285
6f3118b5
ND
1286 /* All IPV6 dsts are created with ->obsolete set to the value
1287 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 * into this function always.
1289 */
e3bc10bd 1290
4b32b5ad
MKL
1291 rt6_dst_from_metrics_check(rt);
1292
02bcf4e0
MKL
1293 if (rt->rt6i_flags & RTF_PCPU ||
1294 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1295 return rt6_dst_from_check(rt, cookie);
1296 else
1297 return rt6_check(rt, cookie);
1da177e4
LT
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302 struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304 if (rt) {
54c1a859
YH
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 if (rt6_check_expired(rt)) {
1307 ip6_del_rt(rt);
1308 dst = NULL;
1309 }
1310 } else {
1da177e4 1311 dst_release(dst);
54c1a859
YH
1312 dst = NULL;
1313 }
1da177e4 1314 }
54c1a859 1315 return dst;
1da177e4
LT
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320 struct rt6_info *rt;
1321
3ffe533c 1322 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1323
adf30907 1324 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1325 if (rt) {
1eb4f758
HFS
1326 if (rt->rt6i_flags & RTF_CACHE) {
1327 dst_hold(&rt->dst);
8e3d5be7 1328 ip6_del_rt(rt);
1eb4f758 1329 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1330 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1331 }
1da177e4
LT
1332 }
1333}
1334
45e4fd26
MKL
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337 struct net *net = dev_net(rt->dst.dev);
1338
1339 rt->rt6i_flags |= RTF_MODIFIED;
1340 rt->rt6i_pmtu = mtu;
1341 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
0d3f6d29
MKL
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346 return !(rt->rt6i_flags & RTF_CACHE) &&
1347 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
45e4fd26
MKL
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1352{
67ba4152 1353 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1354
45e4fd26
MKL
1355 if (rt6->rt6i_flags & RTF_LOCAL)
1356 return;
81aded24 1357
45e4fd26
MKL
1358 dst_confirm(dst);
1359 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360 if (mtu >= dst_mtu(dst))
1361 return;
9d289715 1362
0d3f6d29 1363 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1364 rt6_do_update_pmtu(rt6, mtu);
1365 } else {
1366 const struct in6_addr *daddr, *saddr;
1367 struct rt6_info *nrt6;
1368
1369 if (iph) {
1370 daddr = &iph->daddr;
1371 saddr = &iph->saddr;
1372 } else if (sk) {
1373 daddr = &sk->sk_v6_daddr;
1374 saddr = &inet6_sk(sk)->saddr;
1375 } else {
1376 return;
1377 }
1378 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379 if (nrt6) {
1380 rt6_do_update_pmtu(nrt6, mtu);
1381
1382 /* ip6_ins_rt(nrt6) will bump the
1383 * rt6->rt6i_node->fn_sernum
1384 * which will fail the next rt6_check() and
1385 * invalidate the sk->sk_dst_cache.
1386 */
1387 ip6_ins_rt(nrt6);
1388 }
1da177e4
LT
1389 }
1390}
1391
45e4fd26
MKL
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393 struct sk_buff *skb, u32 mtu)
1394{
1395 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
42ae66c8
DM
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399 int oif, u32 mark)
81aded24
DM
1400{
1401 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402 struct dst_entry *dst;
1403 struct flowi6 fl6;
1404
1405 memset(&fl6, 0, sizeof(fl6));
1406 fl6.flowi6_oif = oif;
1b3c61dc 1407 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1408 fl6.daddr = iph->daddr;
1409 fl6.saddr = iph->saddr;
6502ca52 1410 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1411
1412 dst = ip6_route_output(net, NULL, &fl6);
1413 if (!dst->error)
45e4fd26 1414 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1415 dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
33c162a9
MKL
1421 struct dst_entry *dst;
1422
81aded24
DM
1423 ip6_update_pmtu(skb, sock_net(sk), mtu,
1424 sk->sk_bound_dev_if, sk->sk_mark);
33c162a9
MKL
1425
1426 dst = __sk_dst_get(sk);
1427 if (!dst || !dst->obsolete ||
1428 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429 return;
1430
1431 bh_lock_sock(sk);
1432 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433 ip6_datagram_dst_update(sk, false);
1434 bh_unlock_sock(sk);
81aded24
DM
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
b55b76b2
DJ
1438/* Handle redirects */
/*
 * Lookup key for redirect handling: the ordinary flow plus the gateway
 * address that candidate routes are compared against.  ip6_route_redirect()
 * passes &rdfl.fl6 to the rule lookup and __ip6_route_redirect() casts that
 * flowi6 pointer back to a struct ip6rd_flowi, so fl6 has to stay the
 * first member.
 */
1439struct ip6rd_flowi {
1440 struct flowi6 fl6;
1441 struct in6_addr gateway;
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445 struct fib6_table *table,
1446 struct flowi6 *fl6,
1447 int flags)
1448{
1449 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450 struct rt6_info *rt;
1451 struct fib6_node *fn;
1452
1453 /* Get the "current" route for this destination and
1454 * check if the redirect has come from approriate router.
1455 *
1456 * RFC 4861 specifies that redirects should only be
1457 * accepted if they come from the nexthop to the target.
1458 * Due to the way the routes are chosen, this notion
1459 * is a bit fuzzy and one might need to check all possible
1460 * routes.
1461 */
1462
1463 read_lock_bh(&table->tb6_lock);
1464 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467 if (rt6_check_expired(rt))
1468 continue;
1469 if (rt->dst.error)
1470 break;
1471 if (!(rt->rt6i_flags & RTF_GATEWAY))
1472 continue;
1473 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474 continue;
1475 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476 continue;
1477 break;
1478 }
1479
1480 if (!rt)
1481 rt = net->ipv6.ip6_null_entry;
1482 else if (rt->dst.error) {
1483 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1484 goto out;
1485 }
1486
1487 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1488 fn = fib6_backtrack(fn, &fl6->saddr);
1489 if (fn)
1490 goto restart;
b55b76b2 1491 }
a3c00e46 1492
b0a1ba59 1493out:
b55b76b2
DJ
1494 dst_hold(&rt->dst);
1495
1496 read_unlock_bh(&table->tb6_lock);
1497
b811580d 1498 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1499 return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503 const struct flowi6 *fl6,
1504 const struct in6_addr *gateway)
1505{
1506 int flags = RT6_LOOKUP_F_HAS_SADDR;
1507 struct ip6rd_flowi rdfl;
1508
1509 rdfl.fl6 = *fl6;
1510 rdfl.gateway = *gateway;
1511
1512 return fib6_rule_lookup(net, &rdfl.fl6,
1513 flags, __ip6_route_redirect);
1514}
1515
3a5ad2ee
DM
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1517{
1518 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519 struct dst_entry *dst;
1520 struct flowi6 fl6;
1521
1522 memset(&fl6, 0, sizeof(fl6));
e374c618 1523 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1524 fl6.flowi6_oif = oif;
1525 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1526 fl6.daddr = iph->daddr;
1527 fl6.saddr = iph->saddr;
6502ca52 1528 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1529
b55b76b2
DJ
1530 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1532 dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
c92a59ec
DJ
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537 u32 mark)
1538{
1539 const struct ipv6hdr *iph = ipv6_hdr(skb);
1540 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541 struct dst_entry *dst;
1542 struct flowi6 fl6;
1543
1544 memset(&fl6, 0, sizeof(fl6));
e374c618 1545 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1546 fl6.flowi6_oif = oif;
1547 fl6.flowi6_mark = mark;
c92a59ec
DJ
1548 fl6.daddr = msg->dest;
1549 fl6.saddr = iph->daddr;
1550
b55b76b2
DJ
1551 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1553 dst_release(dst);
1554}
1555
3a5ad2ee
DM
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
0dbaee3b 1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1563{
0dbaee3b
DM
1564 struct net_device *dev = dst->dev;
1565 unsigned int mtu = dst_mtu(dst);
1566 struct net *net = dev_net(dev);
1567
1da177e4
LT
1568 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
5578689a
DL
1570 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1572
1573 /*
1ab1457c
YH
1574 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1577 * rely only on pmtu discovery"
1578 */
1579 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580 mtu = IPV6_MAXPLEN;
1581 return mtu;
1582}
1583
ebb762f2 1584static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1585{
4b32b5ad
MKL
1586 const struct rt6_info *rt = (const struct rt6_info *)dst;
1587 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1588 struct inet6_dev *idev;
618f9bc7 1589
4b32b5ad
MKL
1590 if (mtu)
1591 goto out;
1592
1593 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1594 if (mtu)
30f78d8e 1595 goto out;
618f9bc7
SK
1596
1597 mtu = IPV6_MIN_MTU;
d33e4553
DM
1598
1599 rcu_read_lock();
1600 idev = __in6_dev_get(dst->dev);
1601 if (idev)
1602 mtu = idev->cnf.mtu6;
1603 rcu_read_unlock();
1604
30f78d8e
ED
1605out:
1606 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1607}
1608
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dsts handed out by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and free
 * entries from it.  All accesses are made under icmp6_dst_lock.
 */
1609static struct dst_entry *icmp6_dst_gc_list;
1610static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1611
3b00944c 1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1613 struct flowi6 *fl6)
1da177e4 1614{
87a11578 1615 struct dst_entry *dst;
1da177e4
LT
1616 struct rt6_info *rt;
1617 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1618 struct net *net = dev_net(dev);
1da177e4 1619
38308473 1620 if (unlikely(!idev))
122bdf67 1621 return ERR_PTR(-ENODEV);
1da177e4 1622
ad706862 1623 rt = ip6_dst_alloc(net, dev, 0);
38308473 1624 if (unlikely(!rt)) {
1da177e4 1625 in6_dev_put(idev);
87a11578 1626 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1627 goto out;
1628 }
1629
8e2ec639
YZ
1630 rt->dst.flags |= DST_HOST;
1631 rt->dst.output = ip6_output;
d8d1f30b 1632 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1633 rt->rt6i_gateway = fl6->daddr;
87a11578 1634 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1635 rt->rt6i_dst.plen = 128;
1636 rt->rt6i_idev = idev;
14edd87d 1637 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1638
3b00944c 1639 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1640 rt->dst.next = icmp6_dst_gc_list;
1641 icmp6_dst_gc_list = &rt->dst;
3b00944c 1642 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1643
5578689a 1644 fib6_force_start_gc(net);
1da177e4 1645
87a11578
DM
1646 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1da177e4 1648out:
87a11578 1649 return dst;
1da177e4
LT
1650}
1651
3d0f24a7 1652int icmp6_dst_gc(void)
1da177e4 1653{
e9476e95 1654 struct dst_entry *dst, **pprev;
3d0f24a7 1655 int more = 0;
1da177e4 1656
3b00944c
YH
1657 spin_lock_bh(&icmp6_dst_lock);
1658 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1659
1da177e4
LT
1660 while ((dst = *pprev) != NULL) {
1661 if (!atomic_read(&dst->__refcnt)) {
1662 *pprev = dst->next;
1663 dst_free(dst);
1da177e4
LT
1664 } else {
1665 pprev = &dst->next;
3d0f24a7 1666 ++more;
1da177e4
LT
1667 }
1668 }
1669
3b00944c 1670 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1671
3d0f24a7 1672 return more;
1da177e4
LT
1673}
1674
1e493d19
DM
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676 void *arg)
1677{
1678 struct dst_entry *dst, **pprev;
1679
1680 spin_lock_bh(&icmp6_dst_lock);
1681 pprev = &icmp6_dst_gc_list;
1682 while ((dst = *pprev) != NULL) {
1683 struct rt6_info *rt = (struct rt6_info *) dst;
1684 if (func(rt, arg)) {
1685 *pprev = dst->next;
1686 dst_free(dst);
1687 } else {
1688 pprev = &dst->next;
1689 }
1690 }
1691 spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
569d3645 1694static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1695{
86393e52 1696 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1697 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1702 int entries;
7019b78e 1703
fc66f95c 1704 entries = dst_entries_get_fast(ops);
49a18d86 1705 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1706 entries <= rt_max_size)
1da177e4
LT
1707 goto out;
1708
6891a346 1709 net->ipv6.ip6_rt_gc_expire++;
14956643 1710 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1711 entries = dst_entries_get_slow(ops);
1712 if (entries < ops->gc_thresh)
7019b78e 1713 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1714out:
7019b78e 1715 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1716 return entries > rt_max_size;
1da177e4
LT
1717}
1718
e715b6d3
FW
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720 const struct fib6_config *cfg)
1721{
c3a8d947 1722 bool ecn_ca = false;
e715b6d3
FW
1723 struct nlattr *nla;
1724 int remaining;
1725 u32 *mp;
1726
63159f29 1727 if (!cfg->fc_mx)
e715b6d3
FW
1728 return 0;
1729
1730 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731 if (unlikely(!mp))
1732 return -ENOMEM;
1733
1734 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735 int type = nla_type(nla);
1bb14807 1736 u32 val;
e715b6d3 1737
1bb14807
DB
1738 if (!type)
1739 continue;
1740 if (unlikely(type > RTAX_MAX))
1741 goto err;
ea697639 1742
1bb14807
DB
1743 if (type == RTAX_CC_ALGO) {
1744 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1745
1bb14807 1746 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1747 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1748 if (val == TCP_CA_UNSPEC)
1749 goto err;
1750 } else {
1751 val = nla_get_u32(nla);
e715b6d3 1752 }
626abd59
PA
1753 if (type == RTAX_HOPLIMIT && val > 255)
1754 val = 255;
b8d3e416
DB
1755 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756 goto err;
1bb14807
DB
1757
1758 mp[type - 1] = val;
1759 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1760 }
1761
c3a8d947
DB
1762 if (ecn_ca) {
1763 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1765 }
e715b6d3 1766
c3a8d947 1767 mxc->mx = mp;
e715b6d3
FW
1768 return 0;
1769 err:
1770 kfree(mp);
1771 return -EINVAL;
1772}
1da177e4 1773
8c14586f
DA
1774static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1775 struct fib6_config *cfg,
1776 const struct in6_addr *gw_addr)
1777{
1778 struct flowi6 fl6 = {
1779 .flowi6_oif = cfg->fc_ifindex,
1780 .daddr = *gw_addr,
1781 .saddr = cfg->fc_prefsrc,
1782 };
1783 struct fib6_table *table;
1784 struct rt6_info *rt;
1785 int flags = 0;
1786
1787 table = fib6_get_table(net, cfg->fc_table);
1788 if (!table)
1789 return NULL;
1790
1791 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1792 flags |= RT6_LOOKUP_F_HAS_SADDR;
1793
1794 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1795
1796 /* if table lookup failed, fall back to full lookup */
1797 if (rt == net->ipv6.ip6_null_entry) {
1798 ip6_rt_put(rt);
1799 rt = NULL;
1800 }
1801
1802 return rt;
1803}
1804
8c5b83f0 1805static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1806{
5578689a 1807 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1808 struct rt6_info *rt = NULL;
1809 struct net_device *dev = NULL;
1810 struct inet6_dev *idev = NULL;
c71099ac 1811 struct fib6_table *table;
1da177e4 1812 int addr_type;
8c5b83f0 1813 int err = -EINVAL;
1da177e4 1814
86872cb5 1815 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1816 goto out;
1da177e4 1817#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1818 if (cfg->fc_src_len)
8c5b83f0 1819 goto out;
1da177e4 1820#endif
86872cb5 1821 if (cfg->fc_ifindex) {
1da177e4 1822 err = -ENODEV;
5578689a 1823 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1824 if (!dev)
1825 goto out;
1826 idev = in6_dev_get(dev);
1827 if (!idev)
1828 goto out;
1829 }
1830
86872cb5
TG
1831 if (cfg->fc_metric == 0)
1832 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1833
d71314b4 1834 err = -ENOBUFS;
38308473
DM
1835 if (cfg->fc_nlinfo.nlh &&
1836 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1837 table = fib6_get_table(net, cfg->fc_table);
38308473 1838 if (!table) {
f3213831 1839 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1840 table = fib6_new_table(net, cfg->fc_table);
1841 }
1842 } else {
1843 table = fib6_new_table(net, cfg->fc_table);
1844 }
38308473
DM
1845
1846 if (!table)
c71099ac 1847 goto out;
c71099ac 1848
ad706862
MKL
1849 rt = ip6_dst_alloc(net, NULL,
1850 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1851
38308473 1852 if (!rt) {
1da177e4
LT
1853 err = -ENOMEM;
1854 goto out;
1855 }
1856
1716a961
G
1857 if (cfg->fc_flags & RTF_EXPIRES)
1858 rt6_set_expires(rt, jiffies +
1859 clock_t_to_jiffies(cfg->fc_expires));
1860 else
1861 rt6_clean_expires(rt);
1da177e4 1862
86872cb5
TG
1863 if (cfg->fc_protocol == RTPROT_UNSPEC)
1864 cfg->fc_protocol = RTPROT_BOOT;
1865 rt->rt6i_protocol = cfg->fc_protocol;
1866
1867 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1868
1869 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1870 rt->dst.input = ip6_mc_input;
ab79ad14
1871 else if (cfg->fc_flags & RTF_LOCAL)
1872 rt->dst.input = ip6_input;
1da177e4 1873 else
d8d1f30b 1874 rt->dst.input = ip6_forward;
1da177e4 1875
d8d1f30b 1876 rt->dst.output = ip6_output;
1da177e4 1877
19e42e45
RP
1878 if (cfg->fc_encap) {
1879 struct lwtunnel_state *lwtstate;
1880
1881 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1882 cfg->fc_encap, AF_INET6, cfg,
1883 &lwtstate);
19e42e45
RP
1884 if (err)
1885 goto out;
61adedf3
JB
1886 rt->dst.lwtstate = lwtstate_get(lwtstate);
1887 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1888 rt->dst.lwtstate->orig_output = rt->dst.output;
1889 rt->dst.output = lwtunnel_output;
25368623 1890 }
61adedf3
JB
1891 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1892 rt->dst.lwtstate->orig_input = rt->dst.input;
1893 rt->dst.input = lwtunnel_input;
25368623 1894 }
19e42e45
RP
1895 }
1896
86872cb5
TG
1897 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1898 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1899 if (rt->rt6i_dst.plen == 128)
e5fd387a 1900 rt->dst.flags |= DST_HOST;
e5fd387a 1901
1da177e4 1902#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1903 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1904 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1905#endif
1906
86872cb5 1907 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1908
1909 /* We cannot add true routes via loopback here,
1910 they would result in kernel looping; promote them to reject routes
1911 */
86872cb5 1912 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1913 (dev && (dev->flags & IFF_LOOPBACK) &&
1914 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1915 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1916 /* hold loopback dev/idev if we haven't done so. */
5578689a 1917 if (dev != net->loopback_dev) {
1da177e4
LT
1918 if (dev) {
1919 dev_put(dev);
1920 in6_dev_put(idev);
1921 }
5578689a 1922 dev = net->loopback_dev;
1da177e4
LT
1923 dev_hold(dev);
1924 idev = in6_dev_get(dev);
1925 if (!idev) {
1926 err = -ENODEV;
1927 goto out;
1928 }
1929 }
1da177e4 1930 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1931 switch (cfg->fc_type) {
1932 case RTN_BLACKHOLE:
1933 rt->dst.error = -EINVAL;
ede2059d 1934 rt->dst.output = dst_discard_out;
7150aede 1935 rt->dst.input = dst_discard;
ef2c7d7b
ND
1936 break;
1937 case RTN_PROHIBIT:
1938 rt->dst.error = -EACCES;
7150aede
K
1939 rt->dst.output = ip6_pkt_prohibit_out;
1940 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1941 break;
b4949ab2 1942 case RTN_THROW:
0315e382 1943 case RTN_UNREACHABLE:
ef2c7d7b 1944 default:
7150aede 1945 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1946 : (cfg->fc_type == RTN_UNREACHABLE)
1947 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1948 rt->dst.output = ip6_pkt_discard_out;
1949 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1950 break;
1951 }
1da177e4
LT
1952 goto install_route;
1953 }
1954
86872cb5 1955 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1956 const struct in6_addr *gw_addr;
1da177e4
LT
1957 int gwa_type;
1958
86872cb5 1959 gw_addr = &cfg->fc_gateway;
330567b7 1960 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1961
1962 /* if gw_addr is local we will fail to detect this in case
1963 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1964 * will return already-added prefix route via interface that
1965 * prefix route was assigned to, which might be non-loopback.
1966 */
1967 err = -EINVAL;
330567b7
FW
1968 if (ipv6_chk_addr_and_flags(net, gw_addr,
1969 gwa_type & IPV6_ADDR_LINKLOCAL ?
1970 dev : NULL, 0, 0))
48ed7b26
FW
1971 goto out;
1972
4e3fd7a0 1973 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1974
1975 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 1976 struct rt6_info *grt = NULL;
1da177e4
LT
1977
1978 /* IPv6 strictly inhibits using not link-local
1979 addresses as nexthop address.
1980 Otherwise, router will not able to send redirects.
1981 It is very good, but in some (rare!) circumstances
1982 (SIT, PtP, NBMA NOARP links) it is handy to allow
1983 some exceptions. --ANK
1984 */
38308473 1985 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1986 goto out;
1987
8c14586f
DA
1988 if (cfg->fc_table)
1989 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1990
1991 if (!grt)
1992 grt = rt6_lookup(net, gw_addr, NULL,
1993 cfg->fc_ifindex, 1);
1da177e4
LT
1994
1995 err = -EHOSTUNREACH;
38308473 1996 if (!grt)
1da177e4
LT
1997 goto out;
1998 if (dev) {
d1918542 1999 if (dev != grt->dst.dev) {
94e187c0 2000 ip6_rt_put(grt);
1da177e4
LT
2001 goto out;
2002 }
2003 } else {
d1918542 2004 dev = grt->dst.dev;
1da177e4
LT
2005 idev = grt->rt6i_idev;
2006 dev_hold(dev);
2007 in6_dev_hold(grt->rt6i_idev);
2008 }
38308473 2009 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2010 err = 0;
94e187c0 2011 ip6_rt_put(grt);
1da177e4
LT
2012
2013 if (err)
2014 goto out;
2015 }
2016 err = -EINVAL;
38308473 2017 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
2018 goto out;
2019 }
2020
2021 err = -ENODEV;
38308473 2022 if (!dev)
1da177e4
LT
2023 goto out;
2024
c3968a85
DW
2025 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2026 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2027 err = -EINVAL;
2028 goto out;
2029 }
4e3fd7a0 2030 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2031 rt->rt6i_prefsrc.plen = 128;
2032 } else
2033 rt->rt6i_prefsrc.plen = 0;
2034
86872cb5 2035 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2036
2037install_route:
d8d1f30b 2038 rt->dst.dev = dev;
1da177e4 2039 rt->rt6i_idev = idev;
c71099ac 2040 rt->rt6i_table = table;
63152fc0 2041
c346dca1 2042 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2043
8c5b83f0 2044 return rt;
6b9ea5a6
RP
2045out:
2046 if (dev)
2047 dev_put(dev);
2048 if (idev)
2049 in6_dev_put(idev);
2050 if (rt)
2051 dst_free(&rt->dst);
2052
8c5b83f0 2053 return ERR_PTR(err);
6b9ea5a6
RP
2054}
2055
2056int ip6_route_add(struct fib6_config *cfg)
2057{
2058 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2059 struct rt6_info *rt;
6b9ea5a6
RP
2060 int err;
2061
8c5b83f0
RP
2062 rt = ip6_route_info_create(cfg);
2063 if (IS_ERR(rt)) {
2064 err = PTR_ERR(rt);
2065 rt = NULL;
6b9ea5a6 2066 goto out;
8c5b83f0 2067 }
6b9ea5a6 2068
e715b6d3
FW
2069 err = ip6_convert_metrics(&mxc, cfg);
2070 if (err)
2071 goto out;
1da177e4 2072
e715b6d3
FW
2073 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2074
2075 kfree(mxc.mx);
6b9ea5a6 2076
e715b6d3 2077 return err;
1da177e4 2078out:
1da177e4 2079 if (rt)
d8d1f30b 2080 dst_free(&rt->dst);
6b9ea5a6 2081
1da177e4
LT
2082 return err;
2083}
2084
86872cb5 2085static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2086{
2087 int err;
c71099ac 2088 struct fib6_table *table;
d1918542 2089 struct net *net = dev_net(rt->dst.dev);
1da177e4 2090
8e3d5be7
MKL
2091 if (rt == net->ipv6.ip6_null_entry ||
2092 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2093 err = -ENOENT;
2094 goto out;
2095 }
6c813a72 2096
c71099ac
TG
2097 table = rt->rt6i_table;
2098 write_lock_bh(&table->tb6_lock);
86872cb5 2099 err = fib6_del(rt, info);
c71099ac 2100 write_unlock_bh(&table->tb6_lock);
1da177e4 2101
6825a26c 2102out:
94e187c0 2103 ip6_rt_put(rt);
1da177e4
LT
2104 return err;
2105}
2106
e0a1ad73
TG
2107int ip6_del_rt(struct rt6_info *rt)
2108{
4d1169c1 2109 struct nl_info info = {
d1918542 2110 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2111 };
528c4ceb 2112 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2113}
2114
86872cb5 2115static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2116{
c71099ac 2117 struct fib6_table *table;
1da177e4
LT
2118 struct fib6_node *fn;
2119 struct rt6_info *rt;
2120 int err = -ESRCH;
2121
5578689a 2122 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2123 if (!table)
c71099ac
TG
2124 return err;
2125
2126 read_lock_bh(&table->tb6_lock);
1da177e4 2127
c71099ac 2128 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2129 &cfg->fc_dst, cfg->fc_dst_len,
2130 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2131
1da177e4 2132 if (fn) {
d8d1f30b 2133 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2134 if ((rt->rt6i_flags & RTF_CACHE) &&
2135 !(cfg->fc_flags & RTF_CACHE))
2136 continue;
86872cb5 2137 if (cfg->fc_ifindex &&
d1918542
DM
2138 (!rt->dst.dev ||
2139 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2140 continue;
86872cb5
TG
2141 if (cfg->fc_flags & RTF_GATEWAY &&
2142 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2143 continue;
86872cb5 2144 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2145 continue;
d8d1f30b 2146 dst_hold(&rt->dst);
c71099ac 2147 read_unlock_bh(&table->tb6_lock);
1da177e4 2148
86872cb5 2149 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2150 }
2151 }
c71099ac 2152 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2153
2154 return err;
2155}
2156
6700c270 2157static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2158{
a6279458 2159 struct netevent_redirect netevent;
e8599ff4 2160 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2161 struct ndisc_options ndopts;
2162 struct inet6_dev *in6_dev;
2163 struct neighbour *neigh;
71bcdba0 2164 struct rd_msg *msg;
6e157b6a
DM
2165 int optlen, on_link;
2166 u8 *lladdr;
e8599ff4 2167
29a3cad5 2168 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2169 optlen -= sizeof(*msg);
e8599ff4
DM
2170
2171 if (optlen < 0) {
6e157b6a 2172 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2173 return;
2174 }
2175
71bcdba0 2176 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2177
71bcdba0 2178 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2179 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2180 return;
2181 }
2182
6e157b6a 2183 on_link = 0;
71bcdba0 2184 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2185 on_link = 1;
71bcdba0 2186 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2187 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2188 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2189 return;
2190 }
2191
2192 in6_dev = __in6_dev_get(skb->dev);
2193 if (!in6_dev)
2194 return;
2195 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2196 return;
2197
2198 /* RFC2461 8.1:
2199 * The IP source address of the Redirect MUST be the same as the current
2200 * first-hop router for the specified ICMP Destination Address.
2201 */
2202
71bcdba0 2203 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2204 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2205 return;
2206 }
6e157b6a
DM
2207
2208 lladdr = NULL;
e8599ff4
DM
2209 if (ndopts.nd_opts_tgt_lladdr) {
2210 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2211 skb->dev);
2212 if (!lladdr) {
2213 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2214 return;
2215 }
2216 }
2217
6e157b6a 2218 rt = (struct rt6_info *) dst;
ec13ad1d 2219 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2220 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2221 return;
6e157b6a 2222 }
e8599ff4 2223
6e157b6a
DM
2224 /* Redirect received -> path was valid.
2225 * Look, redirects are sent only in response to data packets,
2226 * so that this nexthop apparently is reachable. --ANK
2227 */
2228 dst_confirm(&rt->dst);
a6279458 2229
71bcdba0 2230 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2231 if (!neigh)
2232 return;
a6279458 2233
1da177e4
LT
2234 /*
2235 * We have finally decided to accept it.
2236 */
2237
1ab1457c 2238 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2239 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2240 NEIGH_UPDATE_F_OVERRIDE|
2241 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2242 NEIGH_UPDATE_F_ISROUTER))
2243 );
2244
83a09abd 2245 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2246 if (!nrt)
1da177e4
LT
2247 goto out;
2248
2249 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2250 if (on_link)
2251 nrt->rt6i_flags &= ~RTF_GATEWAY;
2252
4e3fd7a0 2253 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2254
40e22e8f 2255 if (ip6_ins_rt(nrt))
1da177e4
LT
2256 goto out;
2257
d8d1f30b
CG
2258 netevent.old = &rt->dst;
2259 netevent.new = &nrt->dst;
71bcdba0 2260 netevent.daddr = &msg->dest;
60592833 2261 netevent.neigh = neigh;
8d71740c
TT
2262 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2263
38308473 2264 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2265 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2266 ip6_del_rt(rt);
1da177e4
LT
2267 }
2268
2269out:
e8599ff4 2270 neigh_release(neigh);
6e157b6a
DM
2271}
2272
1da177e4
LT
2273/*
2274 * Misc support functions
2275 */
2276
4b32b5ad
MKL
2277static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2278{
2279 BUG_ON(from->dst.from);
2280
2281 rt->rt6i_flags &= ~RTF_EXPIRES;
2282 dst_hold(&from->dst);
2283 rt->dst.from = &from->dst;
2284 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2285}
2286
83a09abd
MKL
2287static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2288{
2289 rt->dst.input = ort->dst.input;
2290 rt->dst.output = ort->dst.output;
2291 rt->rt6i_dst = ort->rt6i_dst;
2292 rt->dst.error = ort->dst.error;
2293 rt->rt6i_idev = ort->rt6i_idev;
2294 if (rt->rt6i_idev)
2295 in6_dev_hold(rt->rt6i_idev);
2296 rt->dst.lastuse = jiffies;
2297 rt->rt6i_gateway = ort->rt6i_gateway;
2298 rt->rt6i_flags = ort->rt6i_flags;
2299 rt6_set_from(rt, ort);
2300 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2301#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2302 rt->rt6i_src = ort->rt6i_src;
1da177e4 2303#endif
83a09abd
MKL
2304 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2305 rt->rt6i_table = ort->rt6i_table;
61adedf3 2306 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2307}
2308
70ceb4f5 2309#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2310static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2311 const struct in6_addr *prefix, int prefixlen,
2312 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2313{
2314 struct fib6_node *fn;
2315 struct rt6_info *rt = NULL;
c71099ac
TG
2316 struct fib6_table *table;
2317
efa2cea0 2318 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2319 if (!table)
c71099ac 2320 return NULL;
70ceb4f5 2321
5744dd9b 2322 read_lock_bh(&table->tb6_lock);
67ba4152 2323 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2324 if (!fn)
2325 goto out;
2326
d8d1f30b 2327 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2328 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2329 continue;
2330 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2331 continue;
2332 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2333 continue;
d8d1f30b 2334 dst_hold(&rt->dst);
70ceb4f5
YH
2335 break;
2336 }
2337out:
5744dd9b 2338 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2339 return rt;
2340}
2341
efa2cea0 2342static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2343 const struct in6_addr *prefix, int prefixlen,
2344 const struct in6_addr *gwaddr, int ifindex,
95c96174 2345 unsigned int pref)
70ceb4f5 2346{
86872cb5 2347 struct fib6_config cfg = {
238fc7ea 2348 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2349 .fc_ifindex = ifindex,
2350 .fc_dst_len = prefixlen,
2351 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2352 RTF_UP | RTF_PREF(pref),
15e47304 2353 .fc_nlinfo.portid = 0,
efa2cea0
DL
2354 .fc_nlinfo.nlh = NULL,
2355 .fc_nlinfo.nl_net = net,
86872cb5
TG
2356 };
2357
ca254490 2358 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
2359 cfg.fc_dst = *prefix;
2360 cfg.fc_gateway = *gwaddr;
70ceb4f5 2361
e317da96
YH
2362 /* We should treat it as a default route if prefix length is 0. */
2363 if (!prefixlen)
86872cb5 2364 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2365
86872cb5 2366 ip6_route_add(&cfg);
70ceb4f5 2367
efa2cea0 2368 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2369}
2370#endif
2371
b71d1d42 2372struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2373{
1da177e4 2374 struct rt6_info *rt;
c71099ac 2375 struct fib6_table *table;
1da177e4 2376
c346dca1 2377 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2378 if (!table)
c71099ac 2379 return NULL;
1da177e4 2380
5744dd9b 2381 read_lock_bh(&table->tb6_lock);
67ba4152 2382 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2383 if (dev == rt->dst.dev &&
045927ff 2384 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2385 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2386 break;
2387 }
2388 if (rt)
d8d1f30b 2389 dst_hold(&rt->dst);
5744dd9b 2390 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2391 return rt;
2392}
2393
b71d1d42 2394struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2395 struct net_device *dev,
2396 unsigned int pref)
1da177e4 2397{
86872cb5 2398 struct fib6_config cfg = {
ca254490 2399 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2400 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2401 .fc_ifindex = dev->ifindex,
2402 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2403 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2404 .fc_nlinfo.portid = 0,
5578689a 2405 .fc_nlinfo.nlh = NULL,
c346dca1 2406 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2407 };
1da177e4 2408
4e3fd7a0 2409 cfg.fc_gateway = *gwaddr;
1da177e4 2410
86872cb5 2411 ip6_route_add(&cfg);
1da177e4 2412
1da177e4
LT
2413 return rt6_get_dflt_router(gwaddr, dev);
2414}
2415
7b4da532 2416void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2417{
2418 struct rt6_info *rt;
c71099ac
TG
2419 struct fib6_table *table;
2420
2421 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2422 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2423 if (!table)
c71099ac 2424 return;
1da177e4
LT
2425
2426restart:
c71099ac 2427 read_lock_bh(&table->tb6_lock);
d8d1f30b 2428 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2429 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2430 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2431 dst_hold(&rt->dst);
c71099ac 2432 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2433 ip6_del_rt(rt);
1da177e4
LT
2434 goto restart;
2435 }
2436 }
c71099ac 2437 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2438}
2439
5578689a
DL
2440static void rtmsg_to_fib6_config(struct net *net,
2441 struct in6_rtmsg *rtmsg,
86872cb5
TG
2442 struct fib6_config *cfg)
2443{
2444 memset(cfg, 0, sizeof(*cfg));
2445
ca254490
DA
2446 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2447 : RT6_TABLE_MAIN;
86872cb5
TG
2448 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2449 cfg->fc_metric = rtmsg->rtmsg_metric;
2450 cfg->fc_expires = rtmsg->rtmsg_info;
2451 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2452 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2453 cfg->fc_flags = rtmsg->rtmsg_flags;
2454
5578689a 2455 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2456
4e3fd7a0
AD
2457 cfg->fc_dst = rtmsg->rtmsg_dst;
2458 cfg->fc_src = rtmsg->rtmsg_src;
2459 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2460}
2461
5578689a 2462int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2463{
86872cb5 2464 struct fib6_config cfg;
1da177e4
LT
2465 struct in6_rtmsg rtmsg;
2466 int err;
2467
67ba4152 2468 switch (cmd) {
1da177e4
LT
2469 case SIOCADDRT: /* Add a route */
2470 case SIOCDELRT: /* Delete a route */
af31f412 2471 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2472 return -EPERM;
2473 err = copy_from_user(&rtmsg, arg,
2474 sizeof(struct in6_rtmsg));
2475 if (err)
2476 return -EFAULT;
86872cb5 2477
5578689a 2478 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2479
1da177e4
LT
2480 rtnl_lock();
2481 switch (cmd) {
2482 case SIOCADDRT:
86872cb5 2483 err = ip6_route_add(&cfg);
1da177e4
LT
2484 break;
2485 case SIOCDELRT:
86872cb5 2486 err = ip6_route_del(&cfg);
1da177e4
LT
2487 break;
2488 default:
2489 err = -EINVAL;
2490 }
2491 rtnl_unlock();
2492
2493 return err;
3ff50b79 2494 }
1da177e4
LT
2495
2496 return -EINVAL;
2497}
2498
2499/*
2500 * Drop the packet on the floor
2501 */
2502
d5fdd6ba 2503static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2504{
612f09e8 2505 int type;
adf30907 2506 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2507 switch (ipstats_mib_noroutes) {
2508 case IPSTATS_MIB_INNOROUTES:
0660e03f 2509 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2510 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2511 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2512 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2513 break;
2514 }
2515 /* FALLTHROUGH */
2516 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2517 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2518 ipstats_mib_noroutes);
612f09e8
YH
2519 break;
2520 }
3ffe533c 2521 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2522 kfree_skb(skb);
2523 return 0;
2524}
2525
9ce8ade0
TG
2526static int ip6_pkt_discard(struct sk_buff *skb)
2527{
612f09e8 2528 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2529}
2530
ede2059d 2531static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2532{
adf30907 2533 skb->dev = skb_dst(skb)->dev;
612f09e8 2534 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2535}
2536
9ce8ade0
TG
2537static int ip6_pkt_prohibit(struct sk_buff *skb)
2538{
612f09e8 2539 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2540}
2541
ede2059d 2542static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2543{
adf30907 2544 skb->dev = skb_dst(skb)->dev;
612f09e8 2545 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2546}
2547
1da177e4
LT
2548/*
2549 * Allocate a dst for local (unicast / anycast) address.
2550 */
2551
2552struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2553 const struct in6_addr *addr,
8f031519 2554 bool anycast)
1da177e4 2555{
ca254490 2556 u32 tb_id;
c346dca1 2557 struct net *net = dev_net(idev->dev);
a3300ef4 2558 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2559 DST_NOCOUNT);
a3300ef4 2560 if (!rt)
1da177e4
LT
2561 return ERR_PTR(-ENOMEM);
2562
1da177e4
LT
2563 in6_dev_hold(idev);
2564
11d53b49 2565 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2566 rt->dst.input = ip6_input;
2567 rt->dst.output = ip6_output;
1da177e4 2568 rt->rt6i_idev = idev;
1da177e4
LT
2569
2570 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2571 if (anycast)
2572 rt->rt6i_flags |= RTF_ANYCAST;
2573 else
1da177e4 2574 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2575
550bab42 2576 rt->rt6i_gateway = *addr;
4e3fd7a0 2577 rt->rt6i_dst.addr = *addr;
1da177e4 2578 rt->rt6i_dst.plen = 128;
ca254490
DA
2579 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2580 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2581 rt->dst.flags |= DST_NOCACHE;
1da177e4 2582
d8d1f30b 2583 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2584
2585 return rt;
2586}
2587
c3968a85
DW
2588int ip6_route_get_saddr(struct net *net,
2589 struct rt6_info *rt,
b71d1d42 2590 const struct in6_addr *daddr,
c3968a85
DW
2591 unsigned int prefs,
2592 struct in6_addr *saddr)
2593{
e16e888b
MS
2594 struct inet6_dev *idev =
2595 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2596 int err = 0;
e16e888b 2597 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2598 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2599 else
2600 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2601 daddr, prefs, saddr);
2602 return err;
2603}
2604
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2611
2612static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2613{
2614 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2615 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2616 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2617
d1918542 2618 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2619 rt != net->ipv6.ip6_null_entry &&
2620 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2621 /* remove prefsrc entry */
2622 rt->rt6i_prefsrc.plen = 0;
2623 }
2624 return 0;
2625}
2626
2627void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2628{
2629 struct net *net = dev_net(ifp->idev->dev);
2630 struct arg_dev_net_ip adni = {
2631 .dev = ifp->idev->dev,
2632 .net = net,
2633 .addr = &ifp->addr,
2634 };
0c3584d5 2635 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2636}
2637
be7a010d
DJ
2638#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2639#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2640
2641/* Remove routers and update dst entries when gateway turn into host. */
2642static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2643{
2644 struct in6_addr *gateway = (struct in6_addr *)arg;
2645
2646 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2647 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2648 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2649 return -1;
2650 }
2651 return 0;
2652}
2653
2654void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2655{
2656 fib6_clean_all(net, fib6_clean_tohost, gateway);
2657}
2658
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2663
1da177e4
LT
2664static int fib6_ifdown(struct rt6_info *rt, void *arg)
2665{
bc3ef660 2666 const struct arg_dev_net *adn = arg;
2667 const struct net_device *dev = adn->dev;
8ed67789 2668
d1918542 2669 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2670 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2671 return -1;
c159d30c 2672
1da177e4
LT
2673 return 0;
2674}
2675
f3db4851 2676void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2677{
8ed67789
DL
2678 struct arg_dev_net adn = {
2679 .dev = dev,
2680 .net = net,
2681 };
2682
0c3584d5 2683 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2684 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2685 if (dev)
2686 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2687}
2688
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2693
2694static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2695{
2696 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2697 struct inet6_dev *idev;
2698
2699 /* In IPv6 pmtu discovery is not optional,
2700 so that RTAX_MTU lock cannot disable it.
2701 We still use this lock to block changes
2702 caused by addrconf/ndisc.
2703 */
2704
2705 idev = __in6_dev_get(arg->dev);
38308473 2706 if (!idev)
1da177e4
LT
2707 return 0;
2708
2709 /* For administrative MTU increase, there is no way to discover
2710 IPv6 PMTU increase, so PMTU increase should be updated here.
2711 Since RFC 1981 doesn't include administrative MTU increase
2712 update PMTU increase is a MUST. (i.e. jumbo frame)
2713 */
2714 /*
2715 If new MTU is less than route PMTU, this new MTU will be the
2716 lowest MTU in the path, update the route PMTU to reflect PMTU
2717 decreases; if new MTU is greater than route PMTU, and the
2718 old MTU is the lowest MTU in the path, update the route PMTU
2719 to reflect the increase. In this case if the other nodes' MTU
2720 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2721 PMTU discouvery.
2722 */
d1918542 2723 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2724 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2725 if (rt->rt6i_flags & RTF_CACHE) {
2726 /* For RTF_CACHE with rt6i_pmtu == 0
2727 * (i.e. a redirected route),
2728 * the metrics of its rt->dst.from has already
2729 * been updated.
2730 */
2731 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2732 rt->rt6i_pmtu = arg->mtu;
2733 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2734 (dst_mtu(&rt->dst) < arg->mtu &&
2735 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2736 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2737 }
566cfd8f 2738 }
1da177e4
LT
2739 return 0;
2740}
2741
95c96174 2742void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2743{
c71099ac
TG
2744 struct rt6_mtu_change_arg arg = {
2745 .dev = dev,
2746 .mtu = mtu,
2747 };
1da177e4 2748
0c3584d5 2749 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2750}
2751
ef7c79ed 2752static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2753 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2754 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2755 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2756 [RTA_PRIORITY] = { .type = NLA_U32 },
2757 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2758 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2759 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2760 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2761 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2762 [RTA_EXPIRES] = { .type = NLA_U32 },
86872cb5
TG
2763};
2764
2765static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2766 struct fib6_config *cfg)
1da177e4 2767{
86872cb5
TG
2768 struct rtmsg *rtm;
2769 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2770 unsigned int pref;
86872cb5 2771 int err;
1da177e4 2772
86872cb5
TG
2773 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2774 if (err < 0)
2775 goto errout;
1da177e4 2776
86872cb5
TG
2777 err = -EINVAL;
2778 rtm = nlmsg_data(nlh);
2779 memset(cfg, 0, sizeof(*cfg));
2780
2781 cfg->fc_table = rtm->rtm_table;
2782 cfg->fc_dst_len = rtm->rtm_dst_len;
2783 cfg->fc_src_len = rtm->rtm_src_len;
2784 cfg->fc_flags = RTF_UP;
2785 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2786 cfg->fc_type = rtm->rtm_type;
86872cb5 2787
ef2c7d7b
ND
2788 if (rtm->rtm_type == RTN_UNREACHABLE ||
2789 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2790 rtm->rtm_type == RTN_PROHIBIT ||
2791 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2792 cfg->fc_flags |= RTF_REJECT;
2793
ab79ad14
2794 if (rtm->rtm_type == RTN_LOCAL)
2795 cfg->fc_flags |= RTF_LOCAL;
2796
1f56a01f
MKL
2797 if (rtm->rtm_flags & RTM_F_CLONED)
2798 cfg->fc_flags |= RTF_CACHE;
2799
15e47304 2800 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2801 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2802 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2803
2804 if (tb[RTA_GATEWAY]) {
67b61f6c 2805 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2806 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2807 }
86872cb5
TG
2808
2809 if (tb[RTA_DST]) {
2810 int plen = (rtm->rtm_dst_len + 7) >> 3;
2811
2812 if (nla_len(tb[RTA_DST]) < plen)
2813 goto errout;
2814
2815 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2816 }
86872cb5
TG
2817
2818 if (tb[RTA_SRC]) {
2819 int plen = (rtm->rtm_src_len + 7) >> 3;
2820
2821 if (nla_len(tb[RTA_SRC]) < plen)
2822 goto errout;
2823
2824 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2825 }
86872cb5 2826
c3968a85 2827 if (tb[RTA_PREFSRC])
67b61f6c 2828 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2829
86872cb5
TG
2830 if (tb[RTA_OIF])
2831 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2832
2833 if (tb[RTA_PRIORITY])
2834 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2835
2836 if (tb[RTA_METRICS]) {
2837 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2838 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2839 }
86872cb5
TG
2840
2841 if (tb[RTA_TABLE])
2842 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2843
51ebd318
ND
2844 if (tb[RTA_MULTIPATH]) {
2845 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2846 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2847 }
2848
c78ba6d6
LR
2849 if (tb[RTA_PREF]) {
2850 pref = nla_get_u8(tb[RTA_PREF]);
2851 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2852 pref != ICMPV6_ROUTER_PREF_HIGH)
2853 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2854 cfg->fc_flags |= RTF_PREF(pref);
2855 }
2856
19e42e45
RP
2857 if (tb[RTA_ENCAP])
2858 cfg->fc_encap = tb[RTA_ENCAP];
2859
2860 if (tb[RTA_ENCAP_TYPE])
2861 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2862
32bc201e
XL
2863 if (tb[RTA_EXPIRES]) {
2864 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2865
2866 if (addrconf_finite_timeout(timeout)) {
2867 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2868 cfg->fc_flags |= RTF_EXPIRES;
2869 }
2870 }
2871
86872cb5
TG
2872 err = 0;
2873errout:
2874 return err;
1da177e4
LT
2875}
2876
6b9ea5a6
RP
2877struct rt6_nh {
2878 struct rt6_info *rt6_info;
2879 struct fib6_config r_cfg;
2880 struct mx6_config mxc;
2881 struct list_head next;
2882};
2883
2884static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2885{
2886 struct rt6_nh *nh;
2887
2888 list_for_each_entry(nh, rt6_nh_list, next) {
2889 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2890 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2891 nh->r_cfg.fc_ifindex);
2892 }
2893}
2894
2895static int ip6_route_info_append(struct list_head *rt6_nh_list,
2896 struct rt6_info *rt, struct fib6_config *r_cfg)
2897{
2898 struct rt6_nh *nh;
2899 struct rt6_info *rtnh;
2900 int err = -EEXIST;
2901
2902 list_for_each_entry(nh, rt6_nh_list, next) {
2903 /* check if rt6_info already exists */
2904 rtnh = nh->rt6_info;
2905
2906 if (rtnh->dst.dev == rt->dst.dev &&
2907 rtnh->rt6i_idev == rt->rt6i_idev &&
2908 ipv6_addr_equal(&rtnh->rt6i_gateway,
2909 &rt->rt6i_gateway))
2910 return err;
2911 }
2912
2913 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2914 if (!nh)
2915 return -ENOMEM;
2916 nh->rt6_info = rt;
2917 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2918 if (err) {
2919 kfree(nh);
2920 return err;
2921 }
2922 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2923 list_add_tail(&nh->next, rt6_nh_list);
2924
2925 return 0;
2926}
2927
2928static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2929{
2930 struct fib6_config r_cfg;
2931 struct rtnexthop *rtnh;
6b9ea5a6
RP
2932 struct rt6_info *rt;
2933 struct rt6_nh *err_nh;
2934 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2935 int remaining;
2936 int attrlen;
6b9ea5a6
RP
2937 int err = 1;
2938 int nhn = 0;
2939 int replace = (cfg->fc_nlinfo.nlh &&
2940 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2941 LIST_HEAD(rt6_nh_list);
51ebd318 2942
35f1b4e9 2943 remaining = cfg->fc_mp_len;
51ebd318 2944 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2945
6b9ea5a6
RP
2946 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2947 * rt6_info structs per nexthop
2948 */
51ebd318
ND
2949 while (rtnh_ok(rtnh, remaining)) {
2950 memcpy(&r_cfg, cfg, sizeof(*cfg));
2951 if (rtnh->rtnh_ifindex)
2952 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2953
2954 attrlen = rtnh_attrlen(rtnh);
2955 if (attrlen > 0) {
2956 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2957
2958 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2959 if (nla) {
67b61f6c 2960 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2961 r_cfg.fc_flags |= RTF_GATEWAY;
2962 }
19e42e45
RP
2963 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2964 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2965 if (nla)
2966 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2967 }
6b9ea5a6 2968
8c5b83f0
RP
2969 rt = ip6_route_info_create(&r_cfg);
2970 if (IS_ERR(rt)) {
2971 err = PTR_ERR(rt);
2972 rt = NULL;
6b9ea5a6 2973 goto cleanup;
8c5b83f0 2974 }
6b9ea5a6
RP
2975
2976 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2977 if (err) {
6b9ea5a6
RP
2978 dst_free(&rt->dst);
2979 goto cleanup;
2980 }
2981
2982 rtnh = rtnh_next(rtnh, &remaining);
2983 }
2984
2985 err_nh = NULL;
2986 list_for_each_entry(nh, &rt6_nh_list, next) {
2987 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2988 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2989 nh->rt6_info = NULL;
2990 if (err) {
2991 if (replace && nhn)
2992 ip6_print_replace_route_err(&rt6_nh_list);
2993 err_nh = nh;
2994 goto add_errout;
51ebd318 2995 }
6b9ea5a6 2996
1a72418b 2997 /* Because each route is added like a single route we remove
27596472
MK
2998 * these flags after the first nexthop: if there is a collision,
2999 * we have already failed to add the first nexthop:
3000 * fib6_add_rt2node() has rejected it; when replacing, old
3001 * nexthops have been replaced by first new, the rest should
3002 * be added to it.
1a72418b 3003 */
27596472
MK
3004 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3005 NLM_F_REPLACE);
6b9ea5a6
RP
3006 nhn++;
3007 }
3008
3009 goto cleanup;
3010
3011add_errout:
3012 /* Delete routes that were already added */
3013 list_for_each_entry(nh, &rt6_nh_list, next) {
3014 if (err_nh == nh)
3015 break;
3016 ip6_route_del(&nh->r_cfg);
3017 }
3018
3019cleanup:
3020 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3021 if (nh->rt6_info)
3022 dst_free(&nh->rt6_info->dst);
52fe51f8 3023 kfree(nh->mxc.mx);
6b9ea5a6
RP
3024 list_del(&nh->next);
3025 kfree(nh);
3026 }
3027
3028 return err;
3029}
3030
3031static int ip6_route_multipath_del(struct fib6_config *cfg)
3032{
3033 struct fib6_config r_cfg;
3034 struct rtnexthop *rtnh;
3035 int remaining;
3036 int attrlen;
3037 int err = 1, last_err = 0;
3038
3039 remaining = cfg->fc_mp_len;
3040 rtnh = (struct rtnexthop *)cfg->fc_mp;
3041
3042 /* Parse a Multipath Entry */
3043 while (rtnh_ok(rtnh, remaining)) {
3044 memcpy(&r_cfg, cfg, sizeof(*cfg));
3045 if (rtnh->rtnh_ifindex)
3046 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3047
3048 attrlen = rtnh_attrlen(rtnh);
3049 if (attrlen > 0) {
3050 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3051
3052 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3053 if (nla) {
3054 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3055 r_cfg.fc_flags |= RTF_GATEWAY;
3056 }
3057 }
3058 err = ip6_route_del(&r_cfg);
3059 if (err)
3060 last_err = err;
3061
51ebd318
ND
3062 rtnh = rtnh_next(rtnh, &remaining);
3063 }
3064
3065 return last_err;
3066}
3067
67ba4152 3068static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3069{
86872cb5
TG
3070 struct fib6_config cfg;
3071 int err;
1da177e4 3072
86872cb5
TG
3073 err = rtm_to_fib6_config(skb, nlh, &cfg);
3074 if (err < 0)
3075 return err;
3076
51ebd318 3077 if (cfg.fc_mp)
6b9ea5a6 3078 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3079 else
3080 return ip6_route_del(&cfg);
1da177e4
LT
3081}
3082
67ba4152 3083static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3084{
86872cb5
TG
3085 struct fib6_config cfg;
3086 int err;
1da177e4 3087
86872cb5
TG
3088 err = rtm_to_fib6_config(skb, nlh, &cfg);
3089 if (err < 0)
3090 return err;
3091
51ebd318 3092 if (cfg.fc_mp)
6b9ea5a6 3093 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3094 else
3095 return ip6_route_add(&cfg);
1da177e4
LT
3096}
3097
19e42e45 3098static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3099{
3100 return NLMSG_ALIGN(sizeof(struct rtmsg))
3101 + nla_total_size(16) /* RTA_SRC */
3102 + nla_total_size(16) /* RTA_DST */
3103 + nla_total_size(16) /* RTA_GATEWAY */
3104 + nla_total_size(16) /* RTA_PREFSRC */
3105 + nla_total_size(4) /* RTA_TABLE */
3106 + nla_total_size(4) /* RTA_IIF */
3107 + nla_total_size(4) /* RTA_OIF */
3108 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3109 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3110 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3111 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3112 + nla_total_size(1) /* RTA_PREF */
61adedf3 3113 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3114}
3115
191cd582
BH
3116static int rt6_fill_node(struct net *net,
3117 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3118 struct in6_addr *dst, struct in6_addr *src,
15e47304 3119 int iif, int type, u32 portid, u32 seq,
7bc570c8 3120 int prefix, int nowait, unsigned int flags)
1da177e4 3121{
4b32b5ad 3122 u32 metrics[RTAX_MAX];
1da177e4 3123 struct rtmsg *rtm;
2d7202bf 3124 struct nlmsghdr *nlh;
e3703b3d 3125 long expires;
9e762a4a 3126 u32 table;
1da177e4
LT
3127
3128 if (prefix) { /* user wants prefix routes only */
3129 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3130 /* success since this is not a prefix route */
3131 return 1;
3132 }
3133 }
3134
15e47304 3135 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3136 if (!nlh)
26932566 3137 return -EMSGSIZE;
2d7202bf
TG
3138
3139 rtm = nlmsg_data(nlh);
1da177e4
LT
3140 rtm->rtm_family = AF_INET6;
3141 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3142 rtm->rtm_src_len = rt->rt6i_src.plen;
3143 rtm->rtm_tos = 0;
c71099ac 3144 if (rt->rt6i_table)
9e762a4a 3145 table = rt->rt6i_table->tb6_id;
c71099ac 3146 else
9e762a4a
PM
3147 table = RT6_TABLE_UNSPEC;
3148 rtm->rtm_table = table;
c78679e8
DM
3149 if (nla_put_u32(skb, RTA_TABLE, table))
3150 goto nla_put_failure;
ef2c7d7b
ND
3151 if (rt->rt6i_flags & RTF_REJECT) {
3152 switch (rt->dst.error) {
3153 case -EINVAL:
3154 rtm->rtm_type = RTN_BLACKHOLE;
3155 break;
3156 case -EACCES:
3157 rtm->rtm_type = RTN_PROHIBIT;
3158 break;
b4949ab2
ND
3159 case -EAGAIN:
3160 rtm->rtm_type = RTN_THROW;
3161 break;
ef2c7d7b
ND
3162 default:
3163 rtm->rtm_type = RTN_UNREACHABLE;
3164 break;
3165 }
3166 }
38308473 3167 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3168 rtm->rtm_type = RTN_LOCAL;
d1918542 3169 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3170 rtm->rtm_type = RTN_LOCAL;
3171 else
3172 rtm->rtm_type = RTN_UNICAST;
3173 rtm->rtm_flags = 0;
35103d11 3174 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3175 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3176 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3177 rtm->rtm_flags |= RTNH_F_DEAD;
3178 }
1da177e4
LT
3179 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3180 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3181 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3182 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3183 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3184 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3185 rtm->rtm_protocol = RTPROT_RA;
3186 else
3187 rtm->rtm_protocol = RTPROT_KERNEL;
3188 }
1da177e4 3189
38308473 3190 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3191 rtm->rtm_flags |= RTM_F_CLONED;
3192
3193 if (dst) {
930345ea 3194 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3195 goto nla_put_failure;
1ab1457c 3196 rtm->rtm_dst_len = 128;
1da177e4 3197 } else if (rtm->rtm_dst_len)
930345ea 3198 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3199 goto nla_put_failure;
1da177e4
LT
3200#ifdef CONFIG_IPV6_SUBTREES
3201 if (src) {
930345ea 3202 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3203 goto nla_put_failure;
1ab1457c 3204 rtm->rtm_src_len = 128;
c78679e8 3205 } else if (rtm->rtm_src_len &&
930345ea 3206 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3207 goto nla_put_failure;
1da177e4 3208#endif
7bc570c8
YH
3209 if (iif) {
3210#ifdef CONFIG_IPV6_MROUTE
3211 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3212 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3213 if (err <= 0) {
3214 if (!nowait) {
3215 if (err == 0)
3216 return 0;
3217 goto nla_put_failure;
3218 } else {
3219 if (err == -EMSGSIZE)
3220 goto nla_put_failure;
3221 }
3222 }
3223 } else
3224#endif
c78679e8
DM
3225 if (nla_put_u32(skb, RTA_IIF, iif))
3226 goto nla_put_failure;
7bc570c8 3227 } else if (dst) {
1da177e4 3228 struct in6_addr saddr_buf;
c78679e8 3229 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3230 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3231 goto nla_put_failure;
1da177e4 3232 }
2d7202bf 3233
c3968a85
DW
3234 if (rt->rt6i_prefsrc.plen) {
3235 struct in6_addr saddr_buf;
4e3fd7a0 3236 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3237 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3238 goto nla_put_failure;
c3968a85
DW
3239 }
3240
4b32b5ad
MKL
3241 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3242 if (rt->rt6i_pmtu)
3243 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3244 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3245 goto nla_put_failure;
3246
dd0cbf29 3247 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3248 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3249 goto nla_put_failure;
94f826b8 3250 }
2d7202bf 3251
c78679e8
DM
3252 if (rt->dst.dev &&
3253 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3254 goto nla_put_failure;
3255 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3256 goto nla_put_failure;
8253947e
LW
3257
3258 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3259
87a50699 3260 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3261 goto nla_put_failure;
2d7202bf 3262
c78ba6d6
LR
3263 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3264 goto nla_put_failure;
3265
61adedf3 3266 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3267
053c095a
JB
3268 nlmsg_end(skb, nlh);
3269 return 0;
2d7202bf
TG
3270
3271nla_put_failure:
26932566
PM
3272 nlmsg_cancel(skb, nlh);
3273 return -EMSGSIZE;
1da177e4
LT
3274}
3275
1b43af54 3276int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3277{
3278 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3279 int prefix;
3280
2d7202bf
TG
3281 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3282 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3283 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3284 } else
3285 prefix = 0;
3286
191cd582
BH
3287 return rt6_fill_node(arg->net,
3288 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3289 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3290 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3291}
3292
67ba4152 3293static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3294{
3b1e0a65 3295 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3296 struct nlattr *tb[RTA_MAX+1];
3297 struct rt6_info *rt;
1da177e4 3298 struct sk_buff *skb;
ab364a6f 3299 struct rtmsg *rtm;
4c9483b2 3300 struct flowi6 fl6;
72331bc0 3301 int err, iif = 0, oif = 0;
1da177e4 3302
ab364a6f
TG
3303 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3304 if (err < 0)
3305 goto errout;
1da177e4 3306
ab364a6f 3307 err = -EINVAL;
4c9483b2 3308 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3309 rtm = nlmsg_data(nlh);
3310 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
1da177e4 3311
ab364a6f
TG
3312 if (tb[RTA_SRC]) {
3313 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3314 goto errout;
3315
4e3fd7a0 3316 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3317 }
3318
3319 if (tb[RTA_DST]) {
3320 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3321 goto errout;
3322
4e3fd7a0 3323 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3324 }
3325
3326 if (tb[RTA_IIF])
3327 iif = nla_get_u32(tb[RTA_IIF]);
3328
3329 if (tb[RTA_OIF])
72331bc0 3330 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3331
2e47b291
LC
3332 if (tb[RTA_MARK])
3333 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3334
1da177e4
LT
3335 if (iif) {
3336 struct net_device *dev;
72331bc0
SL
3337 int flags = 0;
3338
5578689a 3339 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3340 if (!dev) {
3341 err = -ENODEV;
ab364a6f 3342 goto errout;
1da177e4 3343 }
72331bc0
SL
3344
3345 fl6.flowi6_iif = iif;
3346
3347 if (!ipv6_addr_any(&fl6.saddr))
3348 flags |= RT6_LOOKUP_F_HAS_SADDR;
3349
3350 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3351 flags);
3352 } else {
3353 fl6.flowi6_oif = oif;
3354
ca254490
DA
3355 if (netif_index_is_l3_master(net, oif)) {
3356 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3357 FLOWI_FLAG_SKIP_NH_OIF;
3358 }
3359
72331bc0 3360 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3361 }
3362
ab364a6f 3363 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3364 if (!skb) {
94e187c0 3365 ip6_rt_put(rt);
ab364a6f
TG
3366 err = -ENOBUFS;
3367 goto errout;
3368 }
1da177e4 3369
ab364a6f
TG
3370 /* Reserve room for dummy headers, this skb can pass
3371 through good chunk of routing engine.
3372 */
459a98ed 3373 skb_reset_mac_header(skb);
ab364a6f 3374 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3375
d8d1f30b 3376 skb_dst_set(skb, &rt->dst);
1da177e4 3377
4c9483b2 3378 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3379 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3380 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3381 if (err < 0) {
ab364a6f
TG
3382 kfree_skb(skb);
3383 goto errout;
1da177e4
LT
3384 }
3385
15e47304 3386 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3387errout:
1da177e4 3388 return err;
1da177e4
LT
3389}
3390
37a1d361
RP
3391void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3392 unsigned int nlm_flags)
1da177e4
LT
3393{
3394 struct sk_buff *skb;
5578689a 3395 struct net *net = info->nl_net;
528c4ceb
DL
3396 u32 seq;
3397 int err;
3398
3399 err = -ENOBUFS;
38308473 3400 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3401
19e42e45 3402 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3403 if (!skb)
21713ebc
TG
3404 goto errout;
3405
191cd582 3406 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3407 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3408 if (err < 0) {
3409 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3410 WARN_ON(err == -EMSGSIZE);
3411 kfree_skb(skb);
3412 goto errout;
3413 }
15e47304 3414 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3415 info->nlh, gfp_any());
3416 return;
21713ebc
TG
3417errout:
3418 if (err < 0)
5578689a 3419 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3420}
3421
8ed67789 3422static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3423 unsigned long event, void *ptr)
8ed67789 3424{
351638e7 3425 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3426 struct net *net = dev_net(dev);
8ed67789
DL
3427
3428 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3429 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3430 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3431#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3432 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3433 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3434 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3435 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3436#endif
3437 }
3438
3439 return NOTIFY_OK;
3440}
3441
1da177e4
LT
3442/*
3443 * /proc
3444 */
3445
3446#ifdef CONFIG_PROC_FS
3447
33120b30
AD
3448static const struct file_operations ipv6_route_proc_fops = {
3449 .owner = THIS_MODULE,
3450 .open = ipv6_route_open,
3451 .read = seq_read,
3452 .llseek = seq_lseek,
8d2ca1d7 3453 .release = seq_release_net,
33120b30
AD
3454};
3455
1da177e4
LT
3456static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3457{
69ddb805 3458 struct net *net = (struct net *)seq->private;
1da177e4 3459 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3460 net->ipv6.rt6_stats->fib_nodes,
3461 net->ipv6.rt6_stats->fib_route_nodes,
3462 net->ipv6.rt6_stats->fib_rt_alloc,
3463 net->ipv6.rt6_stats->fib_rt_entries,
3464 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3465 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3466 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3467
3468 return 0;
3469}
3470
3471static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3472{
de05c557 3473 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3474}
3475
9a32144e 3476static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3477 .owner = THIS_MODULE,
3478 .open = rt6_stats_seq_open,
3479 .read = seq_read,
3480 .llseek = seq_lseek,
b6fcbdb4 3481 .release = single_release_net,
1da177e4
LT
3482};
3483#endif /* CONFIG_PROC_FS */
3484
3485#ifdef CONFIG_SYSCTL
3486
1da177e4 3487static
fe2c6338 3488int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3489 void __user *buffer, size_t *lenp, loff_t *ppos)
3490{
c486da34
LAG
3491 struct net *net;
3492 int delay;
3493 if (!write)
1da177e4 3494 return -EINVAL;
c486da34
LAG
3495
3496 net = (struct net *)ctl->extra1;
3497 delay = net->ipv6.sysctl.flush_delay;
3498 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3499 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3500 return 0;
1da177e4
LT
3501}
3502
fe2c6338 3503struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3504 {
1da177e4 3505 .procname = "flush",
4990509f 3506 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3507 .maxlen = sizeof(int),
89c8b3a1 3508 .mode = 0200,
6d9f239a 3509 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3510 },
3511 {
1da177e4 3512 .procname = "gc_thresh",
9a7ec3a9 3513 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3514 .maxlen = sizeof(int),
3515 .mode = 0644,
6d9f239a 3516 .proc_handler = proc_dointvec,
1da177e4
LT
3517 },
3518 {
1da177e4 3519 .procname = "max_size",
4990509f 3520 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3521 .maxlen = sizeof(int),
3522 .mode = 0644,
6d9f239a 3523 .proc_handler = proc_dointvec,
1da177e4
LT
3524 },
3525 {
1da177e4 3526 .procname = "gc_min_interval",
4990509f 3527 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3528 .maxlen = sizeof(int),
3529 .mode = 0644,
6d9f239a 3530 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3531 },
3532 {
1da177e4 3533 .procname = "gc_timeout",
4990509f 3534 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3535 .maxlen = sizeof(int),
3536 .mode = 0644,
6d9f239a 3537 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3538 },
3539 {
1da177e4 3540 .procname = "gc_interval",
4990509f 3541 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3542 .maxlen = sizeof(int),
3543 .mode = 0644,
6d9f239a 3544 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3545 },
3546 {
1da177e4 3547 .procname = "gc_elasticity",
4990509f 3548 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3549 .maxlen = sizeof(int),
3550 .mode = 0644,
f3d3f616 3551 .proc_handler = proc_dointvec,
1da177e4
LT
3552 },
3553 {
1da177e4 3554 .procname = "mtu_expires",
4990509f 3555 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3556 .maxlen = sizeof(int),
3557 .mode = 0644,
6d9f239a 3558 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3559 },
3560 {
1da177e4 3561 .procname = "min_adv_mss",
4990509f 3562 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3563 .maxlen = sizeof(int),
3564 .mode = 0644,
f3d3f616 3565 .proc_handler = proc_dointvec,
1da177e4
LT
3566 },
3567 {
1da177e4 3568 .procname = "gc_min_interval_ms",
4990509f 3569 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3570 .maxlen = sizeof(int),
3571 .mode = 0644,
6d9f239a 3572 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3573 },
f8572d8f 3574 { }
1da177e4
LT
3575};
3576
2c8c1e72 3577struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3578{
3579 struct ctl_table *table;
3580
3581 table = kmemdup(ipv6_route_table_template,
3582 sizeof(ipv6_route_table_template),
3583 GFP_KERNEL);
5ee09105
YH
3584
3585 if (table) {
3586 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3587 table[0].extra1 = net;
86393e52 3588 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3589 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3590 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3591 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3592 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3593 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3594 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3595 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3596 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3597
3598 /* Don't export sysctls to unprivileged users */
3599 if (net->user_ns != &init_user_ns)
3600 table[0].procname = NULL;
5ee09105
YH
3601 }
3602
760f2d01
DL
3603 return table;
3604}
1da177e4
LT
3605#endif
3606
2c8c1e72 3607static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3608{
633d424b 3609 int ret = -ENOMEM;
8ed67789 3610
86393e52
AD
3611 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3612 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3613
fc66f95c
ED
3614 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3615 goto out_ip6_dst_ops;
3616
8ed67789
DL
3617 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3618 sizeof(*net->ipv6.ip6_null_entry),
3619 GFP_KERNEL);
3620 if (!net->ipv6.ip6_null_entry)
fc66f95c 3621 goto out_ip6_dst_entries;
d8d1f30b 3622 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3623 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3624 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3625 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3626 ip6_template_metrics, true);
8ed67789
DL
3627
3628#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3629 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3630 sizeof(*net->ipv6.ip6_prohibit_entry),
3631 GFP_KERNEL);
68fffc67
PZ
3632 if (!net->ipv6.ip6_prohibit_entry)
3633 goto out_ip6_null_entry;
d8d1f30b 3634 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3635 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3636 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3637 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3638 ip6_template_metrics, true);
8ed67789
DL
3639
3640 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3641 sizeof(*net->ipv6.ip6_blk_hole_entry),
3642 GFP_KERNEL);
68fffc67
PZ
3643 if (!net->ipv6.ip6_blk_hole_entry)
3644 goto out_ip6_prohibit_entry;
d8d1f30b 3645 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3646 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3647 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3648 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3649 ip6_template_metrics, true);
8ed67789
DL
3650#endif
3651
b339a47c
PZ
3652 net->ipv6.sysctl.flush_delay = 0;
3653 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3654 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3655 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3656 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3658 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3659 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3660
6891a346
BT
3661 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3662
8ed67789
DL
3663 ret = 0;
3664out:
3665 return ret;
f2fc6a54 3666
68fffc67
PZ
3667#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3668out_ip6_prohibit_entry:
3669 kfree(net->ipv6.ip6_prohibit_entry);
3670out_ip6_null_entry:
3671 kfree(net->ipv6.ip6_null_entry);
3672#endif
fc66f95c
ED
3673out_ip6_dst_entries:
3674 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3675out_ip6_dst_ops:
f2fc6a54 3676 goto out;
cdb18761
DL
3677}
3678
2c8c1e72 3679static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3680{
8ed67789
DL
3681 kfree(net->ipv6.ip6_null_entry);
3682#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3683 kfree(net->ipv6.ip6_prohibit_entry);
3684 kfree(net->ipv6.ip6_blk_hole_entry);
3685#endif
41bb78b4 3686 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3687}
3688
d189634e
TG
3689static int __net_init ip6_route_net_init_late(struct net *net)
3690{
3691#ifdef CONFIG_PROC_FS
d4beaa66
G
3692 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3693 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3694#endif
3695 return 0;
3696}
3697
3698static void __net_exit ip6_route_net_exit_late(struct net *net)
3699{
3700#ifdef CONFIG_PROC_FS
ece31ffd
G
3701 remove_proc_entry("ipv6_route", net->proc_net);
3702 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3703#endif
3704}
3705
cdb18761
DL
3706static struct pernet_operations ip6_route_net_ops = {
3707 .init = ip6_route_net_init,
3708 .exit = ip6_route_net_exit,
3709};
3710
c3426b47
DM
3711static int __net_init ipv6_inetpeer_init(struct net *net)
3712{
3713 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3714
3715 if (!bp)
3716 return -ENOMEM;
3717 inet_peer_base_init(bp);
3718 net->ipv6.peers = bp;
3719 return 0;
3720}
3721
3722static void __net_exit ipv6_inetpeer_exit(struct net *net)
3723{
3724 struct inet_peer_base *bp = net->ipv6.peers;
3725
3726 net->ipv6.peers = NULL;
56a6b248 3727 inetpeer_invalidate_tree(bp);
c3426b47
DM
3728 kfree(bp);
3729}
3730
2b823f72 3731static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3732 .init = ipv6_inetpeer_init,
3733 .exit = ipv6_inetpeer_exit,
3734};
3735
d189634e
TG
3736static struct pernet_operations ip6_route_net_late_ops = {
3737 .init = ip6_route_net_init_late,
3738 .exit = ip6_route_net_exit_late,
3739};
3740
8ed67789
DL
3741static struct notifier_block ip6_route_dev_notifier = {
3742 .notifier_call = ip6_route_dev_notify,
3743 .priority = 0,
3744};
3745
433d49c3 3746int __init ip6_route_init(void)
1da177e4 3747{
433d49c3 3748 int ret;
8d0b94af 3749 int cpu;
433d49c3 3750
9a7ec3a9
DL
3751 ret = -ENOMEM;
3752 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3753 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3754 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3755 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3756 goto out;
14e50e57 3757
fc66f95c 3758 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3759 if (ret)
bdb3289f 3760 goto out_kmem_cache;
bdb3289f 3761
c3426b47
DM
3762 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3763 if (ret)
e8803b6c 3764 goto out_dst_entries;
2a0c451a 3765
7e52b33b
DM
3766 ret = register_pernet_subsys(&ip6_route_net_ops);
3767 if (ret)
3768 goto out_register_inetpeer;
c3426b47 3769
5dc121e9
AE
3770 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3771
8ed67789
DL
3772 /* Registering of the loopback is done before this portion of code,
3773 * the loopback reference in rt6_info will not be taken, do it
3774 * manually for init_net */
d8d1f30b 3775 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3776 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3777 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3778 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3779 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3780 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3781 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3782 #endif
e8803b6c 3783 ret = fib6_init();
433d49c3 3784 if (ret)
8ed67789 3785 goto out_register_subsys;
433d49c3 3786
433d49c3
DL
3787 ret = xfrm6_init();
3788 if (ret)
e8803b6c 3789 goto out_fib6_init;
c35b7e72 3790
433d49c3
DL
3791 ret = fib6_rules_init();
3792 if (ret)
3793 goto xfrm6_init;
7e5449c2 3794
d189634e
TG
3795 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3796 if (ret)
3797 goto fib6_rules_init;
3798
433d49c3 3799 ret = -ENOBUFS;
c7ac8679
GR
3800 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3801 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3802 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3803 goto out_register_late_subsys;
c127ea2c 3804
8ed67789 3805 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3806 if (ret)
d189634e 3807 goto out_register_late_subsys;
8ed67789 3808
8d0b94af
MKL
3809 for_each_possible_cpu(cpu) {
3810 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3811
3812 INIT_LIST_HEAD(&ul->head);
3813 spin_lock_init(&ul->lock);
3814 }
3815
433d49c3
DL
3816out:
3817 return ret;
3818
d189634e
TG
3819out_register_late_subsys:
3820 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3821fib6_rules_init:
433d49c3
DL
3822 fib6_rules_cleanup();
3823xfrm6_init:
433d49c3 3824 xfrm6_fini();
2a0c451a
TG
3825out_fib6_init:
3826 fib6_gc_cleanup();
8ed67789
DL
3827out_register_subsys:
3828 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3829out_register_inetpeer:
3830 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3831out_dst_entries:
3832 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3833out_kmem_cache:
f2fc6a54 3834 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3835 goto out;
1da177e4
LT
3836}
3837
3838void ip6_route_cleanup(void)
3839{
8ed67789 3840 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3841 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3842 fib6_rules_cleanup();
1da177e4 3843 xfrm6_fini();
1da177e4 3844 fib6_gc_cleanup();
c3426b47 3845 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3846 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3847 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3848 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3849}