Merge tag 'wireless-drivers-for-davem-2016-01-29' of git://git.kernel.org/pub/scm...
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4
LT
66
67#include <asm/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of the neighbour check performed while scoring a route.
 * Every failure code is negative; the only success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
111
8d0b94af
MKL
112struct uncached_list {
113 spinlock_t lock;
114 struct list_head head;
115};
116
117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119static void rt6_uncached_list_add(struct rt6_info *rt)
120{
121 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122
123 rt->dst.flags |= DST_NOCACHE;
124 rt->rt6i_uncached_list = ul;
125
126 spin_lock_bh(&ul->lock);
127 list_add_tail(&rt->rt6i_uncached, &ul->head);
128 spin_unlock_bh(&ul->lock);
129}
130
131static void rt6_uncached_list_del(struct rt6_info *rt)
132{
133 if (!list_empty(&rt->rt6i_uncached)) {
134 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136 spin_lock_bh(&ul->lock);
137 list_del(&rt->rt6i_uncached);
138 spin_unlock_bh(&ul->lock);
139 }
140}
141
142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143{
144 struct net_device *loopback_dev = net->loopback_dev;
145 int cpu;
146
e332bc67
EB
147 if (dev == loopback_dev)
148 return;
149
8d0b94af
MKL
150 for_each_possible_cpu(cpu) {
151 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 struct rt6_info *rt;
153
154 spin_lock_bh(&ul->lock);
155 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 struct inet6_dev *rt_idev = rt->rt6i_idev;
157 struct net_device *rt_dev = rt->dst.dev;
158
e332bc67 159 if (rt_idev->dev == dev) {
8d0b94af
MKL
160 rt->rt6i_idev = in6_dev_get(loopback_dev);
161 in6_dev_put(rt_idev);
162 }
163
e332bc67 164 if (rt_dev == dev) {
8d0b94af
MKL
165 rt->dst.dev = loopback_dev;
166 dev_hold(rt->dst.dev);
167 dev_put(rt_dev);
168 }
169 }
170 spin_unlock_bh(&ul->lock);
171 }
172}
173
d52d3997
MKL
174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175{
176 return dst_metrics_write_ptr(rt->dst.from);
177}
178
06582540
DM
179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180{
4b32b5ad 181 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 182
d52d3997
MKL
183 if (rt->rt6i_flags & RTF_PCPU)
184 return rt6_pcpu_cow_metrics(rt);
185 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
186 return NULL;
187 else
3b471175 188 return dst_cow_metrics_generic(dst, old);
06582540
DM
189}
190
f894cbf8
DM
191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 struct sk_buff *skb,
193 const void *daddr)
39232973
DM
194{
195 struct in6_addr *p = &rt->rt6i_gateway;
196
a7563f34 197 if (!ipv6_addr_any(p))
39232973 198 return (const void *) p;
f894cbf8
DM
199 else if (skb)
200 return &ipv6_hdr(skb)->daddr;
39232973
DM
201 return daddr;
202}
203
f894cbf8
DM
204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct rt6_info *rt = (struct rt6_info *) dst;
209 struct neighbour *n;
210
f894cbf8 211 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 212 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
213 if (n)
214 return n;
215 return neigh_create(&nd_tbl, daddr, dst->dev);
216}
217
9a7ec3a9 218static struct dst_ops ip6_dst_ops_template = {
1da177e4 219 .family = AF_INET6,
1da177e4
LT
220 .gc = ip6_dst_gc,
221 .gc_thresh = 1024,
222 .check = ip6_dst_check,
0dbaee3b 223 .default_advmss = ip6_default_advmss,
ebb762f2 224 .mtu = ip6_mtu,
06582540 225 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
226 .destroy = ip6_dst_destroy,
227 .ifdown = ip6_dst_ifdown,
228 .negative_advice = ip6_negative_advice,
229 .link_failure = ip6_link_failure,
230 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 231 .redirect = rt6_do_redirect,
9f8955cc 232 .local_out = __ip6_local_out,
d3aaeb38 233 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
234};
235
ebb762f2 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 237{
618f9bc7
SK
238 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239
240 return mtu ? : dst->dev->mtu;
ec831ea7
RD
241}
242
6700c270
DM
243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 struct sk_buff *skb, u32 mtu)
14e50e57
DM
245{
246}
247
6700c270
DM
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty: blackhole dsts ignore redirects */
}
252
14e50e57
DM
253static struct dst_ops ip6_dst_blackhole_ops = {
254 .family = AF_INET6,
14e50e57
DM
255 .destroy = ip6_dst_destroy,
256 .check = ip6_dst_check,
ebb762f2 257 .mtu = ip6_blackhole_mtu,
214f45c9 258 .default_advmss = ip6_default_advmss,
14e50e57 259 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 260 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 261 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 262 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
263};
264
62fa8a84 265static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 266 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
267};
268
fb0af4c7 269static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
2c20cbd7 273 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 274 .error = -ENETUNREACH,
d8d1f30b
CG
275 .input = ip6_pkt_discard,
276 .output = ip6_pkt_discard_out,
1da177e4
LT
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 279 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282};
283
101367c2
TG
284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
285
fb0af4c7 286static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
287 .dst = {
288 .__refcnt = ATOMIC_INIT(1),
289 .__use = 1,
2c20cbd7 290 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 291 .error = -EACCES,
d8d1f30b
CG
292 .input = ip6_pkt_prohibit,
293 .output = ip6_pkt_prohibit_out,
101367c2
TG
294 },
295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 296 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
297 .rt6i_metric = ~(u32) 0,
298 .rt6i_ref = ATOMIC_INIT(1),
299};
300
fb0af4c7 301static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -EINVAL,
d8d1f30b 307 .input = dst_discard,
ede2059d 308 .output = dst_discard_out,
101367c2
TG
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 311 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
312 .rt6i_metric = ~(u32) 0,
313 .rt6i_ref = ATOMIC_INIT(1),
314};
315
316#endif
317
ebfa45f0
MKL
318static void rt6_info_init(struct rt6_info *rt)
319{
320 struct dst_entry *dst = &rt->dst;
321
322 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 INIT_LIST_HEAD(&rt->rt6i_siblings);
324 INIT_LIST_HEAD(&rt->rt6i_uncached);
325}
326
1da177e4 327/* allocate dst with ip6_dst_ops */
d52d3997
MKL
328static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 struct net_device *dev,
ad706862 330 int flags)
1da177e4 331{
97bab73f 332 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 333 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 334
ebfa45f0
MKL
335 if (rt)
336 rt6_info_init(rt);
8104891b 337
cf911662 338 return rt;
1da177e4
LT
339}
340
d52d3997
MKL
341static struct rt6_info *ip6_dst_alloc(struct net *net,
342 struct net_device *dev,
ad706862 343 int flags)
d52d3997 344{
ad706862 345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
346
347 if (rt) {
348 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 if (rt->rt6i_pcpu) {
350 int cpu;
351
352 for_each_possible_cpu(cpu) {
353 struct rt6_info **p;
354
355 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 /* no one shares rt */
357 *p = NULL;
358 }
359 } else {
360 dst_destroy((struct dst_entry *)rt);
361 return NULL;
362 }
363 }
364
365 return rt;
366}
367
1da177e4
LT
368static void ip6_dst_destroy(struct dst_entry *dst)
369{
370 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 371 struct dst_entry *from = dst->from;
8d0b94af 372 struct inet6_dev *idev;
1da177e4 373
4b32b5ad 374 dst_destroy_metrics_generic(dst);
87775312 375 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
376 rt6_uncached_list_del(rt);
377
378 idev = rt->rt6i_idev;
38308473 379 if (idev) {
1da177e4
LT
380 rt->rt6i_idev = NULL;
381 in6_dev_put(idev);
1ab1457c 382 }
1716a961 383
ecd98837
YH
384 dst->from = NULL;
385 dst_release(from);
b3419363
DM
386}
387
1da177e4
LT
388static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
389 int how)
390{
391 struct rt6_info *rt = (struct rt6_info *)dst;
392 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 393 struct net_device *loopback_dev =
c346dca1 394 dev_net(dev)->loopback_dev;
1da177e4 395
97cac082
DM
396 if (dev != loopback_dev) {
397 if (idev && idev->dev == dev) {
398 struct inet6_dev *loopback_idev =
399 in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
403 }
404 }
1da177e4
LT
405 }
406}
407
5973fb1e
MKL
408static bool __rt6_check_expired(const struct rt6_info *rt)
409{
410 if (rt->rt6i_flags & RTF_EXPIRES)
411 return time_after(jiffies, rt->dst.expires);
412 else
413 return false;
414}
415
a50feda5 416static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 417{
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
1716a961 421 } else if (rt->dst.from) {
3fd91fb3 422 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 423 }
a50feda5 424 return false;
1da177e4
LT
425}
426
51ebd318
ND
427/* Multipath route selection:
428 * Hash based function using packet header and flowlabel.
429 * Adapted from fib_info_hashfn()
430 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int flow_hash = get_hash_from_flowi6(fl6);

	/* map the flow hash onto an index in [0, candidate_count) */
	return flow_hash % candidate_count;
}
436
437static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
438 struct flowi6 *fl6, int oif,
439 int strict)
51ebd318
ND
440{
441 struct rt6_info *sibling, *next_sibling;
442 int route_choosen;
443
444 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445 /* Don't change the route, if route_choosen == 0
446 * (siblings does not include ourself)
447 */
448 if (route_choosen)
449 list_for_each_entry_safe(sibling, next_sibling,
450 &match->rt6i_siblings, rt6i_siblings) {
451 route_choosen--;
452 if (route_choosen == 0) {
52bd4c0c
ND
453 if (rt6_score_route(sibling, oif, strict) < 0)
454 break;
51ebd318
ND
455 match = sibling;
456 break;
457 }
458 }
459 return match;
460}
461
1da177e4 462/*
c71099ac 463 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
464 */
465
8ed67789
DL
466static inline struct rt6_info *rt6_device_match(struct net *net,
467 struct rt6_info *rt,
b71d1d42 468 const struct in6_addr *saddr,
1da177e4 469 int oif,
d420895e 470 int flags)
1da177e4
LT
471{
472 struct rt6_info *local = NULL;
473 struct rt6_info *sprt;
474
dd3abc4e
YH
475 if (!oif && ipv6_addr_any(saddr))
476 goto out;
477
d8d1f30b 478 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 479 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
480
481 if (oif) {
1da177e4
LT
482 if (dev->ifindex == oif)
483 return sprt;
484 if (dev->flags & IFF_LOOPBACK) {
38308473 485 if (!sprt->rt6i_idev ||
1da177e4 486 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 487 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 488 continue;
17fb0b2b
DA
489 if (local &&
490 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
491 continue;
492 }
493 local = sprt;
494 }
dd3abc4e
YH
495 } else {
496 if (ipv6_chk_addr(net, saddr, dev,
497 flags & RT6_LOOKUP_F_IFACE))
498 return sprt;
1da177e4 499 }
dd3abc4e 500 }
1da177e4 501
dd3abc4e 502 if (oif) {
1da177e4
LT
503 if (local)
504 return local;
505
d420895e 506 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 507 return net->ipv6.ip6_null_entry;
1da177e4 508 }
dd3abc4e 509out:
1da177e4
LT
510 return rt;
511}
512
27097255 513#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
514struct __rt6_probe_work {
515 struct work_struct work;
516 struct in6_addr target;
517 struct net_device *dev;
518};
519
520static void rt6_probe_deferred(struct work_struct *w)
521{
522 struct in6_addr mcaddr;
523 struct __rt6_probe_work *work =
524 container_of(w, struct __rt6_probe_work, work);
525
526 addrconf_addr_solict_mult(&work->target, &mcaddr);
304d888b 527 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
c2f17e82 528 dev_put(work->dev);
662f5533 529 kfree(work);
c2f17e82
HFS
530}
531
27097255
YH
532static void rt6_probe(struct rt6_info *rt)
533{
990edb42 534 struct __rt6_probe_work *work;
f2c31e32 535 struct neighbour *neigh;
27097255
YH
536 /*
537 * Okay, this does not seem to be appropriate
538 * for now, however, we need to check if it
539 * is really so; aka Router Reachability Probing.
540 *
541 * Router Reachability Probe MUST be rate-limited
542 * to no more than one per minute.
543 */
2152caea 544 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 545 return;
2152caea
YH
546 rcu_read_lock_bh();
547 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
548 if (neigh) {
8d6c31bf
MKL
549 if (neigh->nud_state & NUD_VALID)
550 goto out;
551
990edb42 552 work = NULL;
2152caea 553 write_lock(&neigh->lock);
990edb42
MKL
554 if (!(neigh->nud_state & NUD_VALID) &&
555 time_after(jiffies,
556 neigh->updated +
557 rt->rt6i_idev->cnf.rtr_probe_interval)) {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
559 if (work)
560 __neigh_set_probe_once(neigh);
c2f17e82 561 }
2152caea 562 write_unlock(&neigh->lock);
990edb42
MKL
563 } else {
564 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 565 }
990edb42
MKL
566
567 if (work) {
568 INIT_WORK(&work->work, rt6_probe_deferred);
569 work->target = rt->rt6i_gateway;
570 dev_hold(rt->dst.dev);
571 work->dev = rt->dst.dev;
572 schedule_work(&work->work);
573 }
574
8d6c31bf 575out:
2152caea 576 rcu_read_unlock_bh();
27097255
YH
577}
578#else
static inline void rt6_probe(struct rt6_info *rt)
{
	/* no-op in this configuration */
}
582#endif
583
1da177e4 584/*
554cfb7e 585 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 586 */
b6f99a21 587static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 588{
d1918542 589 struct net_device *dev = rt->dst.dev;
161980f4 590 if (!oif || dev->ifindex == oif)
554cfb7e 591 return 2;
161980f4
DM
592 if ((dev->flags & IFF_LOOPBACK) &&
593 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594 return 1;
595 return 0;
554cfb7e 596}
1da177e4 597
afc154e9 598static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 599{
f2c31e32 600 struct neighbour *neigh;
afc154e9 601 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 602
4d0c5911
YH
603 if (rt->rt6i_flags & RTF_NONEXTHOP ||
604 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 605 return RT6_NUD_SUCCEED;
145a3621
YH
606
607 rcu_read_lock_bh();
608 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
609 if (neigh) {
610 read_lock(&neigh->lock);
554cfb7e 611 if (neigh->nud_state & NUD_VALID)
afc154e9 612 ret = RT6_NUD_SUCCEED;
398bcbeb 613#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 614 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 615 ret = RT6_NUD_SUCCEED;
7e980569
JB
616 else
617 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 618#endif
145a3621 619 read_unlock(&neigh->lock);
afc154e9
HFS
620 } else {
621 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 622 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 623 }
145a3621
YH
624 rcu_read_unlock_bh();
625
a5a81f0b 626 return ret;
1da177e4
LT
627}
628
554cfb7e
YH
629static int rt6_score_route(struct rt6_info *rt, int oif,
630 int strict)
1da177e4 631{
a5a81f0b 632 int m;
1ab1457c 633
4d0c5911 634 m = rt6_check_dev(rt, oif);
77d16f45 635 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 636 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
637#ifdef CONFIG_IPV6_ROUTER_PREF
638 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
639#endif
afc154e9
HFS
640 if (strict & RT6_LOOKUP_F_REACHABLE) {
641 int n = rt6_check_neigh(rt);
642 if (n < 0)
643 return n;
644 }
554cfb7e
YH
645 return m;
646}
647
f11e6659 648static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
649 int *mpri, struct rt6_info *match,
650 bool *do_rr)
554cfb7e 651{
f11e6659 652 int m;
afc154e9 653 bool match_do_rr = false;
35103d11
AG
654 struct inet6_dev *idev = rt->rt6i_idev;
655 struct net_device *dev = rt->dst.dev;
656
657 if (dev && !netif_carrier_ok(dev) &&
658 idev->cnf.ignore_routes_with_linkdown)
659 goto out;
f11e6659
DM
660
661 if (rt6_check_expired(rt))
662 goto out;
663
664 m = rt6_score_route(rt, oif, strict);
7e980569 665 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
666 match_do_rr = true;
667 m = 0; /* lowest valid score */
7e980569 668 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 669 goto out;
afc154e9
HFS
670 }
671
672 if (strict & RT6_LOOKUP_F_REACHABLE)
673 rt6_probe(rt);
f11e6659 674
7e980569 675 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 676 if (m > *mpri) {
afc154e9 677 *do_rr = match_do_rr;
f11e6659
DM
678 *mpri = m;
679 match = rt;
f11e6659 680 }
f11e6659
DM
681out:
682 return match;
683}
684
685static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
686 struct rt6_info *rr_head,
afc154e9
HFS
687 u32 metric, int oif, int strict,
688 bool *do_rr)
f11e6659 689{
9fbdcfaf 690 struct rt6_info *rt, *match, *cont;
554cfb7e 691 int mpri = -1;
1da177e4 692
f11e6659 693 match = NULL;
9fbdcfaf
SK
694 cont = NULL;
695 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
696 if (rt->rt6i_metric != metric) {
697 cont = rt;
698 break;
699 }
700
701 match = find_match(rt, oif, strict, &mpri, match, do_rr);
702 }
703
704 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
705 if (rt->rt6i_metric != metric) {
706 cont = rt;
707 break;
708 }
709
afc154e9 710 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
711 }
712
713 if (match || !cont)
714 return match;
715
716 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 717 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 718
f11e6659
DM
719 return match;
720}
1da177e4 721
f11e6659
DM
722static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
723{
724 struct rt6_info *match, *rt0;
8ed67789 725 struct net *net;
afc154e9 726 bool do_rr = false;
1da177e4 727
f11e6659
DM
728 rt0 = fn->rr_ptr;
729 if (!rt0)
730 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 731
afc154e9
HFS
732 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
733 &do_rr);
1da177e4 734
afc154e9 735 if (do_rr) {
d8d1f30b 736 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 737
554cfb7e 738 /* no entries matched; do round-robin */
f11e6659
DM
739 if (!next || next->rt6i_metric != rt0->rt6i_metric)
740 next = fn->leaf;
741
742 if (next != rt0)
743 fn->rr_ptr = next;
1da177e4 744 }
1da177e4 745
d1918542 746 net = dev_net(rt0->dst.dev);
a02cec21 747 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
748}
749
8b9df265
MKL
750static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751{
752 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753}
754
70ceb4f5
YH
755#ifdef CONFIG_IPV6_ROUTE_INFO
756int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 757 const struct in6_addr *gwaddr)
70ceb4f5 758{
c346dca1 759 struct net *net = dev_net(dev);
70ceb4f5
YH
760 struct route_info *rinfo = (struct route_info *) opt;
761 struct in6_addr prefix_buf, *prefix;
762 unsigned int pref;
4bed72e4 763 unsigned long lifetime;
70ceb4f5
YH
764 struct rt6_info *rt;
765
766 if (len < sizeof(struct route_info)) {
767 return -EINVAL;
768 }
769
770 /* Sanity check for prefix_len and length */
771 if (rinfo->length > 3) {
772 return -EINVAL;
773 } else if (rinfo->prefix_len > 128) {
774 return -EINVAL;
775 } else if (rinfo->prefix_len > 64) {
776 if (rinfo->length < 2) {
777 return -EINVAL;
778 }
779 } else if (rinfo->prefix_len > 0) {
780 if (rinfo->length < 1) {
781 return -EINVAL;
782 }
783 }
784
785 pref = rinfo->route_pref;
786 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 787 return -EINVAL;
70ceb4f5 788
4bed72e4 789 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
790
791 if (rinfo->length == 3)
792 prefix = (struct in6_addr *)rinfo->prefix;
793 else {
794 /* this function is safe */
795 ipv6_addr_prefix(&prefix_buf,
796 (struct in6_addr *)rinfo->prefix,
797 rinfo->prefix_len);
798 prefix = &prefix_buf;
799 }
800
f104a567
DJ
801 if (rinfo->prefix_len == 0)
802 rt = rt6_get_dflt_router(gwaddr, dev);
803 else
804 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
805 gwaddr, dev->ifindex);
70ceb4f5
YH
806
807 if (rt && !lifetime) {
e0a1ad73 808 ip6_del_rt(rt);
70ceb4f5
YH
809 rt = NULL;
810 }
811
812 if (!rt && lifetime)
efa2cea0 813 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
814 pref);
815 else if (rt)
816 rt->rt6i_flags = RTF_ROUTEINFO |
817 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
818
819 if (rt) {
1716a961
G
820 if (!addrconf_finite_timeout(lifetime))
821 rt6_clean_expires(rt);
822 else
823 rt6_set_expires(rt, jiffies + HZ * lifetime);
824
94e187c0 825 ip6_rt_put(rt);
70ceb4f5
YH
826 }
827 return 0;
828}
829#endif
830
a3c00e46
MKL
831static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832 struct in6_addr *saddr)
833{
834 struct fib6_node *pn;
835 while (1) {
836 if (fn->fn_flags & RTN_TL_ROOT)
837 return NULL;
838 pn = fn->parent;
839 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841 else
842 fn = pn;
843 if (fn->fn_flags & RTN_RTINFO)
844 return fn;
845 }
846}
c71099ac 847
8ed67789
DL
848static struct rt6_info *ip6_pol_route_lookup(struct net *net,
849 struct fib6_table *table,
4c9483b2 850 struct flowi6 *fl6, int flags)
1da177e4
LT
851{
852 struct fib6_node *fn;
853 struct rt6_info *rt;
854
c71099ac 855 read_lock_bh(&table->tb6_lock);
4c9483b2 856 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
857restart:
858 rt = fn->leaf;
4c9483b2 859 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 860 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 861 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
862 if (rt == net->ipv6.ip6_null_entry) {
863 fn = fib6_backtrack(fn, &fl6->saddr);
864 if (fn)
865 goto restart;
866 }
d8d1f30b 867 dst_use(&rt->dst, jiffies);
c71099ac 868 read_unlock_bh(&table->tb6_lock);
b811580d
DA
869
870 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
871
c71099ac
TG
872 return rt;
873
874}
875
67ba4152 876struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
877 int flags)
878{
879 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880}
881EXPORT_SYMBOL_GPL(ip6_route_lookup);
882
9acd9f3a
YH
883struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884 const struct in6_addr *saddr, int oif, int strict)
c71099ac 885{
4c9483b2
DM
886 struct flowi6 fl6 = {
887 .flowi6_oif = oif,
888 .daddr = *daddr,
c71099ac
TG
889 };
890 struct dst_entry *dst;
77d16f45 891 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 892
adaa70bb 893 if (saddr) {
4c9483b2 894 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
895 flags |= RT6_LOOKUP_F_HAS_SADDR;
896 }
897
4c9483b2 898 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
899 if (dst->error == 0)
900 return (struct rt6_info *) dst;
901
902 dst_release(dst);
903
1da177e4
LT
904 return NULL;
905}
7159039a
YH
906EXPORT_SYMBOL(rt6_lookup);
907
c71099ac 908/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
909 It takes new route entry, the addition fails by any reason the
910 route is freed. In any case, if caller does not hold it, it may
911 be destroyed.
912 */
913
e5fd387a 914static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 915 struct mx6_config *mxc)
1da177e4
LT
916{
917 int err;
c71099ac 918 struct fib6_table *table;
1da177e4 919
c71099ac
TG
920 table = rt->rt6i_table;
921 write_lock_bh(&table->tb6_lock);
e715b6d3 922 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 923 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
924
925 return err;
926}
927
40e22e8f
TG
928int ip6_ins_rt(struct rt6_info *rt)
929{
e715b6d3
FW
930 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
931 struct mx6_config mxc = { .mx = NULL, };
932
933 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
934}
935
8b9df265
MKL
936static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
937 const struct in6_addr *daddr,
938 const struct in6_addr *saddr)
1da177e4 939{
1da177e4
LT
940 struct rt6_info *rt;
941
942 /*
943 * Clone the route.
944 */
945
d52d3997 946 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 947 ort = (struct rt6_info *)ort->dst.from;
1da177e4 948
ad706862 949 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
950
951 if (!rt)
952 return NULL;
953
954 ip6_rt_copy_init(rt, ort);
955 rt->rt6i_flags |= RTF_CACHE;
956 rt->rt6i_metric = 0;
957 rt->dst.flags |= DST_HOST;
958 rt->rt6i_dst.addr = *daddr;
959 rt->rt6i_dst.plen = 128;
1da177e4 960
83a09abd
MKL
961 if (!rt6_is_gw_or_nonexthop(ort)) {
962 if (ort->rt6i_dst.plen != 128 &&
963 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
964 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 965#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
966 if (rt->rt6i_src.plen && saddr) {
967 rt->rt6i_src.addr = *saddr;
968 rt->rt6i_src.plen = 128;
8b9df265 969 }
83a09abd 970#endif
95a9a5ba 971 }
1da177e4 972
95a9a5ba
YH
973 return rt;
974}
1da177e4 975
d52d3997
MKL
976static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
977{
978 struct rt6_info *pcpu_rt;
979
980 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 981 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
982
983 if (!pcpu_rt)
984 return NULL;
985 ip6_rt_copy_init(pcpu_rt, rt);
986 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
987 pcpu_rt->rt6i_flags |= RTF_PCPU;
988 return pcpu_rt;
989}
990
991/* It should be called with read_lock_bh(&tb6_lock) acquired */
992static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
993{
a73e4195 994 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
995
996 p = this_cpu_ptr(rt->rt6i_pcpu);
997 pcpu_rt = *p;
998
a73e4195
MKL
999 if (pcpu_rt) {
1000 dst_hold(&pcpu_rt->dst);
1001 rt6_dst_from_metrics_check(pcpu_rt);
1002 }
1003 return pcpu_rt;
1004}
1005
1006static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1007{
9c7370a1 1008 struct fib6_table *table = rt->rt6i_table;
a73e4195 1009 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1010
1011 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012 if (!pcpu_rt) {
1013 struct net *net = dev_net(rt->dst.dev);
1014
9c7370a1
MKL
1015 dst_hold(&net->ipv6.ip6_null_entry->dst);
1016 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1017 }
1018
9c7370a1
MKL
1019 read_lock_bh(&table->tb6_lock);
1020 if (rt->rt6i_pcpu) {
1021 p = this_cpu_ptr(rt->rt6i_pcpu);
1022 prev = cmpxchg(p, NULL, pcpu_rt);
1023 if (prev) {
1024 /* If someone did it before us, return prev instead */
1025 dst_destroy(&pcpu_rt->dst);
1026 pcpu_rt = prev;
1027 }
1028 } else {
1029 /* rt has been removed from the fib6 tree
1030 * before we have a chance to acquire the read_lock.
1031 * In this case, don't brother to create a pcpu rt
1032 * since rt is going away anyway. The next
1033 * dst_check() will trigger a re-lookup.
1034 */
d52d3997 1035 dst_destroy(&pcpu_rt->dst);
9c7370a1 1036 pcpu_rt = rt;
d52d3997 1037 }
d52d3997
MKL
1038 dst_hold(&pcpu_rt->dst);
1039 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1040 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1041 return pcpu_rt;
1042}
1043
8ed67789 1044static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1045 struct flowi6 *fl6, int flags)
1da177e4 1046{
367efcb9 1047 struct fib6_node *fn, *saved_fn;
45e4fd26 1048 struct rt6_info *rt;
c71099ac 1049 int strict = 0;
1da177e4 1050
77d16f45 1051 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1052 if (net->ipv6.devconf_all->forwarding == 0)
1053 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1054
c71099ac 1055 read_lock_bh(&table->tb6_lock);
1da177e4 1056
4c9483b2 1057 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1058 saved_fn = fn;
1da177e4 1059
ca254490
DA
1060 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1061 oif = 0;
1062
a3c00e46 1063redo_rt6_select:
367efcb9 1064 rt = rt6_select(fn, oif, strict);
52bd4c0c 1065 if (rt->rt6i_nsiblings)
367efcb9 1066 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1067 if (rt == net->ipv6.ip6_null_entry) {
1068 fn = fib6_backtrack(fn, &fl6->saddr);
1069 if (fn)
1070 goto redo_rt6_select;
367efcb9
MKL
1071 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1072 /* also consider unreachable route */
1073 strict &= ~RT6_LOOKUP_F_REACHABLE;
1074 fn = saved_fn;
1075 goto redo_rt6_select;
367efcb9 1076 }
a3c00e46
MKL
1077 }
1078
fb9de91e 1079
3da59bd9 1080 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1081 dst_use(&rt->dst, jiffies);
1082 read_unlock_bh(&table->tb6_lock);
1083
1084 rt6_dst_from_metrics_check(rt);
b811580d
DA
1085
1086 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1087 return rt;
3da59bd9
MKL
1088 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1089 !(rt->rt6i_flags & RTF_GATEWAY))) {
1090 /* Create a RTF_CACHE clone which will not be
1091 * owned by the fib6 tree. It is for the special case where
1092 * the daddr in the skb during the neighbor look-up is different
1093 * from the fl6->daddr used to look-up route here.
1094 */
1095
1096 struct rt6_info *uncached_rt;
1097
d52d3997
MKL
1098 dst_use(&rt->dst, jiffies);
1099 read_unlock_bh(&table->tb6_lock);
1100
3da59bd9
MKL
1101 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1102 dst_release(&rt->dst);
c71099ac 1103
3da59bd9 1104 if (uncached_rt)
8d0b94af 1105 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1106 else
1107 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1108
3da59bd9 1109 dst_hold(&uncached_rt->dst);
b811580d
DA
1110
1111 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1112 return uncached_rt;
3da59bd9 1113
d52d3997
MKL
1114 } else {
1115 /* Get a percpu copy */
1116
1117 struct rt6_info *pcpu_rt;
1118
1119 rt->dst.lastuse = jiffies;
1120 rt->dst.__use++;
1121 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1122
9c7370a1
MKL
1123 if (pcpu_rt) {
1124 read_unlock_bh(&table->tb6_lock);
1125 } else {
1126 /* We have to do the read_unlock first
1127 * because rt6_make_pcpu_route() may trigger
1128 * ip6_dst_gc() which will take the write_lock.
1129 */
1130 dst_hold(&rt->dst);
1131 read_unlock_bh(&table->tb6_lock);
a73e4195 1132 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1133 dst_release(&rt->dst);
1134 }
d52d3997 1135
b811580d 1136 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1137 return pcpu_rt;
9c7370a1 1138
d52d3997 1139 }
1da177e4
LT
1140}
1141
8ed67789 1142static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1143 struct flowi6 *fl6, int flags)
4acad72d 1144{
4c9483b2 1145 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1146}
1147
72331bc0
SL
1148static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149 struct net_device *dev,
1150 struct flowi6 *fl6, int flags)
1151{
1152 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153 flags |= RT6_LOOKUP_F_IFACE;
1154
1155 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156}
1157
c71099ac
TG
1158void ip6_route_input(struct sk_buff *skb)
1159{
b71d1d42 1160 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1161 struct net *net = dev_net(skb->dev);
adaa70bb 1162 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1163 struct ip_tunnel_info *tun_info;
4c9483b2 1164 struct flowi6 fl6 = {
ca254490 1165 .flowi6_iif = l3mdev_fib_oif(skb->dev),
4c9483b2
DM
1166 .daddr = iph->daddr,
1167 .saddr = iph->saddr,
6502ca52 1168 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1169 .flowi6_mark = skb->mark,
1170 .flowi6_proto = iph->nexthdr,
c71099ac 1171 };
adaa70bb 1172
904af04d 1173 tun_info = skb_tunnel_info(skb);
46fa062a 1174 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1175 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1176 skb_dst_drop(skb);
72331bc0 1177 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1178}
1179
8ed67789 1180static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1181 struct flowi6 *fl6, int flags)
1da177e4 1182{
4c9483b2 1183 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1184}
1185
67ba4152 1186struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1187 struct flowi6 *fl6)
c71099ac 1188{
ca254490 1189 struct dst_entry *dst;
c71099ac 1190 int flags = 0;
d46a9d67 1191 bool any_src;
c71099ac 1192
ca254490
DA
1193 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194 if (dst)
1195 return dst;
1196
1fb9489b 1197 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1198
d46a9d67 1199 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1200 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1201 (fl6->flowi6_oif && any_src))
77d16f45 1202 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1203
d46a9d67 1204 if (!any_src)
adaa70bb 1205 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1206 else if (sk)
1207 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1208
4c9483b2 1209 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1210}
7159039a 1211EXPORT_SYMBOL(ip6_route_output);
1da177e4 1212
2774c131 1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1214{
5c1e6aa3 1215 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1216 struct dst_entry *new = NULL;
1217
f5b0a874 1218 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1219 if (rt) {
0a1f5962 1220 rt6_info_init(rt);
8104891b 1221
0a1f5962 1222 new = &rt->dst;
14e50e57 1223 new->__use = 1;
352e512c 1224 new->input = dst_discard;
ede2059d 1225 new->output = dst_discard_out;
14e50e57 1226
0a1f5962 1227 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1228 rt->rt6i_idev = ort->rt6i_idev;
1229 if (rt->rt6i_idev)
1230 in6_dev_hold(rt->rt6i_idev);
14e50e57 1231
4e3fd7a0 1232 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1233 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1234 rt->rt6i_metric = 0;
1235
1236 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241 dst_free(new);
1242 }
1243
69ead7af
DM
1244 dst_release(dst_orig);
1245 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1246}
14e50e57 1247
1da177e4
LT
1248/*
1249 * Destination cache support functions
1250 */
1251
4b32b5ad
MKL
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254 if (rt->dst.from &&
1255 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
3da59bd9
MKL
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262 return NULL;
1263
1264 if (rt6_check_expired(rt))
1265 return NULL;
1266
1267 return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
5973fb1e
MKL
1272 if (!__rt6_check_expired(rt) &&
1273 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1274 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 return &rt->dst;
1276 else
1277 return NULL;
1278}
1279
1da177e4
LT
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282 struct rt6_info *rt;
1283
1284 rt = (struct rt6_info *) dst;
1285
6f3118b5
ND
1286 /* All IPV6 dsts are created with ->obsolete set to the value
1287 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 * into this function always.
1289 */
e3bc10bd 1290
4b32b5ad
MKL
1291 rt6_dst_from_metrics_check(rt);
1292
02bcf4e0
MKL
1293 if (rt->rt6i_flags & RTF_PCPU ||
1294 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1295 return rt6_dst_from_check(rt, cookie);
1296 else
1297 return rt6_check(rt, cookie);
1da177e4
LT
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302 struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304 if (rt) {
54c1a859
YH
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 if (rt6_check_expired(rt)) {
1307 ip6_del_rt(rt);
1308 dst = NULL;
1309 }
1310 } else {
1da177e4 1311 dst_release(dst);
54c1a859
YH
1312 dst = NULL;
1313 }
1da177e4 1314 }
54c1a859 1315 return dst;
1da177e4
LT
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320 struct rt6_info *rt;
1321
3ffe533c 1322 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1323
adf30907 1324 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1325 if (rt) {
1eb4f758
HFS
1326 if (rt->rt6i_flags & RTF_CACHE) {
1327 dst_hold(&rt->dst);
8e3d5be7 1328 ip6_del_rt(rt);
1eb4f758 1329 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1330 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1331 }
1da177e4
LT
1332 }
1333}
1334
45e4fd26
MKL
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337 struct net *net = dev_net(rt->dst.dev);
1338
1339 rt->rt6i_flags |= RTF_MODIFIED;
1340 rt->rt6i_pmtu = mtu;
1341 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
0d3f6d29
MKL
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346 return !(rt->rt6i_flags & RTF_CACHE) &&
1347 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
45e4fd26
MKL
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1352{
67ba4152 1353 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1354
45e4fd26
MKL
1355 if (rt6->rt6i_flags & RTF_LOCAL)
1356 return;
81aded24 1357
45e4fd26
MKL
1358 dst_confirm(dst);
1359 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360 if (mtu >= dst_mtu(dst))
1361 return;
9d289715 1362
0d3f6d29 1363 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1364 rt6_do_update_pmtu(rt6, mtu);
1365 } else {
1366 const struct in6_addr *daddr, *saddr;
1367 struct rt6_info *nrt6;
1368
1369 if (iph) {
1370 daddr = &iph->daddr;
1371 saddr = &iph->saddr;
1372 } else if (sk) {
1373 daddr = &sk->sk_v6_daddr;
1374 saddr = &inet6_sk(sk)->saddr;
1375 } else {
1376 return;
1377 }
1378 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379 if (nrt6) {
1380 rt6_do_update_pmtu(nrt6, mtu);
1381
1382 /* ip6_ins_rt(nrt6) will bump the
1383 * rt6->rt6i_node->fn_sernum
1384 * which will fail the next rt6_check() and
1385 * invalidate the sk->sk_dst_cache.
1386 */
1387 ip6_ins_rt(nrt6);
1388 }
1da177e4
LT
1389 }
1390}
1391
45e4fd26
MKL
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393 struct sk_buff *skb, u32 mtu)
1394{
1395 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
42ae66c8
DM
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399 int oif, u32 mark)
81aded24
DM
1400{
1401 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402 struct dst_entry *dst;
1403 struct flowi6 fl6;
1404
1405 memset(&fl6, 0, sizeof(fl6));
1406 fl6.flowi6_oif = oif;
1b3c61dc 1407 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1408 fl6.daddr = iph->daddr;
1409 fl6.saddr = iph->saddr;
6502ca52 1410 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1411
1412 dst = ip6_route_output(net, NULL, &fl6);
1413 if (!dst->error)
45e4fd26 1414 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1415 dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421 ip6_update_pmtu(skb, sock_net(sk), mtu,
1422 sk->sk_bound_dev_if, sk->sk_mark);
1423}
1424EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1425
b55b76b2
DJ
1426/* Handle redirects */
1427struct ip6rd_flowi {
1428 struct flowi6 fl6;
1429 struct in6_addr gateway;
1430};
1431
1432static struct rt6_info *__ip6_route_redirect(struct net *net,
1433 struct fib6_table *table,
1434 struct flowi6 *fl6,
1435 int flags)
1436{
1437 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1438 struct rt6_info *rt;
1439 struct fib6_node *fn;
1440
1441 /* Get the "current" route for this destination and
1442 * check if the redirect has come from approriate router.
1443 *
1444 * RFC 4861 specifies that redirects should only be
1445 * accepted if they come from the nexthop to the target.
1446 * Due to the way the routes are chosen, this notion
1447 * is a bit fuzzy and one might need to check all possible
1448 * routes.
1449 */
1450
1451 read_lock_bh(&table->tb6_lock);
1452 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1453restart:
1454 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1455 if (rt6_check_expired(rt))
1456 continue;
1457 if (rt->dst.error)
1458 break;
1459 if (!(rt->rt6i_flags & RTF_GATEWAY))
1460 continue;
1461 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1462 continue;
1463 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1464 continue;
1465 break;
1466 }
1467
1468 if (!rt)
1469 rt = net->ipv6.ip6_null_entry;
1470 else if (rt->dst.error) {
1471 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1472 goto out;
1473 }
1474
1475 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1476 fn = fib6_backtrack(fn, &fl6->saddr);
1477 if (fn)
1478 goto restart;
b55b76b2 1479 }
a3c00e46 1480
b0a1ba59 1481out:
b55b76b2
DJ
1482 dst_hold(&rt->dst);
1483
1484 read_unlock_bh(&table->tb6_lock);
1485
b811580d 1486 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1487 return rt;
1488};
1489
1490static struct dst_entry *ip6_route_redirect(struct net *net,
1491 const struct flowi6 *fl6,
1492 const struct in6_addr *gateway)
1493{
1494 int flags = RT6_LOOKUP_F_HAS_SADDR;
1495 struct ip6rd_flowi rdfl;
1496
1497 rdfl.fl6 = *fl6;
1498 rdfl.gateway = *gateway;
1499
1500 return fib6_rule_lookup(net, &rdfl.fl6,
1501 flags, __ip6_route_redirect);
1502}
1503
3a5ad2ee
DM
1504void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1505{
1506 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1507 struct dst_entry *dst;
1508 struct flowi6 fl6;
1509
1510 memset(&fl6, 0, sizeof(fl6));
e374c618 1511 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1512 fl6.flowi6_oif = oif;
1513 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1514 fl6.daddr = iph->daddr;
1515 fl6.saddr = iph->saddr;
6502ca52 1516 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1517
b55b76b2
DJ
1518 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1519 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1520 dst_release(dst);
1521}
1522EXPORT_SYMBOL_GPL(ip6_redirect);
1523
c92a59ec
DJ
1524void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1525 u32 mark)
1526{
1527 const struct ipv6hdr *iph = ipv6_hdr(skb);
1528 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1529 struct dst_entry *dst;
1530 struct flowi6 fl6;
1531
1532 memset(&fl6, 0, sizeof(fl6));
e374c618 1533 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1534 fl6.flowi6_oif = oif;
1535 fl6.flowi6_mark = mark;
c92a59ec
DJ
1536 fl6.daddr = msg->dest;
1537 fl6.saddr = iph->daddr;
1538
b55b76b2
DJ
1539 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1540 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1541 dst_release(dst);
1542}
1543
3a5ad2ee
DM
1544void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1545{
1546 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1547}
1548EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1549
0dbaee3b 1550static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1551{
0dbaee3b
DM
1552 struct net_device *dev = dst->dev;
1553 unsigned int mtu = dst_mtu(dst);
1554 struct net *net = dev_net(dev);
1555
1da177e4
LT
1556 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1557
5578689a
DL
1558 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1559 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1560
1561 /*
1ab1457c
YH
1562 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1563 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1564 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1565 * rely only on pmtu discovery"
1566 */
1567 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1568 mtu = IPV6_MAXPLEN;
1569 return mtu;
1570}
1571
ebb762f2 1572static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1573{
4b32b5ad
MKL
1574 const struct rt6_info *rt = (const struct rt6_info *)dst;
1575 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1576 struct inet6_dev *idev;
618f9bc7 1577
4b32b5ad
MKL
1578 if (mtu)
1579 goto out;
1580
1581 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1582 if (mtu)
30f78d8e 1583 goto out;
618f9bc7
SK
1584
1585 mtu = IPV6_MIN_MTU;
d33e4553
DM
1586
1587 rcu_read_lock();
1588 idev = __in6_dev_get(dst->dev);
1589 if (idev)
1590 mtu = idev->cnf.mtu6;
1591 rcu_read_unlock();
1592
30f78d8e
ED
1593out:
1594 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1595}
1596
3b00944c
YH
1597static struct dst_entry *icmp6_dst_gc_list;
1598static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1599
3b00944c 1600struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1601 struct flowi6 *fl6)
1da177e4 1602{
87a11578 1603 struct dst_entry *dst;
1da177e4
LT
1604 struct rt6_info *rt;
1605 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1606 struct net *net = dev_net(dev);
1da177e4 1607
38308473 1608 if (unlikely(!idev))
122bdf67 1609 return ERR_PTR(-ENODEV);
1da177e4 1610
ad706862 1611 rt = ip6_dst_alloc(net, dev, 0);
38308473 1612 if (unlikely(!rt)) {
1da177e4 1613 in6_dev_put(idev);
87a11578 1614 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1615 goto out;
1616 }
1617
8e2ec639
YZ
1618 rt->dst.flags |= DST_HOST;
1619 rt->dst.output = ip6_output;
d8d1f30b 1620 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1621 rt->rt6i_gateway = fl6->daddr;
87a11578 1622 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1623 rt->rt6i_dst.plen = 128;
1624 rt->rt6i_idev = idev;
14edd87d 1625 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1626
3b00944c 1627 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1628 rt->dst.next = icmp6_dst_gc_list;
1629 icmp6_dst_gc_list = &rt->dst;
3b00944c 1630 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1631
5578689a 1632 fib6_force_start_gc(net);
1da177e4 1633
87a11578
DM
1634 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1635
1da177e4 1636out:
87a11578 1637 return dst;
1da177e4
LT
1638}
1639
3d0f24a7 1640int icmp6_dst_gc(void)
1da177e4 1641{
e9476e95 1642 struct dst_entry *dst, **pprev;
3d0f24a7 1643 int more = 0;
1da177e4 1644
3b00944c
YH
1645 spin_lock_bh(&icmp6_dst_lock);
1646 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1647
1da177e4
LT
1648 while ((dst = *pprev) != NULL) {
1649 if (!atomic_read(&dst->__refcnt)) {
1650 *pprev = dst->next;
1651 dst_free(dst);
1da177e4
LT
1652 } else {
1653 pprev = &dst->next;
3d0f24a7 1654 ++more;
1da177e4
LT
1655 }
1656 }
1657
3b00944c 1658 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1659
3d0f24a7 1660 return more;
1da177e4
LT
1661}
1662
1e493d19
DM
1663static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1664 void *arg)
1665{
1666 struct dst_entry *dst, **pprev;
1667
1668 spin_lock_bh(&icmp6_dst_lock);
1669 pprev = &icmp6_dst_gc_list;
1670 while ((dst = *pprev) != NULL) {
1671 struct rt6_info *rt = (struct rt6_info *) dst;
1672 if (func(rt, arg)) {
1673 *pprev = dst->next;
1674 dst_free(dst);
1675 } else {
1676 pprev = &dst->next;
1677 }
1678 }
1679 spin_unlock_bh(&icmp6_dst_lock);
1680}
1681
569d3645 1682static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1683{
86393e52 1684 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1685 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1686 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1687 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1688 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1689 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1690 int entries;
7019b78e 1691
fc66f95c 1692 entries = dst_entries_get_fast(ops);
49a18d86 1693 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1694 entries <= rt_max_size)
1da177e4
LT
1695 goto out;
1696
6891a346 1697 net->ipv6.ip6_rt_gc_expire++;
14956643 1698 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1699 entries = dst_entries_get_slow(ops);
1700 if (entries < ops->gc_thresh)
7019b78e 1701 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1702out:
7019b78e 1703 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1704 return entries > rt_max_size;
1da177e4
LT
1705}
1706
e715b6d3
FW
1707static int ip6_convert_metrics(struct mx6_config *mxc,
1708 const struct fib6_config *cfg)
1709{
c3a8d947 1710 bool ecn_ca = false;
e715b6d3
FW
1711 struct nlattr *nla;
1712 int remaining;
1713 u32 *mp;
1714
63159f29 1715 if (!cfg->fc_mx)
e715b6d3
FW
1716 return 0;
1717
1718 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1719 if (unlikely(!mp))
1720 return -ENOMEM;
1721
1722 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1723 int type = nla_type(nla);
1bb14807 1724 u32 val;
e715b6d3 1725
1bb14807
DB
1726 if (!type)
1727 continue;
1728 if (unlikely(type > RTAX_MAX))
1729 goto err;
ea697639 1730
1bb14807
DB
1731 if (type == RTAX_CC_ALGO) {
1732 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1733
1bb14807 1734 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1735 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1736 if (val == TCP_CA_UNSPEC)
1737 goto err;
1738 } else {
1739 val = nla_get_u32(nla);
e715b6d3 1740 }
b8d3e416
DB
1741 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1742 goto err;
1bb14807
DB
1743
1744 mp[type - 1] = val;
1745 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1746 }
1747
c3a8d947
DB
1748 if (ecn_ca) {
1749 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1750 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1751 }
e715b6d3 1752
c3a8d947 1753 mxc->mx = mp;
e715b6d3
FW
1754 return 0;
1755 err:
1756 kfree(mp);
1757 return -EINVAL;
1758}
1da177e4 1759
8c5b83f0 1760static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1761{
5578689a 1762 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1763 struct rt6_info *rt = NULL;
1764 struct net_device *dev = NULL;
1765 struct inet6_dev *idev = NULL;
c71099ac 1766 struct fib6_table *table;
1da177e4 1767 int addr_type;
8c5b83f0 1768 int err = -EINVAL;
1da177e4 1769
86872cb5 1770 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1771 goto out;
1da177e4 1772#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1773 if (cfg->fc_src_len)
8c5b83f0 1774 goto out;
1da177e4 1775#endif
86872cb5 1776 if (cfg->fc_ifindex) {
1da177e4 1777 err = -ENODEV;
5578689a 1778 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1779 if (!dev)
1780 goto out;
1781 idev = in6_dev_get(dev);
1782 if (!idev)
1783 goto out;
1784 }
1785
86872cb5
TG
1786 if (cfg->fc_metric == 0)
1787 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1788
d71314b4 1789 err = -ENOBUFS;
38308473
DM
1790 if (cfg->fc_nlinfo.nlh &&
1791 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1792 table = fib6_get_table(net, cfg->fc_table);
38308473 1793 if (!table) {
f3213831 1794 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1795 table = fib6_new_table(net, cfg->fc_table);
1796 }
1797 } else {
1798 table = fib6_new_table(net, cfg->fc_table);
1799 }
38308473
DM
1800
1801 if (!table)
c71099ac 1802 goto out;
c71099ac 1803
ad706862
MKL
1804 rt = ip6_dst_alloc(net, NULL,
1805 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1806
38308473 1807 if (!rt) {
1da177e4
LT
1808 err = -ENOMEM;
1809 goto out;
1810 }
1811
1716a961
G
1812 if (cfg->fc_flags & RTF_EXPIRES)
1813 rt6_set_expires(rt, jiffies +
1814 clock_t_to_jiffies(cfg->fc_expires));
1815 else
1816 rt6_clean_expires(rt);
1da177e4 1817
86872cb5
TG
1818 if (cfg->fc_protocol == RTPROT_UNSPEC)
1819 cfg->fc_protocol = RTPROT_BOOT;
1820 rt->rt6i_protocol = cfg->fc_protocol;
1821
1822 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1823
1824 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1825 rt->dst.input = ip6_mc_input;
ab79ad14
1826 else if (cfg->fc_flags & RTF_LOCAL)
1827 rt->dst.input = ip6_input;
1da177e4 1828 else
d8d1f30b 1829 rt->dst.input = ip6_forward;
1da177e4 1830
d8d1f30b 1831 rt->dst.output = ip6_output;
1da177e4 1832
19e42e45
RP
1833 if (cfg->fc_encap) {
1834 struct lwtunnel_state *lwtstate;
1835
1836 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1837 cfg->fc_encap, AF_INET6, cfg,
1838 &lwtstate);
19e42e45
RP
1839 if (err)
1840 goto out;
61adedf3
JB
1841 rt->dst.lwtstate = lwtstate_get(lwtstate);
1842 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1843 rt->dst.lwtstate->orig_output = rt->dst.output;
1844 rt->dst.output = lwtunnel_output;
25368623 1845 }
61adedf3
JB
1846 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1847 rt->dst.lwtstate->orig_input = rt->dst.input;
1848 rt->dst.input = lwtunnel_input;
25368623 1849 }
19e42e45
RP
1850 }
1851
86872cb5
TG
1852 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1853 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1854 if (rt->rt6i_dst.plen == 128)
e5fd387a 1855 rt->dst.flags |= DST_HOST;
e5fd387a 1856
1da177e4 1857#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1858 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1859 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1860#endif
1861
86872cb5 1862 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1863
1864 /* We cannot add true routes via loopback here,
1865 they would result in kernel looping; promote them to reject routes
1866 */
86872cb5 1867 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1868 (dev && (dev->flags & IFF_LOOPBACK) &&
1869 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1870 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1871 /* hold loopback dev/idev if we haven't done so. */
5578689a 1872 if (dev != net->loopback_dev) {
1da177e4
LT
1873 if (dev) {
1874 dev_put(dev);
1875 in6_dev_put(idev);
1876 }
5578689a 1877 dev = net->loopback_dev;
1da177e4
LT
1878 dev_hold(dev);
1879 idev = in6_dev_get(dev);
1880 if (!idev) {
1881 err = -ENODEV;
1882 goto out;
1883 }
1884 }
1da177e4 1885 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1886 switch (cfg->fc_type) {
1887 case RTN_BLACKHOLE:
1888 rt->dst.error = -EINVAL;
ede2059d 1889 rt->dst.output = dst_discard_out;
7150aede 1890 rt->dst.input = dst_discard;
ef2c7d7b
ND
1891 break;
1892 case RTN_PROHIBIT:
1893 rt->dst.error = -EACCES;
7150aede
K
1894 rt->dst.output = ip6_pkt_prohibit_out;
1895 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1896 break;
b4949ab2 1897 case RTN_THROW:
0315e382 1898 case RTN_UNREACHABLE:
ef2c7d7b 1899 default:
7150aede 1900 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1901 : (cfg->fc_type == RTN_UNREACHABLE)
1902 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1903 rt->dst.output = ip6_pkt_discard_out;
1904 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1905 break;
1906 }
1da177e4
LT
1907 goto install_route;
1908 }
1909
86872cb5 1910 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1911 const struct in6_addr *gw_addr;
1da177e4
LT
1912 int gwa_type;
1913
86872cb5 1914 gw_addr = &cfg->fc_gateway;
330567b7 1915 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1916
1917 /* if gw_addr is local we will fail to detect this in case
1918 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1919 * will return already-added prefix route via interface that
1920 * prefix route was assigned to, which might be non-loopback.
1921 */
1922 err = -EINVAL;
330567b7
FW
1923 if (ipv6_chk_addr_and_flags(net, gw_addr,
1924 gwa_type & IPV6_ADDR_LINKLOCAL ?
1925 dev : NULL, 0, 0))
48ed7b26
FW
1926 goto out;
1927
4e3fd7a0 1928 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1929
1930 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1931 struct rt6_info *grt;
1932
1933 /* IPv6 strictly inhibits using not link-local
1934 addresses as nexthop address.
1935 Otherwise, router will not able to send redirects.
1936 It is very good, but in some (rare!) circumstances
1937 (SIT, PtP, NBMA NOARP links) it is handy to allow
1938 some exceptions. --ANK
1939 */
38308473 1940 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1941 goto out;
1942
5578689a 1943 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1944
1945 err = -EHOSTUNREACH;
38308473 1946 if (!grt)
1da177e4
LT
1947 goto out;
1948 if (dev) {
d1918542 1949 if (dev != grt->dst.dev) {
94e187c0 1950 ip6_rt_put(grt);
1da177e4
LT
1951 goto out;
1952 }
1953 } else {
d1918542 1954 dev = grt->dst.dev;
1da177e4
LT
1955 idev = grt->rt6i_idev;
1956 dev_hold(dev);
1957 in6_dev_hold(grt->rt6i_idev);
1958 }
38308473 1959 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1960 err = 0;
94e187c0 1961 ip6_rt_put(grt);
1da177e4
LT
1962
1963 if (err)
1964 goto out;
1965 }
1966 err = -EINVAL;
38308473 1967 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1968 goto out;
1969 }
1970
1971 err = -ENODEV;
38308473 1972 if (!dev)
1da177e4
LT
1973 goto out;
1974
c3968a85
DW
1975 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1976 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1977 err = -EINVAL;
1978 goto out;
1979 }
4e3fd7a0 1980 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1981 rt->rt6i_prefsrc.plen = 128;
1982 } else
1983 rt->rt6i_prefsrc.plen = 0;
1984
86872cb5 1985 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1986
1987install_route:
d8d1f30b 1988 rt->dst.dev = dev;
1da177e4 1989 rt->rt6i_idev = idev;
c71099ac 1990 rt->rt6i_table = table;
63152fc0 1991
c346dca1 1992 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1993
8c5b83f0 1994 return rt;
6b9ea5a6
RP
1995out:
1996 if (dev)
1997 dev_put(dev);
1998 if (idev)
1999 in6_dev_put(idev);
2000 if (rt)
2001 dst_free(&rt->dst);
2002
8c5b83f0 2003 return ERR_PTR(err);
6b9ea5a6
RP
2004}
2005
2006int ip6_route_add(struct fib6_config *cfg)
2007{
2008 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2009 struct rt6_info *rt;
6b9ea5a6
RP
2010 int err;
2011
8c5b83f0
RP
2012 rt = ip6_route_info_create(cfg);
2013 if (IS_ERR(rt)) {
2014 err = PTR_ERR(rt);
2015 rt = NULL;
6b9ea5a6 2016 goto out;
8c5b83f0 2017 }
6b9ea5a6 2018
e715b6d3
FW
2019 err = ip6_convert_metrics(&mxc, cfg);
2020 if (err)
2021 goto out;
1da177e4 2022
e715b6d3
FW
2023 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2024
2025 kfree(mxc.mx);
6b9ea5a6 2026
e715b6d3 2027 return err;
1da177e4 2028out:
1da177e4 2029 if (rt)
d8d1f30b 2030 dst_free(&rt->dst);
6b9ea5a6 2031
1da177e4
LT
2032 return err;
2033}
2034
86872cb5 2035static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2036{
2037 int err;
c71099ac 2038 struct fib6_table *table;
d1918542 2039 struct net *net = dev_net(rt->dst.dev);
1da177e4 2040
8e3d5be7
MKL
2041 if (rt == net->ipv6.ip6_null_entry ||
2042 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2043 err = -ENOENT;
2044 goto out;
2045 }
6c813a72 2046
c71099ac
TG
2047 table = rt->rt6i_table;
2048 write_lock_bh(&table->tb6_lock);
86872cb5 2049 err = fib6_del(rt, info);
c71099ac 2050 write_unlock_bh(&table->tb6_lock);
1da177e4 2051
6825a26c 2052out:
94e187c0 2053 ip6_rt_put(rt);
1da177e4
LT
2054 return err;
2055}
2056
e0a1ad73
TG
2057int ip6_del_rt(struct rt6_info *rt)
2058{
4d1169c1 2059 struct nl_info info = {
d1918542 2060 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2061 };
528c4ceb 2062 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2063}
2064
86872cb5 2065static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2066{
c71099ac 2067 struct fib6_table *table;
1da177e4
LT
2068 struct fib6_node *fn;
2069 struct rt6_info *rt;
2070 int err = -ESRCH;
2071
5578689a 2072 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2073 if (!table)
c71099ac
TG
2074 return err;
2075
2076 read_lock_bh(&table->tb6_lock);
1da177e4 2077
c71099ac 2078 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2079 &cfg->fc_dst, cfg->fc_dst_len,
2080 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2081
1da177e4 2082 if (fn) {
d8d1f30b 2083 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2084 if ((rt->rt6i_flags & RTF_CACHE) &&
2085 !(cfg->fc_flags & RTF_CACHE))
2086 continue;
86872cb5 2087 if (cfg->fc_ifindex &&
d1918542
DM
2088 (!rt->dst.dev ||
2089 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2090 continue;
86872cb5
TG
2091 if (cfg->fc_flags & RTF_GATEWAY &&
2092 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2093 continue;
86872cb5 2094 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2095 continue;
d8d1f30b 2096 dst_hold(&rt->dst);
c71099ac 2097 read_unlock_bh(&table->tb6_lock);
1da177e4 2098
86872cb5 2099 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2100 }
2101 }
c71099ac 2102 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2103
2104 return err;
2105}
2106
6700c270 2107static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2108{
a6279458 2109 struct netevent_redirect netevent;
e8599ff4 2110 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2111 struct ndisc_options ndopts;
2112 struct inet6_dev *in6_dev;
2113 struct neighbour *neigh;
71bcdba0 2114 struct rd_msg *msg;
6e157b6a
DM
2115 int optlen, on_link;
2116 u8 *lladdr;
e8599ff4 2117
29a3cad5 2118 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2119 optlen -= sizeof(*msg);
e8599ff4
DM
2120
2121 if (optlen < 0) {
6e157b6a 2122 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2123 return;
2124 }
2125
71bcdba0 2126 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2127
71bcdba0 2128 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2129 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2130 return;
2131 }
2132
6e157b6a 2133 on_link = 0;
71bcdba0 2134 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2135 on_link = 1;
71bcdba0 2136 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2137 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2138 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2139 return;
2140 }
2141
2142 in6_dev = __in6_dev_get(skb->dev);
2143 if (!in6_dev)
2144 return;
2145 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2146 return;
2147
2148 /* RFC2461 8.1:
2149 * The IP source address of the Redirect MUST be the same as the current
2150 * first-hop router for the specified ICMP Destination Address.
2151 */
2152
71bcdba0 2153 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2154 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2155 return;
2156 }
6e157b6a
DM
2157
2158 lladdr = NULL;
e8599ff4
DM
2159 if (ndopts.nd_opts_tgt_lladdr) {
2160 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2161 skb->dev);
2162 if (!lladdr) {
2163 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2164 return;
2165 }
2166 }
2167
6e157b6a 2168 rt = (struct rt6_info *) dst;
ec13ad1d 2169 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2170 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2171 return;
6e157b6a 2172 }
e8599ff4 2173
6e157b6a
DM
2174 /* Redirect received -> path was valid.
2175 * Look, redirects are sent only in response to data packets,
2176 * so that this nexthop apparently is reachable. --ANK
2177 */
2178 dst_confirm(&rt->dst);
a6279458 2179
71bcdba0 2180 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2181 if (!neigh)
2182 return;
a6279458 2183
1da177e4
LT
2184 /*
2185 * We have finally decided to accept it.
2186 */
2187
1ab1457c 2188 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2189 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2190 NEIGH_UPDATE_F_OVERRIDE|
2191 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2192 NEIGH_UPDATE_F_ISROUTER))
2193 );
2194
83a09abd 2195 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2196 if (!nrt)
1da177e4
LT
2197 goto out;
2198
2199 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2200 if (on_link)
2201 nrt->rt6i_flags &= ~RTF_GATEWAY;
2202
4e3fd7a0 2203 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2204
40e22e8f 2205 if (ip6_ins_rt(nrt))
1da177e4
LT
2206 goto out;
2207
d8d1f30b
CG
2208 netevent.old = &rt->dst;
2209 netevent.new = &nrt->dst;
71bcdba0 2210 netevent.daddr = &msg->dest;
60592833 2211 netevent.neigh = neigh;
8d71740c
TT
2212 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2213
38308473 2214 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2215 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2216 ip6_del_rt(rt);
1da177e4
LT
2217 }
2218
2219out:
e8599ff4 2220 neigh_release(neigh);
6e157b6a
DM
2221}
2222
1da177e4
LT
2223/*
2224 * Misc support functions
2225 */
2226
4b32b5ad
MKL
2227static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2228{
2229 BUG_ON(from->dst.from);
2230
2231 rt->rt6i_flags &= ~RTF_EXPIRES;
2232 dst_hold(&from->dst);
2233 rt->dst.from = &from->dst;
2234 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2235}
2236
83a09abd
MKL
2237static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2238{
2239 rt->dst.input = ort->dst.input;
2240 rt->dst.output = ort->dst.output;
2241 rt->rt6i_dst = ort->rt6i_dst;
2242 rt->dst.error = ort->dst.error;
2243 rt->rt6i_idev = ort->rt6i_idev;
2244 if (rt->rt6i_idev)
2245 in6_dev_hold(rt->rt6i_idev);
2246 rt->dst.lastuse = jiffies;
2247 rt->rt6i_gateway = ort->rt6i_gateway;
2248 rt->rt6i_flags = ort->rt6i_flags;
2249 rt6_set_from(rt, ort);
2250 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2251#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2252 rt->rt6i_src = ort->rt6i_src;
1da177e4 2253#endif
83a09abd
MKL
2254 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2255 rt->rt6i_table = ort->rt6i_table;
61adedf3 2256 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2257}
2258
70ceb4f5 2259#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2260static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2261 const struct in6_addr *prefix, int prefixlen,
2262 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2263{
2264 struct fib6_node *fn;
2265 struct rt6_info *rt = NULL;
c71099ac
TG
2266 struct fib6_table *table;
2267
efa2cea0 2268 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2269 if (!table)
c71099ac 2270 return NULL;
70ceb4f5 2271
5744dd9b 2272 read_lock_bh(&table->tb6_lock);
67ba4152 2273 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2274 if (!fn)
2275 goto out;
2276
d8d1f30b 2277 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2278 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2279 continue;
2280 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2281 continue;
2282 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2283 continue;
d8d1f30b 2284 dst_hold(&rt->dst);
70ceb4f5
YH
2285 break;
2286 }
2287out:
5744dd9b 2288 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2289 return rt;
2290}
2291
efa2cea0 2292static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2293 const struct in6_addr *prefix, int prefixlen,
2294 const struct in6_addr *gwaddr, int ifindex,
95c96174 2295 unsigned int pref)
70ceb4f5 2296{
86872cb5 2297 struct fib6_config cfg = {
238fc7ea 2298 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2299 .fc_ifindex = ifindex,
2300 .fc_dst_len = prefixlen,
2301 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2302 RTF_UP | RTF_PREF(pref),
15e47304 2303 .fc_nlinfo.portid = 0,
efa2cea0
DL
2304 .fc_nlinfo.nlh = NULL,
2305 .fc_nlinfo.nl_net = net,
86872cb5
TG
2306 };
2307
ca254490 2308 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
2309 cfg.fc_dst = *prefix;
2310 cfg.fc_gateway = *gwaddr;
70ceb4f5 2311
e317da96
YH
2312 /* We should treat it as a default route if prefix length is 0. */
2313 if (!prefixlen)
86872cb5 2314 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2315
86872cb5 2316 ip6_route_add(&cfg);
70ceb4f5 2317
efa2cea0 2318 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2319}
2320#endif
2321
b71d1d42 2322struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2323{
1da177e4 2324 struct rt6_info *rt;
c71099ac 2325 struct fib6_table *table;
1da177e4 2326
c346dca1 2327 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2328 if (!table)
c71099ac 2329 return NULL;
1da177e4 2330
5744dd9b 2331 read_lock_bh(&table->tb6_lock);
67ba4152 2332 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2333 if (dev == rt->dst.dev &&
045927ff 2334 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2335 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2336 break;
2337 }
2338 if (rt)
d8d1f30b 2339 dst_hold(&rt->dst);
5744dd9b 2340 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2341 return rt;
2342}
2343
b71d1d42 2344struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2345 struct net_device *dev,
2346 unsigned int pref)
1da177e4 2347{
86872cb5 2348 struct fib6_config cfg = {
ca254490 2349 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2350 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2351 .fc_ifindex = dev->ifindex,
2352 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2353 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2354 .fc_nlinfo.portid = 0,
5578689a 2355 .fc_nlinfo.nlh = NULL,
c346dca1 2356 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2357 };
1da177e4 2358
4e3fd7a0 2359 cfg.fc_gateway = *gwaddr;
1da177e4 2360
86872cb5 2361 ip6_route_add(&cfg);
1da177e4 2362
1da177e4
LT
2363 return rt6_get_dflt_router(gwaddr, dev);
2364}
2365
7b4da532 2366void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2367{
2368 struct rt6_info *rt;
c71099ac
TG
2369 struct fib6_table *table;
2370
2371 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2372 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2373 if (!table)
c71099ac 2374 return;
1da177e4
LT
2375
2376restart:
c71099ac 2377 read_lock_bh(&table->tb6_lock);
d8d1f30b 2378 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2379 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2380 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2381 dst_hold(&rt->dst);
c71099ac 2382 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2383 ip6_del_rt(rt);
1da177e4
LT
2384 goto restart;
2385 }
2386 }
c71099ac 2387 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2388}
2389
5578689a
DL
2390static void rtmsg_to_fib6_config(struct net *net,
2391 struct in6_rtmsg *rtmsg,
86872cb5
TG
2392 struct fib6_config *cfg)
2393{
2394 memset(cfg, 0, sizeof(*cfg));
2395
ca254490
DA
2396 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2397 : RT6_TABLE_MAIN;
86872cb5
TG
2398 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2399 cfg->fc_metric = rtmsg->rtmsg_metric;
2400 cfg->fc_expires = rtmsg->rtmsg_info;
2401 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2402 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2403 cfg->fc_flags = rtmsg->rtmsg_flags;
2404
5578689a 2405 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2406
4e3fd7a0
AD
2407 cfg->fc_dst = rtmsg->rtmsg_dst;
2408 cfg->fc_src = rtmsg->rtmsg_src;
2409 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2410}
2411
5578689a 2412int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2413{
86872cb5 2414 struct fib6_config cfg;
1da177e4
LT
2415 struct in6_rtmsg rtmsg;
2416 int err;
2417
67ba4152 2418 switch (cmd) {
1da177e4
LT
2419 case SIOCADDRT: /* Add a route */
2420 case SIOCDELRT: /* Delete a route */
af31f412 2421 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2422 return -EPERM;
2423 err = copy_from_user(&rtmsg, arg,
2424 sizeof(struct in6_rtmsg));
2425 if (err)
2426 return -EFAULT;
86872cb5 2427
5578689a 2428 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2429
1da177e4
LT
2430 rtnl_lock();
2431 switch (cmd) {
2432 case SIOCADDRT:
86872cb5 2433 err = ip6_route_add(&cfg);
1da177e4
LT
2434 break;
2435 case SIOCDELRT:
86872cb5 2436 err = ip6_route_del(&cfg);
1da177e4
LT
2437 break;
2438 default:
2439 err = -EINVAL;
2440 }
2441 rtnl_unlock();
2442
2443 return err;
3ff50b79 2444 }
1da177e4
LT
2445
2446 return -EINVAL;
2447}
2448
2449/*
2450 * Drop the packet on the floor
2451 */
2452
d5fdd6ba 2453static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2454{
612f09e8 2455 int type;
adf30907 2456 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2457 switch (ipstats_mib_noroutes) {
2458 case IPSTATS_MIB_INNOROUTES:
0660e03f 2459 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2460 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2461 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2462 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2463 break;
2464 }
2465 /* FALLTHROUGH */
2466 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2467 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2468 ipstats_mib_noroutes);
612f09e8
YH
2469 break;
2470 }
3ffe533c 2471 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2472 kfree_skb(skb);
2473 return 0;
2474}
2475
9ce8ade0
TG
2476static int ip6_pkt_discard(struct sk_buff *skb)
2477{
612f09e8 2478 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2479}
2480
ede2059d 2481static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2482{
adf30907 2483 skb->dev = skb_dst(skb)->dev;
612f09e8 2484 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2485}
2486
9ce8ade0
TG
2487static int ip6_pkt_prohibit(struct sk_buff *skb)
2488{
612f09e8 2489 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2490}
2491
ede2059d 2492static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2493{
adf30907 2494 skb->dev = skb_dst(skb)->dev;
612f09e8 2495 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2496}
2497
1da177e4
LT
2498/*
2499 * Allocate a dst for local (unicast / anycast) address.
2500 */
2501
2502struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2503 const struct in6_addr *addr,
8f031519 2504 bool anycast)
1da177e4 2505{
ca254490 2506 u32 tb_id;
c346dca1 2507 struct net *net = dev_net(idev->dev);
a3300ef4 2508 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2509 DST_NOCOUNT);
a3300ef4 2510 if (!rt)
1da177e4
LT
2511 return ERR_PTR(-ENOMEM);
2512
1da177e4
LT
2513 in6_dev_hold(idev);
2514
11d53b49 2515 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2516 rt->dst.input = ip6_input;
2517 rt->dst.output = ip6_output;
1da177e4 2518 rt->rt6i_idev = idev;
1da177e4
LT
2519
2520 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2521 if (anycast)
2522 rt->rt6i_flags |= RTF_ANYCAST;
2523 else
1da177e4 2524 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2525
550bab42 2526 rt->rt6i_gateway = *addr;
4e3fd7a0 2527 rt->rt6i_dst.addr = *addr;
1da177e4 2528 rt->rt6i_dst.plen = 128;
ca254490
DA
2529 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2530 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2531 rt->dst.flags |= DST_NOCACHE;
1da177e4 2532
d8d1f30b 2533 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2534
2535 return rt;
2536}
2537
c3968a85
DW
2538int ip6_route_get_saddr(struct net *net,
2539 struct rt6_info *rt,
b71d1d42 2540 const struct in6_addr *daddr,
c3968a85
DW
2541 unsigned int prefs,
2542 struct in6_addr *saddr)
2543{
e16e888b
MS
2544 struct inet6_dev *idev =
2545 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2546 int err = 0;
e16e888b 2547 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2548 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2549 else
2550 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2551 daddr, prefs, saddr);
2552 return err;
2553}
2554
/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() by rt6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2561
2562static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2563{
2564 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2565 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2566 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2567
d1918542 2568 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2569 rt != net->ipv6.ip6_null_entry &&
2570 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2571 /* remove prefsrc entry */
2572 rt->rt6i_prefsrc.plen = 0;
2573 }
2574 return 0;
2575}
2576
2577void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2578{
2579 struct net *net = dev_net(ifp->idev->dev);
2580 struct arg_dev_net_ip adni = {
2581 .dev = ifp->idev->dev,
2582 .net = net,
2583 .addr = &ifp->addr,
2584 };
0c3584d5 2585 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2586}
2587
be7a010d
DJ
2588#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2589#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2590
2591/* Remove routers and update dst entries when gateway turn into host. */
2592static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2593{
2594 struct in6_addr *gateway = (struct in6_addr *)arg;
2595
2596 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2597 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2598 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2599 return -1;
2600 }
2601 return 0;
2602}
2603
2604void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2605{
2606 fib6_clean_all(net, fib6_clean_tohost, gateway);
2607}
2608
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2613
1da177e4
LT
2614static int fib6_ifdown(struct rt6_info *rt, void *arg)
2615{
bc3ef660 2616 const struct arg_dev_net *adn = arg;
2617 const struct net_device *dev = adn->dev;
8ed67789 2618
d1918542 2619 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2620 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2621 return -1;
c159d30c 2622
1da177e4
LT
2623 return 0;
2624}
2625
f3db4851 2626void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2627{
8ed67789
DL
2628 struct arg_dev_net adn = {
2629 .dev = dev,
2630 .net = net,
2631 };
2632
0c3584d5 2633 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2634 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2635 if (dev)
2636 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2637}
2638
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2643
2644static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2645{
2646 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2647 struct inet6_dev *idev;
2648
2649 /* In IPv6 pmtu discovery is not optional,
2650 so that RTAX_MTU lock cannot disable it.
2651 We still use this lock to block changes
2652 caused by addrconf/ndisc.
2653 */
2654
2655 idev = __in6_dev_get(arg->dev);
38308473 2656 if (!idev)
1da177e4
LT
2657 return 0;
2658
2659 /* For administrative MTU increase, there is no way to discover
2660 IPv6 PMTU increase, so PMTU increase should be updated here.
2661 Since RFC 1981 doesn't include administrative MTU increase
2662 update PMTU increase is a MUST. (i.e. jumbo frame)
2663 */
2664 /*
2665 If new MTU is less than route PMTU, this new MTU will be the
2666 lowest MTU in the path, update the route PMTU to reflect PMTU
2667 decreases; if new MTU is greater than route PMTU, and the
2668 old MTU is the lowest MTU in the path, update the route PMTU
2669 to reflect the increase. In this case if the other nodes' MTU
2670 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2671 PMTU discouvery.
2672 */
d1918542 2673 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2674 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2675 if (rt->rt6i_flags & RTF_CACHE) {
2676 /* For RTF_CACHE with rt6i_pmtu == 0
2677 * (i.e. a redirected route),
2678 * the metrics of its rt->dst.from has already
2679 * been updated.
2680 */
2681 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2682 rt->rt6i_pmtu = arg->mtu;
2683 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2684 (dst_mtu(&rt->dst) < arg->mtu &&
2685 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2686 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2687 }
566cfd8f 2688 }
1da177e4
LT
2689 return 0;
2690}
2691
95c96174 2692void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2693{
c71099ac
TG
2694 struct rt6_mtu_change_arg arg = {
2695 .dev = dev,
2696 .mtu = mtu,
2697 };
1da177e4 2698
0c3584d5 2699 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2700}
2701
ef7c79ed 2702static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2703 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2704 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2705 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2706 [RTA_PRIORITY] = { .type = NLA_U32 },
2707 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2708 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2709 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2710 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2711 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2712 [RTA_EXPIRES] = { .type = NLA_U32 },
86872cb5
TG
2713};
2714
2715static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2716 struct fib6_config *cfg)
1da177e4 2717{
86872cb5
TG
2718 struct rtmsg *rtm;
2719 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2720 unsigned int pref;
86872cb5 2721 int err;
1da177e4 2722
86872cb5
TG
2723 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2724 if (err < 0)
2725 goto errout;
1da177e4 2726
86872cb5
TG
2727 err = -EINVAL;
2728 rtm = nlmsg_data(nlh);
2729 memset(cfg, 0, sizeof(*cfg));
2730
2731 cfg->fc_table = rtm->rtm_table;
2732 cfg->fc_dst_len = rtm->rtm_dst_len;
2733 cfg->fc_src_len = rtm->rtm_src_len;
2734 cfg->fc_flags = RTF_UP;
2735 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2736 cfg->fc_type = rtm->rtm_type;
86872cb5 2737
ef2c7d7b
ND
2738 if (rtm->rtm_type == RTN_UNREACHABLE ||
2739 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2740 rtm->rtm_type == RTN_PROHIBIT ||
2741 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2742 cfg->fc_flags |= RTF_REJECT;
2743
ab79ad14
2744 if (rtm->rtm_type == RTN_LOCAL)
2745 cfg->fc_flags |= RTF_LOCAL;
2746
1f56a01f
MKL
2747 if (rtm->rtm_flags & RTM_F_CLONED)
2748 cfg->fc_flags |= RTF_CACHE;
2749
15e47304 2750 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2751 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2752 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2753
2754 if (tb[RTA_GATEWAY]) {
67b61f6c 2755 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2756 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2757 }
86872cb5
TG
2758
2759 if (tb[RTA_DST]) {
2760 int plen = (rtm->rtm_dst_len + 7) >> 3;
2761
2762 if (nla_len(tb[RTA_DST]) < plen)
2763 goto errout;
2764
2765 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2766 }
86872cb5
TG
2767
2768 if (tb[RTA_SRC]) {
2769 int plen = (rtm->rtm_src_len + 7) >> 3;
2770
2771 if (nla_len(tb[RTA_SRC]) < plen)
2772 goto errout;
2773
2774 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2775 }
86872cb5 2776
c3968a85 2777 if (tb[RTA_PREFSRC])
67b61f6c 2778 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2779
86872cb5
TG
2780 if (tb[RTA_OIF])
2781 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2782
2783 if (tb[RTA_PRIORITY])
2784 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2785
2786 if (tb[RTA_METRICS]) {
2787 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2788 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2789 }
86872cb5
TG
2790
2791 if (tb[RTA_TABLE])
2792 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2793
51ebd318
ND
2794 if (tb[RTA_MULTIPATH]) {
2795 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2796 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2797 }
2798
c78ba6d6
LR
2799 if (tb[RTA_PREF]) {
2800 pref = nla_get_u8(tb[RTA_PREF]);
2801 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2802 pref != ICMPV6_ROUTER_PREF_HIGH)
2803 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2804 cfg->fc_flags |= RTF_PREF(pref);
2805 }
2806
19e42e45
RP
2807 if (tb[RTA_ENCAP])
2808 cfg->fc_encap = tb[RTA_ENCAP];
2809
2810 if (tb[RTA_ENCAP_TYPE])
2811 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2812
32bc201e
XL
2813 if (tb[RTA_EXPIRES]) {
2814 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2815
2816 if (addrconf_finite_timeout(timeout)) {
2817 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2818 cfg->fc_flags |= RTF_EXPIRES;
2819 }
2820 }
2821
86872cb5
TG
2822 err = 0;
2823errout:
2824 return err;
1da177e4
LT
2825}
2826
6b9ea5a6
RP
2827struct rt6_nh {
2828 struct rt6_info *rt6_info;
2829 struct fib6_config r_cfg;
2830 struct mx6_config mxc;
2831 struct list_head next;
2832};
2833
2834static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2835{
2836 struct rt6_nh *nh;
2837
2838 list_for_each_entry(nh, rt6_nh_list, next) {
2839 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2840 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2841 nh->r_cfg.fc_ifindex);
2842 }
2843}
2844
2845static int ip6_route_info_append(struct list_head *rt6_nh_list,
2846 struct rt6_info *rt, struct fib6_config *r_cfg)
2847{
2848 struct rt6_nh *nh;
2849 struct rt6_info *rtnh;
2850 int err = -EEXIST;
2851
2852 list_for_each_entry(nh, rt6_nh_list, next) {
2853 /* check if rt6_info already exists */
2854 rtnh = nh->rt6_info;
2855
2856 if (rtnh->dst.dev == rt->dst.dev &&
2857 rtnh->rt6i_idev == rt->rt6i_idev &&
2858 ipv6_addr_equal(&rtnh->rt6i_gateway,
2859 &rt->rt6i_gateway))
2860 return err;
2861 }
2862
2863 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2864 if (!nh)
2865 return -ENOMEM;
2866 nh->rt6_info = rt;
2867 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2868 if (err) {
2869 kfree(nh);
2870 return err;
2871 }
2872 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2873 list_add_tail(&nh->next, rt6_nh_list);
2874
2875 return 0;
2876}
2877
2878static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2879{
2880 struct fib6_config r_cfg;
2881 struct rtnexthop *rtnh;
6b9ea5a6
RP
2882 struct rt6_info *rt;
2883 struct rt6_nh *err_nh;
2884 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2885 int remaining;
2886 int attrlen;
6b9ea5a6
RP
2887 int err = 1;
2888 int nhn = 0;
2889 int replace = (cfg->fc_nlinfo.nlh &&
2890 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2891 LIST_HEAD(rt6_nh_list);
51ebd318 2892
35f1b4e9 2893 remaining = cfg->fc_mp_len;
51ebd318 2894 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2895
6b9ea5a6
RP
2896 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2897 * rt6_info structs per nexthop
2898 */
51ebd318
ND
2899 while (rtnh_ok(rtnh, remaining)) {
2900 memcpy(&r_cfg, cfg, sizeof(*cfg));
2901 if (rtnh->rtnh_ifindex)
2902 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2903
2904 attrlen = rtnh_attrlen(rtnh);
2905 if (attrlen > 0) {
2906 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2907
2908 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2909 if (nla) {
67b61f6c 2910 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2911 r_cfg.fc_flags |= RTF_GATEWAY;
2912 }
19e42e45
RP
2913 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2914 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2915 if (nla)
2916 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2917 }
6b9ea5a6 2918
8c5b83f0
RP
2919 rt = ip6_route_info_create(&r_cfg);
2920 if (IS_ERR(rt)) {
2921 err = PTR_ERR(rt);
2922 rt = NULL;
6b9ea5a6 2923 goto cleanup;
8c5b83f0 2924 }
6b9ea5a6
RP
2925
2926 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2927 if (err) {
6b9ea5a6
RP
2928 dst_free(&rt->dst);
2929 goto cleanup;
2930 }
2931
2932 rtnh = rtnh_next(rtnh, &remaining);
2933 }
2934
2935 err_nh = NULL;
2936 list_for_each_entry(nh, &rt6_nh_list, next) {
2937 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2938 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2939 nh->rt6_info = NULL;
2940 if (err) {
2941 if (replace && nhn)
2942 ip6_print_replace_route_err(&rt6_nh_list);
2943 err_nh = nh;
2944 goto add_errout;
51ebd318 2945 }
6b9ea5a6 2946
1a72418b 2947 /* Because each route is added like a single route we remove
27596472
MK
2948 * these flags after the first nexthop: if there is a collision,
2949 * we have already failed to add the first nexthop:
2950 * fib6_add_rt2node() has rejected it; when replacing, old
2951 * nexthops have been replaced by first new, the rest should
2952 * be added to it.
1a72418b 2953 */
27596472
MK
2954 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2955 NLM_F_REPLACE);
6b9ea5a6
RP
2956 nhn++;
2957 }
2958
2959 goto cleanup;
2960
2961add_errout:
2962 /* Delete routes that were already added */
2963 list_for_each_entry(nh, &rt6_nh_list, next) {
2964 if (err_nh == nh)
2965 break;
2966 ip6_route_del(&nh->r_cfg);
2967 }
2968
2969cleanup:
2970 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2971 if (nh->rt6_info)
2972 dst_free(&nh->rt6_info->dst);
52fe51f8 2973 kfree(nh->mxc.mx);
6b9ea5a6
RP
2974 list_del(&nh->next);
2975 kfree(nh);
2976 }
2977
2978 return err;
2979}
2980
2981static int ip6_route_multipath_del(struct fib6_config *cfg)
2982{
2983 struct fib6_config r_cfg;
2984 struct rtnexthop *rtnh;
2985 int remaining;
2986 int attrlen;
2987 int err = 1, last_err = 0;
2988
2989 remaining = cfg->fc_mp_len;
2990 rtnh = (struct rtnexthop *)cfg->fc_mp;
2991
2992 /* Parse a Multipath Entry */
2993 while (rtnh_ok(rtnh, remaining)) {
2994 memcpy(&r_cfg, cfg, sizeof(*cfg));
2995 if (rtnh->rtnh_ifindex)
2996 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2997
2998 attrlen = rtnh_attrlen(rtnh);
2999 if (attrlen > 0) {
3000 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3001
3002 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3003 if (nla) {
3004 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3005 r_cfg.fc_flags |= RTF_GATEWAY;
3006 }
3007 }
3008 err = ip6_route_del(&r_cfg);
3009 if (err)
3010 last_err = err;
3011
51ebd318
ND
3012 rtnh = rtnh_next(rtnh, &remaining);
3013 }
3014
3015 return last_err;
3016}
3017
67ba4152 3018static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3019{
86872cb5
TG
3020 struct fib6_config cfg;
3021 int err;
1da177e4 3022
86872cb5
TG
3023 err = rtm_to_fib6_config(skb, nlh, &cfg);
3024 if (err < 0)
3025 return err;
3026
51ebd318 3027 if (cfg.fc_mp)
6b9ea5a6 3028 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3029 else
3030 return ip6_route_del(&cfg);
1da177e4
LT
3031}
3032
67ba4152 3033static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3034{
86872cb5
TG
3035 struct fib6_config cfg;
3036 int err;
1da177e4 3037
86872cb5
TG
3038 err = rtm_to_fib6_config(skb, nlh, &cfg);
3039 if (err < 0)
3040 return err;
3041
51ebd318 3042 if (cfg.fc_mp)
6b9ea5a6 3043 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3044 else
3045 return ip6_route_add(&cfg);
1da177e4
LT
3046}
3047
19e42e45 3048static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3049{
3050 return NLMSG_ALIGN(sizeof(struct rtmsg))
3051 + nla_total_size(16) /* RTA_SRC */
3052 + nla_total_size(16) /* RTA_DST */
3053 + nla_total_size(16) /* RTA_GATEWAY */
3054 + nla_total_size(16) /* RTA_PREFSRC */
3055 + nla_total_size(4) /* RTA_TABLE */
3056 + nla_total_size(4) /* RTA_IIF */
3057 + nla_total_size(4) /* RTA_OIF */
3058 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3059 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3060 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3061 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3062 + nla_total_size(1) /* RTA_PREF */
61adedf3 3063 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3064}
3065
191cd582
BH
3066static int rt6_fill_node(struct net *net,
3067 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3068 struct in6_addr *dst, struct in6_addr *src,
15e47304 3069 int iif, int type, u32 portid, u32 seq,
7bc570c8 3070 int prefix, int nowait, unsigned int flags)
1da177e4 3071{
4b32b5ad 3072 u32 metrics[RTAX_MAX];
1da177e4 3073 struct rtmsg *rtm;
2d7202bf 3074 struct nlmsghdr *nlh;
e3703b3d 3075 long expires;
9e762a4a 3076 u32 table;
1da177e4
LT
3077
3078 if (prefix) { /* user wants prefix routes only */
3079 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3080 /* success since this is not a prefix route */
3081 return 1;
3082 }
3083 }
3084
15e47304 3085 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3086 if (!nlh)
26932566 3087 return -EMSGSIZE;
2d7202bf
TG
3088
3089 rtm = nlmsg_data(nlh);
1da177e4
LT
3090 rtm->rtm_family = AF_INET6;
3091 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3092 rtm->rtm_src_len = rt->rt6i_src.plen;
3093 rtm->rtm_tos = 0;
c71099ac 3094 if (rt->rt6i_table)
9e762a4a 3095 table = rt->rt6i_table->tb6_id;
c71099ac 3096 else
9e762a4a
PM
3097 table = RT6_TABLE_UNSPEC;
3098 rtm->rtm_table = table;
c78679e8
DM
3099 if (nla_put_u32(skb, RTA_TABLE, table))
3100 goto nla_put_failure;
ef2c7d7b
ND
3101 if (rt->rt6i_flags & RTF_REJECT) {
3102 switch (rt->dst.error) {
3103 case -EINVAL:
3104 rtm->rtm_type = RTN_BLACKHOLE;
3105 break;
3106 case -EACCES:
3107 rtm->rtm_type = RTN_PROHIBIT;
3108 break;
b4949ab2
ND
3109 case -EAGAIN:
3110 rtm->rtm_type = RTN_THROW;
3111 break;
ef2c7d7b
ND
3112 default:
3113 rtm->rtm_type = RTN_UNREACHABLE;
3114 break;
3115 }
3116 }
38308473 3117 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3118 rtm->rtm_type = RTN_LOCAL;
d1918542 3119 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3120 rtm->rtm_type = RTN_LOCAL;
3121 else
3122 rtm->rtm_type = RTN_UNICAST;
3123 rtm->rtm_flags = 0;
35103d11 3124 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3125 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3126 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3127 rtm->rtm_flags |= RTNH_F_DEAD;
3128 }
1da177e4
LT
3129 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3130 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3131 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3132 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3133 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3134 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3135 rtm->rtm_protocol = RTPROT_RA;
3136 else
3137 rtm->rtm_protocol = RTPROT_KERNEL;
3138 }
1da177e4 3139
38308473 3140 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3141 rtm->rtm_flags |= RTM_F_CLONED;
3142
3143 if (dst) {
930345ea 3144 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3145 goto nla_put_failure;
1ab1457c 3146 rtm->rtm_dst_len = 128;
1da177e4 3147 } else if (rtm->rtm_dst_len)
930345ea 3148 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3149 goto nla_put_failure;
1da177e4
LT
3150#ifdef CONFIG_IPV6_SUBTREES
3151 if (src) {
930345ea 3152 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3153 goto nla_put_failure;
1ab1457c 3154 rtm->rtm_src_len = 128;
c78679e8 3155 } else if (rtm->rtm_src_len &&
930345ea 3156 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3157 goto nla_put_failure;
1da177e4 3158#endif
7bc570c8
YH
3159 if (iif) {
3160#ifdef CONFIG_IPV6_MROUTE
3161 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3162 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3163 if (err <= 0) {
3164 if (!nowait) {
3165 if (err == 0)
3166 return 0;
3167 goto nla_put_failure;
3168 } else {
3169 if (err == -EMSGSIZE)
3170 goto nla_put_failure;
3171 }
3172 }
3173 } else
3174#endif
c78679e8
DM
3175 if (nla_put_u32(skb, RTA_IIF, iif))
3176 goto nla_put_failure;
7bc570c8 3177 } else if (dst) {
1da177e4 3178 struct in6_addr saddr_buf;
c78679e8 3179 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3180 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3181 goto nla_put_failure;
1da177e4 3182 }
2d7202bf 3183
c3968a85
DW
3184 if (rt->rt6i_prefsrc.plen) {
3185 struct in6_addr saddr_buf;
4e3fd7a0 3186 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3187 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3188 goto nla_put_failure;
c3968a85
DW
3189 }
3190
4b32b5ad
MKL
3191 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3192 if (rt->rt6i_pmtu)
3193 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3194 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3195 goto nla_put_failure;
3196
dd0cbf29 3197 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3198 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3199 goto nla_put_failure;
94f826b8 3200 }
2d7202bf 3201
c78679e8
DM
3202 if (rt->dst.dev &&
3203 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3204 goto nla_put_failure;
3205 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3206 goto nla_put_failure;
8253947e
LW
3207
3208 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3209
87a50699 3210 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3211 goto nla_put_failure;
2d7202bf 3212
c78ba6d6
LR
3213 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3214 goto nla_put_failure;
3215
61adedf3 3216 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3217
053c095a
JB
3218 nlmsg_end(skb, nlh);
3219 return 0;
2d7202bf
TG
3220
3221nla_put_failure:
26932566
PM
3222 nlmsg_cancel(skb, nlh);
3223 return -EMSGSIZE;
1da177e4
LT
3224}
3225
1b43af54 3226int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3227{
3228 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3229 int prefix;
3230
2d7202bf
TG
3231 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3232 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3233 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3234 } else
3235 prefix = 0;
3236
191cd582
BH
3237 return rt6_fill_node(arg->net,
3238 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3239 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3240 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3241}
3242
67ba4152 3243static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3244{
3b1e0a65 3245 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3246 struct nlattr *tb[RTA_MAX+1];
3247 struct rt6_info *rt;
1da177e4 3248 struct sk_buff *skb;
ab364a6f 3249 struct rtmsg *rtm;
4c9483b2 3250 struct flowi6 fl6;
72331bc0 3251 int err, iif = 0, oif = 0;
1da177e4 3252
ab364a6f
TG
3253 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3254 if (err < 0)
3255 goto errout;
1da177e4 3256
ab364a6f 3257 err = -EINVAL;
4c9483b2 3258 memset(&fl6, 0, sizeof(fl6));
1da177e4 3259
ab364a6f
TG
3260 if (tb[RTA_SRC]) {
3261 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3262 goto errout;
3263
4e3fd7a0 3264 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3265 }
3266
3267 if (tb[RTA_DST]) {
3268 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3269 goto errout;
3270
4e3fd7a0 3271 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3272 }
3273
3274 if (tb[RTA_IIF])
3275 iif = nla_get_u32(tb[RTA_IIF]);
3276
3277 if (tb[RTA_OIF])
72331bc0 3278 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3279
2e47b291
LC
3280 if (tb[RTA_MARK])
3281 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3282
1da177e4
LT
3283 if (iif) {
3284 struct net_device *dev;
72331bc0
SL
3285 int flags = 0;
3286
5578689a 3287 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3288 if (!dev) {
3289 err = -ENODEV;
ab364a6f 3290 goto errout;
1da177e4 3291 }
72331bc0
SL
3292
3293 fl6.flowi6_iif = iif;
3294
3295 if (!ipv6_addr_any(&fl6.saddr))
3296 flags |= RT6_LOOKUP_F_HAS_SADDR;
3297
3298 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3299 flags);
3300 } else {
3301 fl6.flowi6_oif = oif;
3302
ca254490
DA
3303 if (netif_index_is_l3_master(net, oif)) {
3304 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3305 FLOWI_FLAG_SKIP_NH_OIF;
3306 }
3307
72331bc0 3308 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3309 }
3310
ab364a6f 3311 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3312 if (!skb) {
94e187c0 3313 ip6_rt_put(rt);
ab364a6f
TG
3314 err = -ENOBUFS;
3315 goto errout;
3316 }
1da177e4 3317
ab364a6f
TG
3318 /* Reserve room for dummy headers, this skb can pass
3319 through good chunk of routing engine.
3320 */
459a98ed 3321 skb_reset_mac_header(skb);
ab364a6f 3322 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3323
d8d1f30b 3324 skb_dst_set(skb, &rt->dst);
1da177e4 3325
4c9483b2 3326 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3327 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3328 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3329 if (err < 0) {
ab364a6f
TG
3330 kfree_skb(skb);
3331 goto errout;
1da177e4
LT
3332 }
3333
15e47304 3334 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3335errout:
1da177e4 3336 return err;
1da177e4
LT
3337}
3338
37a1d361
RP
3339void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3340 unsigned int nlm_flags)
1da177e4
LT
3341{
3342 struct sk_buff *skb;
5578689a 3343 struct net *net = info->nl_net;
528c4ceb
DL
3344 u32 seq;
3345 int err;
3346
3347 err = -ENOBUFS;
38308473 3348 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3349
19e42e45 3350 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3351 if (!skb)
21713ebc
TG
3352 goto errout;
3353
191cd582 3354 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3355 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3356 if (err < 0) {
3357 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3358 WARN_ON(err == -EMSGSIZE);
3359 kfree_skb(skb);
3360 goto errout;
3361 }
15e47304 3362 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3363 info->nlh, gfp_any());
3364 return;
21713ebc
TG
3365errout:
3366 if (err < 0)
5578689a 3367 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3368}
3369
8ed67789 3370static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3371 unsigned long event, void *ptr)
8ed67789 3372{
351638e7 3373 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3374 struct net *net = dev_net(dev);
8ed67789
DL
3375
3376 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3377 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3378 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3379#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3380 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3381 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3382 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3383 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3384#endif
3385 }
3386
3387 return NOTIFY_OK;
3388}
3389
1da177e4
LT
3390/*
3391 * /proc
3392 */
3393
3394#ifdef CONFIG_PROC_FS
3395
33120b30
AD
3396static const struct file_operations ipv6_route_proc_fops = {
3397 .owner = THIS_MODULE,
3398 .open = ipv6_route_open,
3399 .read = seq_read,
3400 .llseek = seq_lseek,
8d2ca1d7 3401 .release = seq_release_net,
33120b30
AD
3402};
3403
1da177e4
LT
3404static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3405{
69ddb805 3406 struct net *net = (struct net *)seq->private;
1da177e4 3407 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3408 net->ipv6.rt6_stats->fib_nodes,
3409 net->ipv6.rt6_stats->fib_route_nodes,
3410 net->ipv6.rt6_stats->fib_rt_alloc,
3411 net->ipv6.rt6_stats->fib_rt_entries,
3412 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3413 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3414 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3415
3416 return 0;
3417}
3418
3419static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3420{
de05c557 3421 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3422}
3423
9a32144e 3424static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3425 .owner = THIS_MODULE,
3426 .open = rt6_stats_seq_open,
3427 .read = seq_read,
3428 .llseek = seq_lseek,
b6fcbdb4 3429 .release = single_release_net,
1da177e4
LT
3430};
3431#endif /* CONFIG_PROC_FS */
3432
3433#ifdef CONFIG_SYSCTL
3434
1da177e4 3435static
fe2c6338 3436int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3437 void __user *buffer, size_t *lenp, loff_t *ppos)
3438{
c486da34
LAG
3439 struct net *net;
3440 int delay;
3441 if (!write)
1da177e4 3442 return -EINVAL;
c486da34
LAG
3443
3444 net = (struct net *)ctl->extra1;
3445 delay = net->ipv6.sysctl.flush_delay;
3446 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3447 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3448 return 0;
1da177e4
LT
3449}
3450
fe2c6338 3451struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3452 {
1da177e4 3453 .procname = "flush",
4990509f 3454 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3455 .maxlen = sizeof(int),
89c8b3a1 3456 .mode = 0200,
6d9f239a 3457 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3458 },
3459 {
1da177e4 3460 .procname = "gc_thresh",
9a7ec3a9 3461 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3462 .maxlen = sizeof(int),
3463 .mode = 0644,
6d9f239a 3464 .proc_handler = proc_dointvec,
1da177e4
LT
3465 },
3466 {
1da177e4 3467 .procname = "max_size",
4990509f 3468 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3469 .maxlen = sizeof(int),
3470 .mode = 0644,
6d9f239a 3471 .proc_handler = proc_dointvec,
1da177e4
LT
3472 },
3473 {
1da177e4 3474 .procname = "gc_min_interval",
4990509f 3475 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3476 .maxlen = sizeof(int),
3477 .mode = 0644,
6d9f239a 3478 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3479 },
3480 {
1da177e4 3481 .procname = "gc_timeout",
4990509f 3482 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3483 .maxlen = sizeof(int),
3484 .mode = 0644,
6d9f239a 3485 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3486 },
3487 {
1da177e4 3488 .procname = "gc_interval",
4990509f 3489 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3490 .maxlen = sizeof(int),
3491 .mode = 0644,
6d9f239a 3492 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3493 },
3494 {
1da177e4 3495 .procname = "gc_elasticity",
4990509f 3496 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3497 .maxlen = sizeof(int),
3498 .mode = 0644,
f3d3f616 3499 .proc_handler = proc_dointvec,
1da177e4
LT
3500 },
3501 {
1da177e4 3502 .procname = "mtu_expires",
4990509f 3503 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3504 .maxlen = sizeof(int),
3505 .mode = 0644,
6d9f239a 3506 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3507 },
3508 {
1da177e4 3509 .procname = "min_adv_mss",
4990509f 3510 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3511 .maxlen = sizeof(int),
3512 .mode = 0644,
f3d3f616 3513 .proc_handler = proc_dointvec,
1da177e4
LT
3514 },
3515 {
1da177e4 3516 .procname = "gc_min_interval_ms",
4990509f 3517 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3518 .maxlen = sizeof(int),
3519 .mode = 0644,
6d9f239a 3520 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3521 },
f8572d8f 3522 { }
1da177e4
LT
3523};
3524
2c8c1e72 3525struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3526{
3527 struct ctl_table *table;
3528
3529 table = kmemdup(ipv6_route_table_template,
3530 sizeof(ipv6_route_table_template),
3531 GFP_KERNEL);
5ee09105
YH
3532
3533 if (table) {
3534 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3535 table[0].extra1 = net;
86393e52 3536 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3537 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3538 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3539 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3540 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3541 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3542 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3543 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3544 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3545
3546 /* Don't export sysctls to unprivileged users */
3547 if (net->user_ns != &init_user_ns)
3548 table[0].procname = NULL;
5ee09105
YH
3549 }
3550
760f2d01
DL
3551 return table;
3552}
1da177e4
LT
3553#endif
3554
2c8c1e72 3555static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3556{
633d424b 3557 int ret = -ENOMEM;
8ed67789 3558
86393e52
AD
3559 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3560 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3561
fc66f95c
ED
3562 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3563 goto out_ip6_dst_ops;
3564
8ed67789
DL
3565 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3566 sizeof(*net->ipv6.ip6_null_entry),
3567 GFP_KERNEL);
3568 if (!net->ipv6.ip6_null_entry)
fc66f95c 3569 goto out_ip6_dst_entries;
d8d1f30b 3570 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3571 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3572 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3573 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3574 ip6_template_metrics, true);
8ed67789
DL
3575
3576#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3577 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3578 sizeof(*net->ipv6.ip6_prohibit_entry),
3579 GFP_KERNEL);
68fffc67
PZ
3580 if (!net->ipv6.ip6_prohibit_entry)
3581 goto out_ip6_null_entry;
d8d1f30b 3582 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3583 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3584 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3585 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3586 ip6_template_metrics, true);
8ed67789
DL
3587
3588 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3589 sizeof(*net->ipv6.ip6_blk_hole_entry),
3590 GFP_KERNEL);
68fffc67
PZ
3591 if (!net->ipv6.ip6_blk_hole_entry)
3592 goto out_ip6_prohibit_entry;
d8d1f30b 3593 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3594 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3595 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3596 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3597 ip6_template_metrics, true);
8ed67789
DL
3598#endif
3599
b339a47c
PZ
3600 net->ipv6.sysctl.flush_delay = 0;
3601 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3602 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3603 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3604 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3605 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3606 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3607 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3608
6891a346
BT
3609 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3610
8ed67789
DL
3611 ret = 0;
3612out:
3613 return ret;
f2fc6a54 3614
68fffc67
PZ
3615#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3616out_ip6_prohibit_entry:
3617 kfree(net->ipv6.ip6_prohibit_entry);
3618out_ip6_null_entry:
3619 kfree(net->ipv6.ip6_null_entry);
3620#endif
fc66f95c
ED
3621out_ip6_dst_entries:
3622 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3623out_ip6_dst_ops:
f2fc6a54 3624 goto out;
cdb18761
DL
3625}
3626
2c8c1e72 3627static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3628{
8ed67789
DL
3629 kfree(net->ipv6.ip6_null_entry);
3630#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3631 kfree(net->ipv6.ip6_prohibit_entry);
3632 kfree(net->ipv6.ip6_blk_hole_entry);
3633#endif
41bb78b4 3634 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3635}
3636
d189634e
TG
3637static int __net_init ip6_route_net_init_late(struct net *net)
3638{
3639#ifdef CONFIG_PROC_FS
d4beaa66
G
3640 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3641 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3642#endif
3643 return 0;
3644}
3645
3646static void __net_exit ip6_route_net_exit_late(struct net *net)
3647{
3648#ifdef CONFIG_PROC_FS
ece31ffd
G
3649 remove_proc_entry("ipv6_route", net->proc_net);
3650 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3651#endif
3652}
3653
cdb18761
DL
3654static struct pernet_operations ip6_route_net_ops = {
3655 .init = ip6_route_net_init,
3656 .exit = ip6_route_net_exit,
3657};
3658
c3426b47
DM
3659static int __net_init ipv6_inetpeer_init(struct net *net)
3660{
3661 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3662
3663 if (!bp)
3664 return -ENOMEM;
3665 inet_peer_base_init(bp);
3666 net->ipv6.peers = bp;
3667 return 0;
3668}
3669
3670static void __net_exit ipv6_inetpeer_exit(struct net *net)
3671{
3672 struct inet_peer_base *bp = net->ipv6.peers;
3673
3674 net->ipv6.peers = NULL;
56a6b248 3675 inetpeer_invalidate_tree(bp);
c3426b47
DM
3676 kfree(bp);
3677}
3678
2b823f72 3679static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3680 .init = ipv6_inetpeer_init,
3681 .exit = ipv6_inetpeer_exit,
3682};
3683
d189634e
TG
3684static struct pernet_operations ip6_route_net_late_ops = {
3685 .init = ip6_route_net_init_late,
3686 .exit = ip6_route_net_exit_late,
3687};
3688
8ed67789
DL
3689static struct notifier_block ip6_route_dev_notifier = {
3690 .notifier_call = ip6_route_dev_notify,
3691 .priority = 0,
3692};
3693
433d49c3 3694int __init ip6_route_init(void)
1da177e4 3695{
433d49c3 3696 int ret;
8d0b94af 3697 int cpu;
433d49c3 3698
9a7ec3a9
DL
3699 ret = -ENOMEM;
3700 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3701 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3702 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3703 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3704 goto out;
14e50e57 3705
fc66f95c 3706 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3707 if (ret)
bdb3289f 3708 goto out_kmem_cache;
bdb3289f 3709
c3426b47
DM
3710 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3711 if (ret)
e8803b6c 3712 goto out_dst_entries;
2a0c451a 3713
7e52b33b
DM
3714 ret = register_pernet_subsys(&ip6_route_net_ops);
3715 if (ret)
3716 goto out_register_inetpeer;
c3426b47 3717
5dc121e9
AE
3718 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3719
8ed67789
DL
3720 /* Registering of the loopback is done before this portion of code,
3721 * the loopback reference in rt6_info will not be taken, do it
3722 * manually for init_net */
d8d1f30b 3723 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3724 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3725 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3726 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3727 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3728 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3729 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3730 #endif
e8803b6c 3731 ret = fib6_init();
433d49c3 3732 if (ret)
8ed67789 3733 goto out_register_subsys;
433d49c3 3734
433d49c3
DL
3735 ret = xfrm6_init();
3736 if (ret)
e8803b6c 3737 goto out_fib6_init;
c35b7e72 3738
433d49c3
DL
3739 ret = fib6_rules_init();
3740 if (ret)
3741 goto xfrm6_init;
7e5449c2 3742
d189634e
TG
3743 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3744 if (ret)
3745 goto fib6_rules_init;
3746
433d49c3 3747 ret = -ENOBUFS;
c7ac8679
GR
3748 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3749 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3750 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3751 goto out_register_late_subsys;
c127ea2c 3752
8ed67789 3753 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3754 if (ret)
d189634e 3755 goto out_register_late_subsys;
8ed67789 3756
8d0b94af
MKL
3757 for_each_possible_cpu(cpu) {
3758 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3759
3760 INIT_LIST_HEAD(&ul->head);
3761 spin_lock_init(&ul->lock);
3762 }
3763
433d49c3
DL
3764out:
3765 return ret;
3766
d189634e
TG
3767out_register_late_subsys:
3768 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3769fib6_rules_init:
433d49c3
DL
3770 fib6_rules_cleanup();
3771xfrm6_init:
433d49c3 3772 xfrm6_fini();
2a0c451a
TG
3773out_fib6_init:
3774 fib6_gc_cleanup();
8ed67789
DL
3775out_register_subsys:
3776 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3777out_register_inetpeer:
3778 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3779out_dst_entries:
3780 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3781out_kmem_cache:
f2fc6a54 3782 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3783 goto out;
1da177e4
LT
3784}
3785
3786void ip6_route_cleanup(void)
3787{
8ed67789 3788 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3789 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3790 fib6_rules_cleanup();
1da177e4 3791 xfrm6_fini();
1da177e4 3792 fib6_gc_cleanup();
c3426b47 3793 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3794 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3795 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3796 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3797}