ppp, slip: Validate VJ compression slot parameters completely
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
1da177e4
LT
64
65#include <asm/uaccess.h>
66
67#ifdef CONFIG_SYSCTL
68#include <linux/sysctl.h>
69#endif
70
/*
 * Result of checking the neighbour state of a route's next hop.
 * Negative values are failures; rt6_score_route() hands them back in
 * place of a score and find_match() reacts to them.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() asks for round-robin */
	RT6_NUD_SUCCEED = 1
};
77
83a09abd 78static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 79static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 80static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 81static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
82static struct dst_entry *ip6_negative_advice(struct dst_entry *);
83static void ip6_dst_destroy(struct dst_entry *);
84static void ip6_dst_ifdown(struct dst_entry *,
85 struct net_device *dev, int how);
569d3645 86static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
87
88static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 89static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 90static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 91static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 92static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
93static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb, u32 mtu);
95static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb);
4b32b5ad 97static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 98static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 99
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
109
8d0b94af
MKL
110struct uncached_list {
111 spinlock_t lock;
112 struct list_head head;
113};
114
115static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
116
117static void rt6_uncached_list_add(struct rt6_info *rt)
118{
119 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
120
121 rt->dst.flags |= DST_NOCACHE;
122 rt->rt6i_uncached_list = ul;
123
124 spin_lock_bh(&ul->lock);
125 list_add_tail(&rt->rt6i_uncached, &ul->head);
126 spin_unlock_bh(&ul->lock);
127}
128
129static void rt6_uncached_list_del(struct rt6_info *rt)
130{
131 if (!list_empty(&rt->rt6i_uncached)) {
132 struct uncached_list *ul = rt->rt6i_uncached_list;
133
134 spin_lock_bh(&ul->lock);
135 list_del(&rt->rt6i_uncached);
136 spin_unlock_bh(&ul->lock);
137 }
138}
139
140static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
141{
142 struct net_device *loopback_dev = net->loopback_dev;
143 int cpu;
144
e332bc67
EB
145 if (dev == loopback_dev)
146 return;
147
8d0b94af
MKL
148 for_each_possible_cpu(cpu) {
149 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
150 struct rt6_info *rt;
151
152 spin_lock_bh(&ul->lock);
153 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
154 struct inet6_dev *rt_idev = rt->rt6i_idev;
155 struct net_device *rt_dev = rt->dst.dev;
156
e332bc67 157 if (rt_idev->dev == dev) {
8d0b94af
MKL
158 rt->rt6i_idev = in6_dev_get(loopback_dev);
159 in6_dev_put(rt_idev);
160 }
161
e332bc67 162 if (rt_dev == dev) {
8d0b94af
MKL
163 rt->dst.dev = loopback_dev;
164 dev_hold(rt->dst.dev);
165 dev_put(rt_dev);
166 }
167 }
168 spin_unlock_bh(&ul->lock);
169 }
170}
171
d52d3997
MKL
172static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
173{
174 return dst_metrics_write_ptr(rt->dst.from);
175}
176
06582540
DM
177static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
178{
4b32b5ad 179 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 180
d52d3997
MKL
181 if (rt->rt6i_flags & RTF_PCPU)
182 return rt6_pcpu_cow_metrics(rt);
183 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
184 return NULL;
185 else
3b471175 186 return dst_cow_metrics_generic(dst, old);
06582540
DM
187}
188
f894cbf8
DM
189static inline const void *choose_neigh_daddr(struct rt6_info *rt,
190 struct sk_buff *skb,
191 const void *daddr)
39232973
DM
192{
193 struct in6_addr *p = &rt->rt6i_gateway;
194
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f894cbf8
DM
202static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
203 struct sk_buff *skb,
204 const void *daddr)
d3aaeb38 205{
39232973
DM
206 struct rt6_info *rt = (struct rt6_info *) dst;
207 struct neighbour *n;
208
f894cbf8 209 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 210 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
213 return neigh_create(&nd_tbl, daddr, dst->dev);
214}
215
9a7ec3a9 216static struct dst_ops ip6_dst_ops_template = {
1da177e4 217 .family = AF_INET6,
1da177e4
LT
218 .gc = ip6_dst_gc,
219 .gc_thresh = 1024,
220 .check = ip6_dst_check,
0dbaee3b 221 .default_advmss = ip6_default_advmss,
ebb762f2 222 .mtu = ip6_mtu,
06582540 223 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
224 .destroy = ip6_dst_destroy,
225 .ifdown = ip6_dst_ifdown,
226 .negative_advice = ip6_negative_advice,
227 .link_failure = ip6_link_failure,
228 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 229 .redirect = rt6_do_redirect,
1ac06e03 230 .local_out = __ip6_local_out,
d3aaeb38 231 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
232};
233
ebb762f2 234static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 235{
618f9bc7
SK
236 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
237
238 return mtu ? : dst->dev->mtu;
ec831ea7
RD
239}
240
6700c270
DM
241static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
242 struct sk_buff *skb, u32 mtu)
14e50e57
DM
243{
244}
245
/* Redirects are deliberately ignored on blackhole routes. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
250
14e50e57
DM
251static struct dst_ops ip6_dst_blackhole_ops = {
252 .family = AF_INET6,
14e50e57
DM
253 .destroy = ip6_dst_destroy,
254 .check = ip6_dst_check,
ebb762f2 255 .mtu = ip6_blackhole_mtu,
214f45c9 256 .default_advmss = ip6_default_advmss,
14e50e57 257 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 258 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 259 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 260 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
261};
262
62fa8a84 263static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 264 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
265};
266
fb0af4c7 267static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
268 .dst = {
269 .__refcnt = ATOMIC_INIT(1),
270 .__use = 1,
2c20cbd7 271 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 272 .error = -ENETUNREACH,
d8d1f30b
CG
273 .input = ip6_pkt_discard,
274 .output = ip6_pkt_discard_out,
1da177e4
LT
275 },
276 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 277 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
278 .rt6i_metric = ~(u32) 0,
279 .rt6i_ref = ATOMIC_INIT(1),
280};
281
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" entry: a reject route whose dst reports
 * -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: a reject route whose dst reports
 * -EINVAL and drops packets through the generic dst_discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
315
ebfa45f0
MKL
316static void rt6_info_init(struct rt6_info *rt)
317{
318 struct dst_entry *dst = &rt->dst;
319
320 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
321 INIT_LIST_HEAD(&rt->rt6i_siblings);
322 INIT_LIST_HEAD(&rt->rt6i_uncached);
323}
324
1da177e4 325/* allocate dst with ip6_dst_ops */
d52d3997
MKL
326static struct rt6_info *__ip6_dst_alloc(struct net *net,
327 struct net_device *dev,
ad706862 328 int flags)
1da177e4 329{
97bab73f 330 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 331 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 332
ebfa45f0
MKL
333 if (rt)
334 rt6_info_init(rt);
8104891b 335
cf911662 336 return rt;
1da177e4
LT
337}
338
d52d3997
MKL
339static struct rt6_info *ip6_dst_alloc(struct net *net,
340 struct net_device *dev,
ad706862 341 int flags)
d52d3997 342{
ad706862 343 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
344
345 if (rt) {
346 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347 if (rt->rt6i_pcpu) {
348 int cpu;
349
350 for_each_possible_cpu(cpu) {
351 struct rt6_info **p;
352
353 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354 /* no one shares rt */
355 *p = NULL;
356 }
357 } else {
358 dst_destroy((struct dst_entry *)rt);
359 return NULL;
360 }
361 }
362
363 return rt;
364}
365
1da177e4
LT
366static void ip6_dst_destroy(struct dst_entry *dst)
367{
368 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 369 struct dst_entry *from = dst->from;
8d0b94af 370 struct inet6_dev *idev;
1da177e4 371
4b32b5ad 372 dst_destroy_metrics_generic(dst);
87775312 373 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
ecd98837
YH
382 dst->from = NULL;
383 dst_release(from);
b3419363
DM
384}
385
1da177e4
LT
386static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
387 int how)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
390 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 391 struct net_device *loopback_dev =
c346dca1 392 dev_net(dev)->loopback_dev;
1da177e4 393
97cac082
DM
394 if (dev != loopback_dev) {
395 if (idev && idev->dev == dev) {
396 struct inet6_dev *loopback_idev =
397 in6_dev_get(loopback_dev);
398 if (loopback_idev) {
399 rt->rt6i_idev = loopback_idev;
400 in6_dev_put(idev);
401 }
402 }
1da177e4
LT
403 }
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
1716a961
G
408 if (rt->rt6i_flags & RTF_EXPIRES) {
409 if (time_after(jiffies, rt->dst.expires))
a50feda5 410 return true;
1716a961 411 } else if (rt->dst.from) {
3fd91fb3 412 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 413 }
a50feda5 414 return false;
1da177e4
LT
415}
416
51ebd318
ND
417/* Multipath route selection:
418 * Hash based function using packet header and flowlabel.
419 * Adapted from fib_info_hashfn()
420 */
421static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 const struct flowi6 *fl6)
423{
424 unsigned int val = fl6->flowi6_proto;
425
c08977bb
YH
426 val ^= ipv6_addr_hash(&fl6->daddr);
427 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
428
429 /* Work only if this not encapsulated */
430 switch (fl6->flowi6_proto) {
431 case IPPROTO_UDP:
432 case IPPROTO_TCP:
433 case IPPROTO_SCTP:
b3ce5ae1
ND
434 val ^= (__force u16)fl6->fl6_sport;
435 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
436 break;
437
438 case IPPROTO_ICMPV6:
b3ce5ae1
ND
439 val ^= (__force u16)fl6->fl6_icmp_type;
440 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
441 break;
442 }
443 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 444 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
445
446 /* Perhaps, we need to tune, this function? */
447 val = val ^ (val >> 7) ^ (val >> 12);
448 return val % candidate_count;
449}
450
451static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
452 struct flowi6 *fl6, int oif,
453 int strict)
51ebd318
ND
454{
455 struct rt6_info *sibling, *next_sibling;
456 int route_choosen;
457
458 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
459 /* Don't change the route, if route_choosen == 0
460 * (siblings does not include ourself)
461 */
462 if (route_choosen)
463 list_for_each_entry_safe(sibling, next_sibling,
464 &match->rt6i_siblings, rt6i_siblings) {
465 route_choosen--;
466 if (route_choosen == 0) {
52bd4c0c
ND
467 if (rt6_score_route(sibling, oif, strict) < 0)
468 break;
51ebd318
ND
469 match = sibling;
470 break;
471 }
472 }
473 return match;
474}
475
1da177e4 476/*
c71099ac 477 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
478 */
479
8ed67789
DL
480static inline struct rt6_info *rt6_device_match(struct net *net,
481 struct rt6_info *rt,
b71d1d42 482 const struct in6_addr *saddr,
1da177e4 483 int oif,
d420895e 484 int flags)
1da177e4
LT
485{
486 struct rt6_info *local = NULL;
487 struct rt6_info *sprt;
488
dd3abc4e
YH
489 if (!oif && ipv6_addr_any(saddr))
490 goto out;
491
d8d1f30b 492 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 493 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
494
495 if (oif) {
1da177e4
LT
496 if (dev->ifindex == oif)
497 return sprt;
498 if (dev->flags & IFF_LOOPBACK) {
38308473 499 if (!sprt->rt6i_idev ||
1da177e4 500 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 501 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 502 continue;
1ab1457c 503 if (local && (!oif ||
1da177e4
LT
504 local->rt6i_idev->dev->ifindex == oif))
505 continue;
506 }
507 local = sprt;
508 }
dd3abc4e
YH
509 } else {
510 if (ipv6_chk_addr(net, saddr, dev,
511 flags & RT6_LOOKUP_F_IFACE))
512 return sprt;
1da177e4 513 }
dd3abc4e 514 }
1da177e4 515
dd3abc4e 516 if (oif) {
1da177e4
LT
517 if (local)
518 return local;
519
d420895e 520 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 521 return net->ipv6.ip6_null_entry;
1da177e4 522 }
dd3abc4e 523out:
1da177e4
LT
524 return rt;
525}
526
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour solicitation request queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* gateway address to solicit */
	struct net_device *dev;		/* held until the work has run */
};

/*
 * Work handler: send a neighbour solicitation for work->target to its
 * solicited-node multicast address, then drop the device reference and
 * free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
	dev_put(work->dev);
	kfree(work);
}

/*
 * Schedule a reachability probe of @rt's gateway.  Only gateway routes
 * are probed.  If a neighbour entry exists, a probe is queued only when
 * it is not NUD_VALID and was last updated more than rtr_probe_interval
 * ago; if no entry exists, a probe is always queued.  Allocation failure
 * silently skips the probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work = NULL;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		/* Unlocked fast path; re-checked under the lock below. */
		if (neigh->nud_state & NUD_VALID)
			goto out;

		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		/* Reference is dropped by rt6_probe_deferred(). */
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
597
1da177e4 598/*
554cfb7e 599 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 600 */
b6f99a21 601static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 602{
d1918542 603 struct net_device *dev = rt->dst.dev;
161980f4 604 if (!oif || dev->ifindex == oif)
554cfb7e 605 return 2;
161980f4
DM
606 if ((dev->flags & IFF_LOOPBACK) &&
607 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608 return 1;
609 return 0;
554cfb7e 610}
1da177e4 611
afc154e9 612static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 613{
f2c31e32 614 struct neighbour *neigh;
afc154e9 615 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 616
4d0c5911
YH
617 if (rt->rt6i_flags & RTF_NONEXTHOP ||
618 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 619 return RT6_NUD_SUCCEED;
145a3621
YH
620
621 rcu_read_lock_bh();
622 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623 if (neigh) {
624 read_lock(&neigh->lock);
554cfb7e 625 if (neigh->nud_state & NUD_VALID)
afc154e9 626 ret = RT6_NUD_SUCCEED;
398bcbeb 627#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 628 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 629 ret = RT6_NUD_SUCCEED;
7e980569
JB
630 else
631 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 632#endif
145a3621 633 read_unlock(&neigh->lock);
afc154e9
HFS
634 } else {
635 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 636 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 637 }
145a3621
YH
638 rcu_read_unlock_bh();
639
a5a81f0b 640 return ret;
1da177e4
LT
641}
642
554cfb7e
YH
643static int rt6_score_route(struct rt6_info *rt, int oif,
644 int strict)
1da177e4 645{
a5a81f0b 646 int m;
1ab1457c 647
4d0c5911 648 m = rt6_check_dev(rt, oif);
77d16f45 649 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 650 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
651#ifdef CONFIG_IPV6_ROUTER_PREF
652 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653#endif
afc154e9
HFS
654 if (strict & RT6_LOOKUP_F_REACHABLE) {
655 int n = rt6_check_neigh(rt);
656 if (n < 0)
657 return n;
658 }
554cfb7e
YH
659 return m;
660}
661
f11e6659 662static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
663 int *mpri, struct rt6_info *match,
664 bool *do_rr)
554cfb7e 665{
f11e6659 666 int m;
afc154e9 667 bool match_do_rr = false;
35103d11
AG
668 struct inet6_dev *idev = rt->rt6i_idev;
669 struct net_device *dev = rt->dst.dev;
670
671 if (dev && !netif_carrier_ok(dev) &&
672 idev->cnf.ignore_routes_with_linkdown)
673 goto out;
f11e6659
DM
674
675 if (rt6_check_expired(rt))
676 goto out;
677
678 m = rt6_score_route(rt, oif, strict);
7e980569 679 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
680 match_do_rr = true;
681 m = 0; /* lowest valid score */
7e980569 682 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 683 goto out;
afc154e9
HFS
684 }
685
686 if (strict & RT6_LOOKUP_F_REACHABLE)
687 rt6_probe(rt);
f11e6659 688
7e980569 689 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 690 if (m > *mpri) {
afc154e9 691 *do_rr = match_do_rr;
f11e6659
DM
692 *mpri = m;
693 match = rt;
f11e6659 694 }
f11e6659
DM
695out:
696 return match;
697}
698
699static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
700 struct rt6_info *rr_head,
afc154e9
HFS
701 u32 metric, int oif, int strict,
702 bool *do_rr)
f11e6659 703{
9fbdcfaf 704 struct rt6_info *rt, *match, *cont;
554cfb7e 705 int mpri = -1;
1da177e4 706
f11e6659 707 match = NULL;
9fbdcfaf
SK
708 cont = NULL;
709 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
710 if (rt->rt6i_metric != metric) {
711 cont = rt;
712 break;
713 }
714
715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
716 }
717
718 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
719 if (rt->rt6i_metric != metric) {
720 cont = rt;
721 break;
722 }
723
afc154e9 724 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
725 }
726
727 if (match || !cont)
728 return match;
729
730 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 731 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 732
f11e6659
DM
733 return match;
734}
1da177e4 735
f11e6659
DM
736static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
737{
738 struct rt6_info *match, *rt0;
8ed67789 739 struct net *net;
afc154e9 740 bool do_rr = false;
1da177e4 741
f11e6659
DM
742 rt0 = fn->rr_ptr;
743 if (!rt0)
744 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 745
afc154e9
HFS
746 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
747 &do_rr);
1da177e4 748
afc154e9 749 if (do_rr) {
d8d1f30b 750 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 751
554cfb7e 752 /* no entries matched; do round-robin */
f11e6659
DM
753 if (!next || next->rt6i_metric != rt0->rt6i_metric)
754 next = fn->leaf;
755
756 if (next != rt0)
757 fn->rr_ptr = next;
1da177e4 758 }
1da177e4 759
d1918542 760 net = dev_net(rt0->dst.dev);
a02cec21 761 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
762}
763
8b9df265
MKL
764static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
765{
766 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
767}
768
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from gateway
 * @gwaddr.
 *
 * @opt points at the option, interpreted as struct route_info, and @len
 * is its size in bytes.  A zero lifetime deletes a matching existing
 * route; a non-zero lifetime adds the route if missing or refreshes the
 * preference of an existing one, then sets or clears its expiry.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix refers to the default route via this gateway. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
844
a3c00e46
MKL
845static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
846 struct in6_addr *saddr)
847{
848 struct fib6_node *pn;
849 while (1) {
850 if (fn->fn_flags & RTN_TL_ROOT)
851 return NULL;
852 pn = fn->parent;
853 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
854 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
855 else
856 fn = pn;
857 if (fn->fn_flags & RTN_RTINFO)
858 return fn;
859 }
860}
c71099ac 861
8ed67789
DL
862static struct rt6_info *ip6_pol_route_lookup(struct net *net,
863 struct fib6_table *table,
4c9483b2 864 struct flowi6 *fl6, int flags)
1da177e4
LT
865{
866 struct fib6_node *fn;
867 struct rt6_info *rt;
868
c71099ac 869 read_lock_bh(&table->tb6_lock);
4c9483b2 870 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
871restart:
872 rt = fn->leaf;
4c9483b2 873 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 874 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 875 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
876 if (rt == net->ipv6.ip6_null_entry) {
877 fn = fib6_backtrack(fn, &fl6->saddr);
878 if (fn)
879 goto restart;
880 }
d8d1f30b 881 dst_use(&rt->dst, jiffies);
c71099ac 882 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
883 return rt;
884
885}
886
67ba4152 887struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
888 int flags)
889{
890 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
891}
892EXPORT_SYMBOL_GPL(ip6_route_lookup);
893
9acd9f3a
YH
894struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
895 const struct in6_addr *saddr, int oif, int strict)
c71099ac 896{
4c9483b2
DM
897 struct flowi6 fl6 = {
898 .flowi6_oif = oif,
899 .daddr = *daddr,
c71099ac
TG
900 };
901 struct dst_entry *dst;
77d16f45 902 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 903
adaa70bb 904 if (saddr) {
4c9483b2 905 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
906 flags |= RT6_LOOKUP_F_HAS_SADDR;
907 }
908
4c9483b2 909 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
910 if (dst->error == 0)
911 return (struct rt6_info *) dst;
912
913 dst_release(dst);
914
1da177e4
LT
915 return NULL;
916}
7159039a
YH
917EXPORT_SYMBOL(rt6_lookup);
918
c71099ac 919/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
920 It takes new route entry, the addition fails by any reason the
921 route is freed. In any case, if caller does not hold it, it may
922 be destroyed.
923 */
924
e5fd387a 925static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 926 struct mx6_config *mxc)
1da177e4
LT
927{
928 int err;
c71099ac 929 struct fib6_table *table;
1da177e4 930
c71099ac
TG
931 table = rt->rt6i_table;
932 write_lock_bh(&table->tb6_lock);
e715b6d3 933 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 934 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
935
936 return err;
937}
938
40e22e8f
TG
939int ip6_ins_rt(struct rt6_info *rt)
940{
e715b6d3
FW
941 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
942 struct mx6_config mxc = { .mx = NULL, };
943
944 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
945}
946
8b9df265
MKL
947static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
948 const struct in6_addr *daddr,
949 const struct in6_addr *saddr)
1da177e4 950{
1da177e4
LT
951 struct rt6_info *rt;
952
953 /*
954 * Clone the route.
955 */
956
d52d3997 957 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 958 ort = (struct rt6_info *)ort->dst.from;
1da177e4 959
ad706862 960 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
961
962 if (!rt)
963 return NULL;
964
965 ip6_rt_copy_init(rt, ort);
966 rt->rt6i_flags |= RTF_CACHE;
967 rt->rt6i_metric = 0;
968 rt->dst.flags |= DST_HOST;
969 rt->rt6i_dst.addr = *daddr;
970 rt->rt6i_dst.plen = 128;
1da177e4 971
83a09abd
MKL
972 if (!rt6_is_gw_or_nonexthop(ort)) {
973 if (ort->rt6i_dst.plen != 128 &&
974 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
975 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 976#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
977 if (rt->rt6i_src.plen && saddr) {
978 rt->rt6i_src.addr = *saddr;
979 rt->rt6i_src.plen = 128;
8b9df265 980 }
83a09abd 981#endif
95a9a5ba 982 }
1da177e4 983
95a9a5ba
YH
984 return rt;
985}
1da177e4 986
d52d3997
MKL
987static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
988{
989 struct rt6_info *pcpu_rt;
990
991 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 992 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
993
994 if (!pcpu_rt)
995 return NULL;
996 ip6_rt_copy_init(pcpu_rt, rt);
997 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
998 pcpu_rt->rt6i_flags |= RTF_PCPU;
999 return pcpu_rt;
1000}
1001
1002/* It should be called with read_lock_bh(&tb6_lock) acquired */
1003static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1004{
a73e4195 1005 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1006
1007 p = this_cpu_ptr(rt->rt6i_pcpu);
1008 pcpu_rt = *p;
1009
a73e4195
MKL
1010 if (pcpu_rt) {
1011 dst_hold(&pcpu_rt->dst);
1012 rt6_dst_from_metrics_check(pcpu_rt);
1013 }
1014 return pcpu_rt;
1015}
1016
1017static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1018{
9c7370a1 1019 struct fib6_table *table = rt->rt6i_table;
a73e4195 1020 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1021
1022 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1023 if (!pcpu_rt) {
1024 struct net *net = dev_net(rt->dst.dev);
1025
9c7370a1
MKL
1026 dst_hold(&net->ipv6.ip6_null_entry->dst);
1027 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1028 }
1029
9c7370a1
MKL
1030 read_lock_bh(&table->tb6_lock);
1031 if (rt->rt6i_pcpu) {
1032 p = this_cpu_ptr(rt->rt6i_pcpu);
1033 prev = cmpxchg(p, NULL, pcpu_rt);
1034 if (prev) {
1035 /* If someone did it before us, return prev instead */
1036 dst_destroy(&pcpu_rt->dst);
1037 pcpu_rt = prev;
1038 }
1039 } else {
1040 /* rt has been removed from the fib6 tree
1041 * before we have a chance to acquire the read_lock.
1042 * In this case, don't brother to create a pcpu rt
1043 * since rt is going away anyway. The next
1044 * dst_check() will trigger a re-lookup.
1045 */
d52d3997 1046 dst_destroy(&pcpu_rt->dst);
9c7370a1 1047 pcpu_rt = rt;
d52d3997 1048 }
d52d3997
MKL
1049 dst_hold(&pcpu_rt->dst);
1050 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1051 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1052 return pcpu_rt;
1053}
1054
/* Policy-routing lookup in a single FIB table.
 *
 * @net:   network namespace (supplies the null entry and forwarding sysctl)
 * @table: table to search; its tb6_lock is taken for reading here
 * @oif:   interface index constraint (cleared when the flow carries
 *         FLOWI_FLAG_SKIP_NH_OIF)
 * @fl6:   flow being routed
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is consumed here
 *
 * Returns one of three things: the selected entry itself (null entry or
 * an RTF_CACHE route), an uncached RTF_CACHE clone, or a per-cpu copy.
 * The table lock is always dropped before returning.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* With forwarding disabled, first insist on reachable routers. */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* Remember the starting node so the search can be restarted below. */
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* Nothing usable at this node: backtrack, and once the tree
		 * is exhausted retry from the start without the
		 * reachability requirement.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* Null entry or an existing cache route: hand it back as is. */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		/* NOTE(review): the dst_release() below presumably pairs with
		 * a reference taken by dst_use() - confirm against dst.h.
		 */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		/* Fall back to the null entry if the clone allocation failed. */
		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			/* Hold rt across the unlocked window. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		return pcpu_rt;

	}
}
1147
8ed67789 1148static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1149 struct flowi6 *fl6, int flags)
4acad72d 1150{
4c9483b2 1151 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1152}
1153
72331bc0
SL
1154static struct dst_entry *ip6_route_input_lookup(struct net *net,
1155 struct net_device *dev,
1156 struct flowi6 *fl6, int flags)
1157{
1158 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1159 flags |= RT6_LOOKUP_F_IFACE;
1160
1161 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1162}
1163
c71099ac
TG
1164void ip6_route_input(struct sk_buff *skb)
1165{
b71d1d42 1166 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1167 struct net *net = dev_net(skb->dev);
adaa70bb 1168 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1169 struct ip_tunnel_info *tun_info;
4c9483b2
DM
1170 struct flowi6 fl6 = {
1171 .flowi6_iif = skb->dev->ifindex,
1172 .daddr = iph->daddr,
1173 .saddr = iph->saddr,
6502ca52 1174 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1175 .flowi6_mark = skb->mark,
1176 .flowi6_proto = iph->nexthdr,
c71099ac 1177 };
adaa70bb 1178
904af04d 1179 tun_info = skb_tunnel_info(skb);
46fa062a 1180 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1181 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1182 skb_dst_drop(skb);
72331bc0 1183 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1184}
1185
8ed67789 1186static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1187 struct flowi6 *fl6, int flags)
1da177e4 1188{
4c9483b2 1189 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1190}
1191
67ba4152 1192struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1193 struct flowi6 *fl6)
c71099ac
TG
1194{
1195 int flags = 0;
d46a9d67 1196 bool any_src;
c71099ac 1197
1fb9489b 1198 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1199
d46a9d67 1200 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1201 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1202 (fl6->flowi6_oif && any_src))
77d16f45 1203 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1204
d46a9d67 1205 if (!any_src)
adaa70bb 1206 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1207 else if (sk)
1208 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1209
4c9483b2 1210 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1211}
7159039a 1212EXPORT_SYMBOL(ip6_route_output);
1da177e4 1213
2774c131 1214struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1215{
5c1e6aa3 1216 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1217 struct dst_entry *new = NULL;
1218
f5b0a874 1219 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1220 if (rt) {
0a1f5962 1221 rt6_info_init(rt);
8104891b 1222
0a1f5962 1223 new = &rt->dst;
14e50e57 1224 new->__use = 1;
352e512c 1225 new->input = dst_discard;
aad88724 1226 new->output = dst_discard_sk;
14e50e57 1227
0a1f5962 1228 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1229 rt->rt6i_idev = ort->rt6i_idev;
1230 if (rt->rt6i_idev)
1231 in6_dev_hold(rt->rt6i_idev);
14e50e57 1232
4e3fd7a0 1233 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1234 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1235 rt->rt6i_metric = 0;
1236
1237 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1238#ifdef CONFIG_IPV6_SUBTREES
1239 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1240#endif
1241
1242 dst_free(new);
1243 }
1244
69ead7af
DM
1245 dst_release(dst_orig);
1246 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1247}
14e50e57 1248
1da177e4
LT
1249/*
1250 * Destination cache support functions
1251 */
1252
4b32b5ad
MKL
1253static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1254{
1255 if (rt->dst.from &&
1256 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1257 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1258}
1259
3da59bd9
MKL
1260static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1261{
1262 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1263 return NULL;
1264
1265 if (rt6_check_expired(rt))
1266 return NULL;
1267
1268 return &rt->dst;
1269}
1270
1271static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1272{
1273 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275 return &rt->dst;
1276 else
1277 return NULL;
1278}
1279
1da177e4
LT
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282 struct rt6_info *rt;
1283
1284 rt = (struct rt6_info *) dst;
1285
6f3118b5
ND
1286 /* All IPV6 dsts are created with ->obsolete set to the value
1287 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288 * into this function always.
1289 */
e3bc10bd 1290
4b32b5ad
MKL
1291 rt6_dst_from_metrics_check(rt);
1292
d52d3997 1293 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1294 return rt6_dst_from_check(rt, cookie);
1295 else
1296 return rt6_check(rt, cookie);
1da177e4
LT
1297}
1298
1299static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1300{
1301 struct rt6_info *rt = (struct rt6_info *) dst;
1302
1303 if (rt) {
54c1a859
YH
1304 if (rt->rt6i_flags & RTF_CACHE) {
1305 if (rt6_check_expired(rt)) {
1306 ip6_del_rt(rt);
1307 dst = NULL;
1308 }
1309 } else {
1da177e4 1310 dst_release(dst);
54c1a859
YH
1311 dst = NULL;
1312 }
1da177e4 1313 }
54c1a859 1314 return dst;
1da177e4
LT
1315}
1316
1317static void ip6_link_failure(struct sk_buff *skb)
1318{
1319 struct rt6_info *rt;
1320
3ffe533c 1321 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1322
adf30907 1323 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1324 if (rt) {
1eb4f758
HFS
1325 if (rt->rt6i_flags & RTF_CACHE) {
1326 dst_hold(&rt->dst);
8e3d5be7 1327 ip6_del_rt(rt);
1eb4f758 1328 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1329 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1330 }
1da177e4
LT
1331 }
1332}
1333
45e4fd26
MKL
1334static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1335{
1336 struct net *net = dev_net(rt->dst.dev);
1337
1338 rt->rt6i_flags |= RTF_MODIFIED;
1339 rt->rt6i_pmtu = mtu;
1340 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1341}
1342
1343static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1344 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1345{
67ba4152 1346 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1347
45e4fd26
MKL
1348 if (rt6->rt6i_flags & RTF_LOCAL)
1349 return;
81aded24 1350
45e4fd26
MKL
1351 dst_confirm(dst);
1352 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1353 if (mtu >= dst_mtu(dst))
1354 return;
9d289715 1355
45e4fd26
MKL
1356 if (rt6->rt6i_flags & RTF_CACHE) {
1357 rt6_do_update_pmtu(rt6, mtu);
1358 } else {
1359 const struct in6_addr *daddr, *saddr;
1360 struct rt6_info *nrt6;
1361
1362 if (iph) {
1363 daddr = &iph->daddr;
1364 saddr = &iph->saddr;
1365 } else if (sk) {
1366 daddr = &sk->sk_v6_daddr;
1367 saddr = &inet6_sk(sk)->saddr;
1368 } else {
1369 return;
1370 }
1371 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1372 if (nrt6) {
1373 rt6_do_update_pmtu(nrt6, mtu);
1374
1375 /* ip6_ins_rt(nrt6) will bump the
1376 * rt6->rt6i_node->fn_sernum
1377 * which will fail the next rt6_check() and
1378 * invalidate the sk->sk_dst_cache.
1379 */
1380 ip6_ins_rt(nrt6);
1381 }
1da177e4
LT
1382 }
1383}
1384
45e4fd26
MKL
1385static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1386 struct sk_buff *skb, u32 mtu)
1387{
1388 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1389}
1390
42ae66c8
DM
1391void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1392 int oif, u32 mark)
81aded24
DM
1393{
1394 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1395 struct dst_entry *dst;
1396 struct flowi6 fl6;
1397
1398 memset(&fl6, 0, sizeof(fl6));
1399 fl6.flowi6_oif = oif;
1b3c61dc 1400 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1401 fl6.daddr = iph->daddr;
1402 fl6.saddr = iph->saddr;
6502ca52 1403 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1404
1405 dst = ip6_route_output(net, NULL, &fl6);
1406 if (!dst->error)
45e4fd26 1407 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1408 dst_release(dst);
1409}
1410EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1411
1412void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1413{
1414 ip6_update_pmtu(skb, sock_net(sk), mtu,
1415 sk->sk_bound_dev_if, sk->sk_mark);
1416}
1417EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1418
b55b76b2
DJ
/* Handle redirects */
/* Flow key for redirect lookups: a flowi6 extended with the address of
 * the router that sent the redirect.
 *
 * fl6 must remain the first member: the lookup callback receives a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* regular flow description */
	struct in6_addr gateway;	/* address the redirect came from */
};
1424
1425static struct rt6_info *__ip6_route_redirect(struct net *net,
1426 struct fib6_table *table,
1427 struct flowi6 *fl6,
1428 int flags)
1429{
1430 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1431 struct rt6_info *rt;
1432 struct fib6_node *fn;
1433
1434 /* Get the "current" route for this destination and
1435 * check if the redirect has come from approriate router.
1436 *
1437 * RFC 4861 specifies that redirects should only be
1438 * accepted if they come from the nexthop to the target.
1439 * Due to the way the routes are chosen, this notion
1440 * is a bit fuzzy and one might need to check all possible
1441 * routes.
1442 */
1443
1444 read_lock_bh(&table->tb6_lock);
1445 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1446restart:
1447 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1448 if (rt6_check_expired(rt))
1449 continue;
1450 if (rt->dst.error)
1451 break;
1452 if (!(rt->rt6i_flags & RTF_GATEWAY))
1453 continue;
1454 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1455 continue;
1456 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1457 continue;
1458 break;
1459 }
1460
1461 if (!rt)
1462 rt = net->ipv6.ip6_null_entry;
1463 else if (rt->dst.error) {
1464 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1465 goto out;
1466 }
1467
1468 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1469 fn = fib6_backtrack(fn, &fl6->saddr);
1470 if (fn)
1471 goto restart;
b55b76b2 1472 }
a3c00e46 1473
b0a1ba59 1474out:
b55b76b2
DJ
1475 dst_hold(&rt->dst);
1476
1477 read_unlock_bh(&table->tb6_lock);
1478
1479 return rt;
1480};
1481
1482static struct dst_entry *ip6_route_redirect(struct net *net,
1483 const struct flowi6 *fl6,
1484 const struct in6_addr *gateway)
1485{
1486 int flags = RT6_LOOKUP_F_HAS_SADDR;
1487 struct ip6rd_flowi rdfl;
1488
1489 rdfl.fl6 = *fl6;
1490 rdfl.gateway = *gateway;
1491
1492 return fib6_rule_lookup(net, &rdfl.fl6,
1493 flags, __ip6_route_redirect);
1494}
1495
3a5ad2ee
DM
1496void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1497{
1498 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1499 struct dst_entry *dst;
1500 struct flowi6 fl6;
1501
1502 memset(&fl6, 0, sizeof(fl6));
e374c618 1503 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1504 fl6.flowi6_oif = oif;
1505 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1506 fl6.daddr = iph->daddr;
1507 fl6.saddr = iph->saddr;
6502ca52 1508 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1509
b55b76b2
DJ
1510 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1511 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1512 dst_release(dst);
1513}
1514EXPORT_SYMBOL_GPL(ip6_redirect);
1515
c92a59ec
DJ
1516void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1517 u32 mark)
1518{
1519 const struct ipv6hdr *iph = ipv6_hdr(skb);
1520 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1521 struct dst_entry *dst;
1522 struct flowi6 fl6;
1523
1524 memset(&fl6, 0, sizeof(fl6));
e374c618 1525 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1526 fl6.flowi6_oif = oif;
1527 fl6.flowi6_mark = mark;
c92a59ec
DJ
1528 fl6.daddr = msg->dest;
1529 fl6.saddr = iph->daddr;
1530
b55b76b2
DJ
1531 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1532 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1533 dst_release(dst);
1534}
1535
3a5ad2ee
DM
1536void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1537{
1538 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1539}
1540EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1541
0dbaee3b 1542static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1543{
0dbaee3b
DM
1544 struct net_device *dev = dst->dev;
1545 unsigned int mtu = dst_mtu(dst);
1546 struct net *net = dev_net(dev);
1547
1da177e4
LT
1548 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1549
5578689a
DL
1550 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1551 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1552
1553 /*
1ab1457c
YH
1554 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1555 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1556 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1557 * rely only on pmtu discovery"
1558 */
1559 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1560 mtu = IPV6_MAXPLEN;
1561 return mtu;
1562}
1563
ebb762f2 1564static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1565{
4b32b5ad
MKL
1566 const struct rt6_info *rt = (const struct rt6_info *)dst;
1567 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1568 struct inet6_dev *idev;
618f9bc7 1569
4b32b5ad
MKL
1570 if (mtu)
1571 goto out;
1572
1573 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1574 if (mtu)
30f78d8e 1575 goto out;
618f9bc7
SK
1576
1577 mtu = IPV6_MIN_MTU;
d33e4553
DM
1578
1579 rcu_read_lock();
1580 idev = __in6_dev_get(dst->dev);
1581 if (idev)
1582 mtu = idev->cnf.mtu6;
1583 rcu_read_unlock();
1584
30f78d8e
ED
1585out:
1586 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1587}
1588
3b00944c
YH
/* Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc(); entries are unlinked and freed by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list; always taken with spin_lock_bh(). */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1591
3b00944c 1592struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1593 struct flowi6 *fl6)
1da177e4 1594{
87a11578 1595 struct dst_entry *dst;
1da177e4
LT
1596 struct rt6_info *rt;
1597 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1598 struct net *net = dev_net(dev);
1da177e4 1599
38308473 1600 if (unlikely(!idev))
122bdf67 1601 return ERR_PTR(-ENODEV);
1da177e4 1602
ad706862 1603 rt = ip6_dst_alloc(net, dev, 0);
38308473 1604 if (unlikely(!rt)) {
1da177e4 1605 in6_dev_put(idev);
87a11578 1606 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1607 goto out;
1608 }
1609
8e2ec639
YZ
1610 rt->dst.flags |= DST_HOST;
1611 rt->dst.output = ip6_output;
d8d1f30b 1612 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1613 rt->rt6i_gateway = fl6->daddr;
87a11578 1614 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1615 rt->rt6i_dst.plen = 128;
1616 rt->rt6i_idev = idev;
14edd87d 1617 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1618
3b00944c 1619 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1620 rt->dst.next = icmp6_dst_gc_list;
1621 icmp6_dst_gc_list = &rt->dst;
3b00944c 1622 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1623
5578689a 1624 fib6_force_start_gc(net);
1da177e4 1625
87a11578
DM
1626 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1627
1da177e4 1628out:
87a11578 1629 return dst;
1da177e4
LT
1630}
1631
3d0f24a7 1632int icmp6_dst_gc(void)
1da177e4 1633{
e9476e95 1634 struct dst_entry *dst, **pprev;
3d0f24a7 1635 int more = 0;
1da177e4 1636
3b00944c
YH
1637 spin_lock_bh(&icmp6_dst_lock);
1638 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1639
1da177e4
LT
1640 while ((dst = *pprev) != NULL) {
1641 if (!atomic_read(&dst->__refcnt)) {
1642 *pprev = dst->next;
1643 dst_free(dst);
1da177e4
LT
1644 } else {
1645 pprev = &dst->next;
3d0f24a7 1646 ++more;
1da177e4
LT
1647 }
1648 }
1649
3b00944c 1650 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1651
3d0f24a7 1652 return more;
1da177e4
LT
1653}
1654
1e493d19
DM
1655static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1656 void *arg)
1657{
1658 struct dst_entry *dst, **pprev;
1659
1660 spin_lock_bh(&icmp6_dst_lock);
1661 pprev = &icmp6_dst_gc_list;
1662 while ((dst = *pprev) != NULL) {
1663 struct rt6_info *rt = (struct rt6_info *) dst;
1664 if (func(rt, arg)) {
1665 *pprev = dst->next;
1666 dst_free(dst);
1667 } else {
1668 pprev = &dst->next;
1669 }
1670 }
1671 spin_unlock_bh(&icmp6_dst_lock);
1672}
1673
569d3645 1674static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1675{
86393e52 1676 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1677 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1678 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1679 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1680 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1681 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1682 int entries;
7019b78e 1683
fc66f95c 1684 entries = dst_entries_get_fast(ops);
49a18d86 1685 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1686 entries <= rt_max_size)
1da177e4
LT
1687 goto out;
1688
6891a346 1689 net->ipv6.ip6_rt_gc_expire++;
14956643 1690 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1691 entries = dst_entries_get_slow(ops);
1692 if (entries < ops->gc_thresh)
7019b78e 1693 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1694out:
7019b78e 1695 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1696 return entries > rt_max_size;
1da177e4
LT
1697}
1698
e715b6d3
FW
1699static int ip6_convert_metrics(struct mx6_config *mxc,
1700 const struct fib6_config *cfg)
1701{
c3a8d947 1702 bool ecn_ca = false;
e715b6d3
FW
1703 struct nlattr *nla;
1704 int remaining;
1705 u32 *mp;
1706
63159f29 1707 if (!cfg->fc_mx)
e715b6d3
FW
1708 return 0;
1709
1710 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1711 if (unlikely(!mp))
1712 return -ENOMEM;
1713
1714 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1715 int type = nla_type(nla);
1bb14807 1716 u32 val;
e715b6d3 1717
1bb14807
DB
1718 if (!type)
1719 continue;
1720 if (unlikely(type > RTAX_MAX))
1721 goto err;
ea697639 1722
1bb14807
DB
1723 if (type == RTAX_CC_ALGO) {
1724 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1725
1bb14807 1726 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1727 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1728 if (val == TCP_CA_UNSPEC)
1729 goto err;
1730 } else {
1731 val = nla_get_u32(nla);
e715b6d3 1732 }
b8d3e416
DB
1733 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1734 goto err;
1bb14807
DB
1735
1736 mp[type - 1] = val;
1737 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1738 }
1739
c3a8d947
DB
1740 if (ecn_ca) {
1741 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1742 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1743 }
e715b6d3 1744
c3a8d947 1745 mxc->mx = mp;
e715b6d3
FW
1746 return 0;
1747 err:
1748 kfree(mp);
1749 return -EINVAL;
1750}
1da177e4 1751
/* Build (but do not insert) a route from a fib6_config.
 *
 * @cfg:    route description; fc_metric and fc_protocol are filled in
 *          with defaults when unset, and fc_nlinfo.nl_net is overwritten
 *          with the namespace of the device finally chosen
 * @rt_ret: receives the new route on success, NULL on failure
 *
 * On success the route holds the references taken here on the device
 * and its inet6_dev.  On failure every reference taken so far is dropped
 * and the partially built route is freed.
 *
 * Returns 0 or a negative errno.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		/* Takes a reference on both dev and idev. */
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		/* Without NLM_F_CREATE prefer an existing table, but still
		 * create one (with a warning) if it does not exist.
		 */
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type and flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* Let the tunnel intercept output/input, remembering the
		 * handlers it replaces.
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error and the drop handlers. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* The gateway must be on the requested device. */
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: adopt the one the gateway
				 * is reached through, taking our own
				 * references.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway itself must not be behind a gateway. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Reject a preferred source that fails ipv6_chk_addr() on dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Hand the dev/idev references over to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
2001
2002int ip6_route_add(struct fib6_config *cfg)
2003{
2004 struct mx6_config mxc = { .mx = NULL, };
2005 struct rt6_info *rt = NULL;
2006 int err;
2007
2008 err = ip6_route_info_create(cfg, &rt);
2009 if (err)
2010 goto out;
2011
e715b6d3
FW
2012 err = ip6_convert_metrics(&mxc, cfg);
2013 if (err)
2014 goto out;
1da177e4 2015
e715b6d3
FW
2016 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2017
2018 kfree(mxc.mx);
6b9ea5a6 2019
e715b6d3 2020 return err;
1da177e4 2021out:
1da177e4 2022 if (rt)
d8d1f30b 2023 dst_free(&rt->dst);
6b9ea5a6 2024
1da177e4
LT
2025 return err;
2026}
2027
86872cb5 2028static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2029{
2030 int err;
c71099ac 2031 struct fib6_table *table;
d1918542 2032 struct net *net = dev_net(rt->dst.dev);
1da177e4 2033
8e3d5be7
MKL
2034 if (rt == net->ipv6.ip6_null_entry ||
2035 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2036 err = -ENOENT;
2037 goto out;
2038 }
6c813a72 2039
c71099ac
TG
2040 table = rt->rt6i_table;
2041 write_lock_bh(&table->tb6_lock);
86872cb5 2042 err = fib6_del(rt, info);
c71099ac 2043 write_unlock_bh(&table->tb6_lock);
1da177e4 2044
6825a26c 2045out:
94e187c0 2046 ip6_rt_put(rt);
1da177e4
LT
2047 return err;
2048}
2049
e0a1ad73
TG
2050int ip6_del_rt(struct rt6_info *rt)
2051{
4d1169c1 2052 struct nl_info info = {
d1918542 2053 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2054 };
528c4ceb 2055 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2056}
2057
86872cb5 2058static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2059{
c71099ac 2060 struct fib6_table *table;
1da177e4
LT
2061 struct fib6_node *fn;
2062 struct rt6_info *rt;
2063 int err = -ESRCH;
2064
5578689a 2065 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2066 if (!table)
c71099ac
TG
2067 return err;
2068
2069 read_lock_bh(&table->tb6_lock);
1da177e4 2070
c71099ac 2071 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2072 &cfg->fc_dst, cfg->fc_dst_len,
2073 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2074
1da177e4 2075 if (fn) {
d8d1f30b 2076 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2077 if ((rt->rt6i_flags & RTF_CACHE) &&
2078 !(cfg->fc_flags & RTF_CACHE))
2079 continue;
86872cb5 2080 if (cfg->fc_ifindex &&
d1918542
DM
2081 (!rt->dst.dev ||
2082 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2083 continue;
86872cb5
TG
2084 if (cfg->fc_flags & RTF_GATEWAY &&
2085 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2086 continue;
86872cb5 2087 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2088 continue;
d8d1f30b 2089 dst_hold(&rt->dst);
c71099ac 2090 read_unlock_bh(&table->tb6_lock);
1da177e4 2091
86872cb5 2092 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2093 }
2094 }
c71099ac 2095 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2096
2097 return err;
2098}
2099
6700c270 2100static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2101{
e8599ff4 2102 struct net *net = dev_net(skb->dev);
a6279458 2103 struct netevent_redirect netevent;
e8599ff4 2104 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2105 struct ndisc_options ndopts;
2106 struct inet6_dev *in6_dev;
2107 struct neighbour *neigh;
71bcdba0 2108 struct rd_msg *msg;
6e157b6a
DM
2109 int optlen, on_link;
2110 u8 *lladdr;
e8599ff4 2111
29a3cad5 2112 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2113 optlen -= sizeof(*msg);
e8599ff4
DM
2114
2115 if (optlen < 0) {
6e157b6a 2116 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2117 return;
2118 }
2119
71bcdba0 2120 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2121
71bcdba0 2122 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2123 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2124 return;
2125 }
2126
6e157b6a 2127 on_link = 0;
71bcdba0 2128 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2129 on_link = 1;
71bcdba0 2130 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2131 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2132 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2133 return;
2134 }
2135
2136 in6_dev = __in6_dev_get(skb->dev);
2137 if (!in6_dev)
2138 return;
2139 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2140 return;
2141
2142 /* RFC2461 8.1:
2143 * The IP source address of the Redirect MUST be the same as the current
2144 * first-hop router for the specified ICMP Destination Address.
2145 */
2146
71bcdba0 2147 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2148 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2149 return;
2150 }
6e157b6a
DM
2151
2152 lladdr = NULL;
e8599ff4
DM
2153 if (ndopts.nd_opts_tgt_lladdr) {
2154 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2155 skb->dev);
2156 if (!lladdr) {
2157 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2158 return;
2159 }
2160 }
2161
6e157b6a
DM
2162 rt = (struct rt6_info *) dst;
2163 if (rt == net->ipv6.ip6_null_entry) {
2164 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2165 return;
6e157b6a 2166 }
e8599ff4 2167
6e157b6a
DM
2168 /* Redirect received -> path was valid.
2169 * Look, redirects are sent only in response to data packets,
2170 * so that this nexthop apparently is reachable. --ANK
2171 */
2172 dst_confirm(&rt->dst);
a6279458 2173
71bcdba0 2174 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2175 if (!neigh)
2176 return;
a6279458 2177
1da177e4
LT
2178 /*
2179 * We have finally decided to accept it.
2180 */
2181
1ab1457c 2182 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2183 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2184 NEIGH_UPDATE_F_OVERRIDE|
2185 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2186 NEIGH_UPDATE_F_ISROUTER))
2187 );
2188
83a09abd 2189 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2190 if (!nrt)
1da177e4
LT
2191 goto out;
2192
2193 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2194 if (on_link)
2195 nrt->rt6i_flags &= ~RTF_GATEWAY;
2196
4e3fd7a0 2197 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2198
40e22e8f 2199 if (ip6_ins_rt(nrt))
1da177e4
LT
2200 goto out;
2201
d8d1f30b
CG
2202 netevent.old = &rt->dst;
2203 netevent.new = &nrt->dst;
71bcdba0 2204 netevent.daddr = &msg->dest;
60592833 2205 netevent.neigh = neigh;
8d71740c
TT
2206 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2207
38308473 2208 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2209 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2210 ip6_del_rt(rt);
1da177e4
LT
2211 }
2212
2213out:
e8599ff4 2214 neigh_release(neigh);
6e157b6a
DM
2215}
2216
1da177e4
LT
2217/*
2218 * Misc support functions
2219 */
2220
4b32b5ad
MKL
2221static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2222{
2223 BUG_ON(from->dst.from);
2224
2225 rt->rt6i_flags &= ~RTF_EXPIRES;
2226 dst_hold(&from->dst);
2227 rt->dst.from = &from->dst;
2228 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2229}
2230
83a09abd
MKL
2231static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2232{
2233 rt->dst.input = ort->dst.input;
2234 rt->dst.output = ort->dst.output;
2235 rt->rt6i_dst = ort->rt6i_dst;
2236 rt->dst.error = ort->dst.error;
2237 rt->rt6i_idev = ort->rt6i_idev;
2238 if (rt->rt6i_idev)
2239 in6_dev_hold(rt->rt6i_idev);
2240 rt->dst.lastuse = jiffies;
2241 rt->rt6i_gateway = ort->rt6i_gateway;
2242 rt->rt6i_flags = ort->rt6i_flags;
2243 rt6_set_from(rt, ort);
2244 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2245#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2246 rt->rt6i_src = ort->rt6i_src;
1da177e4 2247#endif
83a09abd
MKL
2248 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2249 rt->rt6i_table = ort->rt6i_table;
61adedf3 2250 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2251}
2252
70ceb4f5 2253#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2254static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2255 const struct in6_addr *prefix, int prefixlen,
2256 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2257{
2258 struct fib6_node *fn;
2259 struct rt6_info *rt = NULL;
c71099ac
TG
2260 struct fib6_table *table;
2261
efa2cea0 2262 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2263 if (!table)
c71099ac 2264 return NULL;
70ceb4f5 2265
5744dd9b 2266 read_lock_bh(&table->tb6_lock);
67ba4152 2267 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2268 if (!fn)
2269 goto out;
2270
d8d1f30b 2271 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2272 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2273 continue;
2274 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2275 continue;
2276 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2277 continue;
d8d1f30b 2278 dst_hold(&rt->dst);
70ceb4f5
YH
2279 break;
2280 }
2281out:
5744dd9b 2282 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2283 return rt;
2284}
2285
efa2cea0 2286static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2287 const struct in6_addr *prefix, int prefixlen,
2288 const struct in6_addr *gwaddr, int ifindex,
95c96174 2289 unsigned int pref)
70ceb4f5 2290{
86872cb5
TG
2291 struct fib6_config cfg = {
2292 .fc_table = RT6_TABLE_INFO,
238fc7ea 2293 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2294 .fc_ifindex = ifindex,
2295 .fc_dst_len = prefixlen,
2296 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2297 RTF_UP | RTF_PREF(pref),
15e47304 2298 .fc_nlinfo.portid = 0,
efa2cea0
DL
2299 .fc_nlinfo.nlh = NULL,
2300 .fc_nlinfo.nl_net = net,
86872cb5
TG
2301 };
2302
4e3fd7a0
AD
2303 cfg.fc_dst = *prefix;
2304 cfg.fc_gateway = *gwaddr;
70ceb4f5 2305
e317da96
YH
2306 /* We should treat it as a default route if prefix length is 0. */
2307 if (!prefixlen)
86872cb5 2308 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2309
86872cb5 2310 ip6_route_add(&cfg);
70ceb4f5 2311
efa2cea0 2312 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2313}
2314#endif
2315
b71d1d42 2316struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2317{
1da177e4 2318 struct rt6_info *rt;
c71099ac 2319 struct fib6_table *table;
1da177e4 2320
c346dca1 2321 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2322 if (!table)
c71099ac 2323 return NULL;
1da177e4 2324
5744dd9b 2325 read_lock_bh(&table->tb6_lock);
67ba4152 2326 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2327 if (dev == rt->dst.dev &&
045927ff 2328 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2329 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2330 break;
2331 }
2332 if (rt)
d8d1f30b 2333 dst_hold(&rt->dst);
5744dd9b 2334 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2335 return rt;
2336}
2337
b71d1d42 2338struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2339 struct net_device *dev,
2340 unsigned int pref)
1da177e4 2341{
86872cb5
TG
2342 struct fib6_config cfg = {
2343 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2344 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2345 .fc_ifindex = dev->ifindex,
2346 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2347 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2348 .fc_nlinfo.portid = 0,
5578689a 2349 .fc_nlinfo.nlh = NULL,
c346dca1 2350 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2351 };
1da177e4 2352
4e3fd7a0 2353 cfg.fc_gateway = *gwaddr;
1da177e4 2354
86872cb5 2355 ip6_route_add(&cfg);
1da177e4 2356
1da177e4
LT
2357 return rt6_get_dflt_router(gwaddr, dev);
2358}
2359
7b4da532 2360void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2361{
2362 struct rt6_info *rt;
c71099ac
TG
2363 struct fib6_table *table;
2364
2365 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2366 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2367 if (!table)
c71099ac 2368 return;
1da177e4
LT
2369
2370restart:
c71099ac 2371 read_lock_bh(&table->tb6_lock);
d8d1f30b 2372 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2373 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2374 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2375 dst_hold(&rt->dst);
c71099ac 2376 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2377 ip6_del_rt(rt);
1da177e4
LT
2378 goto restart;
2379 }
2380 }
c71099ac 2381 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2382}
2383
5578689a
DL
2384static void rtmsg_to_fib6_config(struct net *net,
2385 struct in6_rtmsg *rtmsg,
86872cb5
TG
2386 struct fib6_config *cfg)
2387{
2388 memset(cfg, 0, sizeof(*cfg));
2389
2390 cfg->fc_table = RT6_TABLE_MAIN;
2391 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2392 cfg->fc_metric = rtmsg->rtmsg_metric;
2393 cfg->fc_expires = rtmsg->rtmsg_info;
2394 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2395 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2396 cfg->fc_flags = rtmsg->rtmsg_flags;
2397
5578689a 2398 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2399
4e3fd7a0
AD
2400 cfg->fc_dst = rtmsg->rtmsg_dst;
2401 cfg->fc_src = rtmsg->rtmsg_src;
2402 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2403}
2404
5578689a 2405int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2406{
86872cb5 2407 struct fib6_config cfg;
1da177e4
LT
2408 struct in6_rtmsg rtmsg;
2409 int err;
2410
67ba4152 2411 switch (cmd) {
1da177e4
LT
2412 case SIOCADDRT: /* Add a route */
2413 case SIOCDELRT: /* Delete a route */
af31f412 2414 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2415 return -EPERM;
2416 err = copy_from_user(&rtmsg, arg,
2417 sizeof(struct in6_rtmsg));
2418 if (err)
2419 return -EFAULT;
86872cb5 2420
5578689a 2421 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2422
1da177e4
LT
2423 rtnl_lock();
2424 switch (cmd) {
2425 case SIOCADDRT:
86872cb5 2426 err = ip6_route_add(&cfg);
1da177e4
LT
2427 break;
2428 case SIOCDELRT:
86872cb5 2429 err = ip6_route_del(&cfg);
1da177e4
LT
2430 break;
2431 default:
2432 err = -EINVAL;
2433 }
2434 rtnl_unlock();
2435
2436 return err;
3ff50b79 2437 }
1da177e4
LT
2438
2439 return -EINVAL;
2440}
2441
2442/*
2443 * Drop the packet on the floor
2444 */
2445
d5fdd6ba 2446static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2447{
612f09e8 2448 int type;
adf30907 2449 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2450 switch (ipstats_mib_noroutes) {
2451 case IPSTATS_MIB_INNOROUTES:
0660e03f 2452 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2453 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2454 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2455 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2456 break;
2457 }
2458 /* FALLTHROUGH */
2459 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2460 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2461 ipstats_mib_noroutes);
612f09e8
YH
2462 break;
2463 }
3ffe533c 2464 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2465 kfree_skb(skb);
2466 return 0;
2467}
2468
9ce8ade0
TG
/* Input-path drop: ICMPV6_NOROUTE, counted as IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2473
aad88724 2474static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2475{
adf30907 2476 skb->dev = skb_dst(skb)->dev;
612f09e8 2477 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2478}
2479
9ce8ade0
TG
/* Input-path drop: ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2484
aad88724 2485static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2486{
adf30907 2487 skb->dev = skb_dst(skb)->dev;
612f09e8 2488 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2489}
2490
1da177e4
LT
2491/*
2492 * Allocate a dst for local (unicast / anycast) address.
2493 */
2494
2495struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2496 const struct in6_addr *addr,
8f031519 2497 bool anycast)
1da177e4 2498{
c346dca1 2499 struct net *net = dev_net(idev->dev);
a3300ef4 2500 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2501 DST_NOCOUNT);
a3300ef4 2502 if (!rt)
1da177e4
LT
2503 return ERR_PTR(-ENOMEM);
2504
1da177e4
LT
2505 in6_dev_hold(idev);
2506
11d53b49 2507 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2508 rt->dst.input = ip6_input;
2509 rt->dst.output = ip6_output;
1da177e4 2510 rt->rt6i_idev = idev;
1da177e4
LT
2511
2512 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2513 if (anycast)
2514 rt->rt6i_flags |= RTF_ANYCAST;
2515 else
1da177e4 2516 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2517
550bab42 2518 rt->rt6i_gateway = *addr;
4e3fd7a0 2519 rt->rt6i_dst.addr = *addr;
1da177e4 2520 rt->rt6i_dst.plen = 128;
5578689a 2521 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
8e3d5be7 2522 rt->dst.flags |= DST_NOCACHE;
1da177e4 2523
d8d1f30b 2524 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2525
2526 return rt;
2527}
2528
c3968a85
DW
2529int ip6_route_get_saddr(struct net *net,
2530 struct rt6_info *rt,
b71d1d42 2531 const struct in6_addr *daddr,
c3968a85
DW
2532 unsigned int prefs,
2533 struct in6_addr *saddr)
2534{
e16e888b
MS
2535 struct inet6_dev *idev =
2536 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2537 int err = 0;
e16e888b 2538 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2539 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2540 else
2541 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2542 daddr, prefs, saddr);
2543 return err;
2544}
2545
2546/* remove deleted ip from prefsrc entries */
/* Argument bundle passed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared against the route's prefsrc */
};
2552
2553static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2554{
2555 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2556 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2557 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2558
d1918542 2559 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2560 rt != net->ipv6.ip6_null_entry &&
2561 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2562 /* remove prefsrc entry */
2563 rt->rt6i_prefsrc.plen = 0;
2564 }
2565 return 0;
2566}
2567
2568void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2569{
2570 struct net *net = dev_net(ifp->idev->dev);
2571 struct arg_dev_net_ip adni = {
2572 .dev = ifp->idev->dev,
2573 .net = net,
2574 .addr = &ifp->addr,
2575 };
0c3584d5 2576 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2577}
2578
be7a010d
DJ
2579#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2580#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2581
2582/* Remove routers and update dst entries when gateway turn into host. */
2583static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2584{
2585 struct in6_addr *gateway = (struct in6_addr *)arg;
2586
2587 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2588 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2589 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2590 return -1;
2591 }
2592 return 0;
2593}
2594
/* Run fib6_clean_tohost() over every route in @net for @gateway. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2599
8ed67789
DL
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
};
2604
1da177e4
LT
2605static int fib6_ifdown(struct rt6_info *rt, void *arg)
2606{
bc3ef660 2607 const struct arg_dev_net *adn = arg;
2608 const struct net_device *dev = adn->dev;
8ed67789 2609
d1918542 2610 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2611 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2612 return -1;
c159d30c 2613
1da177e4
LT
2614 return 0;
2615}
2616
f3db4851 2617void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2618{
8ed67789
DL
2619 struct arg_dev_net adn = {
2620 .dev = dev,
2621 .net = net,
2622 };
2623
0c3584d5 2624 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2625 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2626 if (dev)
2627 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2628}
2629
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2634
2635static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2636{
2637 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2638 struct inet6_dev *idev;
2639
2640 /* In IPv6 pmtu discovery is not optional,
2641 so that RTAX_MTU lock cannot disable it.
2642 We still use this lock to block changes
2643 caused by addrconf/ndisc.
2644 */
2645
2646 idev = __in6_dev_get(arg->dev);
38308473 2647 if (!idev)
1da177e4
LT
2648 return 0;
2649
2650 /* For administrative MTU increase, there is no way to discover
2651 IPv6 PMTU increase, so PMTU increase should be updated here.
2652 Since RFC 1981 doesn't include administrative MTU increase
2653 update PMTU increase is a MUST. (i.e. jumbo frame)
2654 */
2655 /*
2656 If new MTU is less than route PMTU, this new MTU will be the
2657 lowest MTU in the path, update the route PMTU to reflect PMTU
2658 decreases; if new MTU is greater than route PMTU, and the
2659 old MTU is the lowest MTU in the path, update the route PMTU
2660 to reflect the increase. In this case if the other nodes' MTU
2661 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2662 PMTU discouvery.
2663 */
d1918542 2664 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2665 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2666 if (rt->rt6i_flags & RTF_CACHE) {
2667 /* For RTF_CACHE with rt6i_pmtu == 0
2668 * (i.e. a redirected route),
2669 * the metrics of its rt->dst.from has already
2670 * been updated.
2671 */
2672 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2673 rt->rt6i_pmtu = arg->mtu;
2674 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2675 (dst_mtu(&rt->dst) < arg->mtu &&
2676 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2677 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2678 }
566cfd8f 2679 }
1da177e4
LT
2680 return 0;
2681}
2682
95c96174 2683void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2684{
c71099ac
TG
2685 struct rt6_mtu_change_arg arg = {
2686 .dev = dev,
2687 .mtu = mtu,
2688 };
1da177e4 2689
0c3584d5 2690 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2691}
2692
ef7c79ed 2693static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2694 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2695 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2696 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2697 [RTA_PRIORITY] = { .type = NLA_U32 },
2698 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2699 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2700 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2701 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2702 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2703};
2704
2705static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2706 struct fib6_config *cfg)
1da177e4 2707{
86872cb5
TG
2708 struct rtmsg *rtm;
2709 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2710 unsigned int pref;
86872cb5 2711 int err;
1da177e4 2712
86872cb5
TG
2713 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2714 if (err < 0)
2715 goto errout;
1da177e4 2716
86872cb5
TG
2717 err = -EINVAL;
2718 rtm = nlmsg_data(nlh);
2719 memset(cfg, 0, sizeof(*cfg));
2720
2721 cfg->fc_table = rtm->rtm_table;
2722 cfg->fc_dst_len = rtm->rtm_dst_len;
2723 cfg->fc_src_len = rtm->rtm_src_len;
2724 cfg->fc_flags = RTF_UP;
2725 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2726 cfg->fc_type = rtm->rtm_type;
86872cb5 2727
ef2c7d7b
ND
2728 if (rtm->rtm_type == RTN_UNREACHABLE ||
2729 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2730 rtm->rtm_type == RTN_PROHIBIT ||
2731 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2732 cfg->fc_flags |= RTF_REJECT;
2733
ab79ad14
2734 if (rtm->rtm_type == RTN_LOCAL)
2735 cfg->fc_flags |= RTF_LOCAL;
2736
1f56a01f
MKL
2737 if (rtm->rtm_flags & RTM_F_CLONED)
2738 cfg->fc_flags |= RTF_CACHE;
2739
15e47304 2740 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2741 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2742 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2743
2744 if (tb[RTA_GATEWAY]) {
67b61f6c 2745 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2746 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2747 }
86872cb5
TG
2748
2749 if (tb[RTA_DST]) {
2750 int plen = (rtm->rtm_dst_len + 7) >> 3;
2751
2752 if (nla_len(tb[RTA_DST]) < plen)
2753 goto errout;
2754
2755 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2756 }
86872cb5
TG
2757
2758 if (tb[RTA_SRC]) {
2759 int plen = (rtm->rtm_src_len + 7) >> 3;
2760
2761 if (nla_len(tb[RTA_SRC]) < plen)
2762 goto errout;
2763
2764 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2765 }
86872cb5 2766
c3968a85 2767 if (tb[RTA_PREFSRC])
67b61f6c 2768 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2769
86872cb5
TG
2770 if (tb[RTA_OIF])
2771 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2772
2773 if (tb[RTA_PRIORITY])
2774 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2775
2776 if (tb[RTA_METRICS]) {
2777 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2778 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2779 }
86872cb5
TG
2780
2781 if (tb[RTA_TABLE])
2782 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2783
51ebd318
ND
2784 if (tb[RTA_MULTIPATH]) {
2785 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2786 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2787 }
2788
c78ba6d6
LR
2789 if (tb[RTA_PREF]) {
2790 pref = nla_get_u8(tb[RTA_PREF]);
2791 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2792 pref != ICMPV6_ROUTER_PREF_HIGH)
2793 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2794 cfg->fc_flags |= RTF_PREF(pref);
2795 }
2796
19e42e45
RP
2797 if (tb[RTA_ENCAP])
2798 cfg->fc_encap = tb[RTA_ENCAP];
2799
2800 if (tb[RTA_ENCAP_TYPE])
2801 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2802
86872cb5
TG
2803 err = 0;
2804errout:
2805 return err;
1da177e4
LT
2806}
2807
6b9ea5a6
RP
/* One nexthop of a multipath request, queued by ip6_route_info_append(). */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop; reset to NULL after the insert attempt */
	struct fib6_config r_cfg;	/* per-nexthop copy of the request config */
	struct mx6_config mxc;		/* metrics converted from r_cfg */
	struct list_head next;		/* link in the caller's nexthop list */
};
2814
2815static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2816{
2817 struct rt6_nh *nh;
2818
2819 list_for_each_entry(nh, rt6_nh_list, next) {
2820 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2821 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2822 nh->r_cfg.fc_ifindex);
2823 }
2824}
2825
2826static int ip6_route_info_append(struct list_head *rt6_nh_list,
2827 struct rt6_info *rt, struct fib6_config *r_cfg)
2828{
2829 struct rt6_nh *nh;
2830 struct rt6_info *rtnh;
2831 int err = -EEXIST;
2832
2833 list_for_each_entry(nh, rt6_nh_list, next) {
2834 /* check if rt6_info already exists */
2835 rtnh = nh->rt6_info;
2836
2837 if (rtnh->dst.dev == rt->dst.dev &&
2838 rtnh->rt6i_idev == rt->rt6i_idev &&
2839 ipv6_addr_equal(&rtnh->rt6i_gateway,
2840 &rt->rt6i_gateway))
2841 return err;
2842 }
2843
2844 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2845 if (!nh)
2846 return -ENOMEM;
2847 nh->rt6_info = rt;
2848 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2849 if (err) {
2850 kfree(nh);
2851 return err;
2852 }
2853 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2854 list_add_tail(&nh->next, rt6_nh_list);
2855
2856 return 0;
2857}
2858
2859static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2860{
2861 struct fib6_config r_cfg;
2862 struct rtnexthop *rtnh;
6b9ea5a6
RP
2863 struct rt6_info *rt;
2864 struct rt6_nh *err_nh;
2865 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2866 int remaining;
2867 int attrlen;
6b9ea5a6
RP
2868 int err = 1;
2869 int nhn = 0;
2870 int replace = (cfg->fc_nlinfo.nlh &&
2871 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2872 LIST_HEAD(rt6_nh_list);
51ebd318 2873
35f1b4e9 2874 remaining = cfg->fc_mp_len;
51ebd318 2875 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2876
6b9ea5a6
RP
2877 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2878 * rt6_info structs per nexthop
2879 */
51ebd318
ND
2880 while (rtnh_ok(rtnh, remaining)) {
2881 memcpy(&r_cfg, cfg, sizeof(*cfg));
2882 if (rtnh->rtnh_ifindex)
2883 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2884
2885 attrlen = rtnh_attrlen(rtnh);
2886 if (attrlen > 0) {
2887 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2888
2889 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2890 if (nla) {
67b61f6c 2891 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2892 r_cfg.fc_flags |= RTF_GATEWAY;
2893 }
19e42e45
RP
2894 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2895 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2896 if (nla)
2897 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2898 }
6b9ea5a6
RP
2899
2900 err = ip6_route_info_create(&r_cfg, &rt);
2901 if (err)
2902 goto cleanup;
2903
2904 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2905 if (err) {
6b9ea5a6
RP
2906 dst_free(&rt->dst);
2907 goto cleanup;
2908 }
2909
2910 rtnh = rtnh_next(rtnh, &remaining);
2911 }
2912
2913 err_nh = NULL;
2914 list_for_each_entry(nh, &rt6_nh_list, next) {
2915 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2916 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2917 nh->rt6_info = NULL;
2918 if (err) {
2919 if (replace && nhn)
2920 ip6_print_replace_route_err(&rt6_nh_list);
2921 err_nh = nh;
2922 goto add_errout;
51ebd318 2923 }
6b9ea5a6 2924
1a72418b 2925 /* Because each route is added like a single route we remove
27596472
MK
2926 * these flags after the first nexthop: if there is a collision,
2927 * we have already failed to add the first nexthop:
2928 * fib6_add_rt2node() has rejected it; when replacing, old
2929 * nexthops have been replaced by first new, the rest should
2930 * be added to it.
1a72418b 2931 */
27596472
MK
2932 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2933 NLM_F_REPLACE);
6b9ea5a6
RP
2934 nhn++;
2935 }
2936
2937 goto cleanup;
2938
2939add_errout:
2940 /* Delete routes that were already added */
2941 list_for_each_entry(nh, &rt6_nh_list, next) {
2942 if (err_nh == nh)
2943 break;
2944 ip6_route_del(&nh->r_cfg);
2945 }
2946
2947cleanup:
2948 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2949 if (nh->rt6_info)
2950 dst_free(&nh->rt6_info->dst);
52fe51f8 2951 kfree(nh->mxc.mx);
6b9ea5a6
RP
2952 list_del(&nh->next);
2953 kfree(nh);
2954 }
2955
2956 return err;
2957}
2958
2959static int ip6_route_multipath_del(struct fib6_config *cfg)
2960{
2961 struct fib6_config r_cfg;
2962 struct rtnexthop *rtnh;
2963 int remaining;
2964 int attrlen;
2965 int err = 1, last_err = 0;
2966
2967 remaining = cfg->fc_mp_len;
2968 rtnh = (struct rtnexthop *)cfg->fc_mp;
2969
2970 /* Parse a Multipath Entry */
2971 while (rtnh_ok(rtnh, remaining)) {
2972 memcpy(&r_cfg, cfg, sizeof(*cfg));
2973 if (rtnh->rtnh_ifindex)
2974 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2975
2976 attrlen = rtnh_attrlen(rtnh);
2977 if (attrlen > 0) {
2978 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2979
2980 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2981 if (nla) {
2982 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2983 r_cfg.fc_flags |= RTF_GATEWAY;
2984 }
2985 }
2986 err = ip6_route_del(&r_cfg);
2987 if (err)
2988 last_err = err;
2989
51ebd318
ND
2990 rtnh = rtnh_next(rtnh, &remaining);
2991 }
2992
2993 return last_err;
2994}
2995
67ba4152 2996static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2997{
86872cb5
TG
2998 struct fib6_config cfg;
2999 int err;
1da177e4 3000
86872cb5
TG
3001 err = rtm_to_fib6_config(skb, nlh, &cfg);
3002 if (err < 0)
3003 return err;
3004
51ebd318 3005 if (cfg.fc_mp)
6b9ea5a6 3006 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3007 else
3008 return ip6_route_del(&cfg);
1da177e4
LT
3009}
3010
67ba4152 3011static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3012{
86872cb5
TG
3013 struct fib6_config cfg;
3014 int err;
1da177e4 3015
86872cb5
TG
3016 err = rtm_to_fib6_config(skb, nlh, &cfg);
3017 if (err < 0)
3018 return err;
3019
51ebd318 3020 if (cfg.fc_mp)
6b9ea5a6 3021 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3022 else
3023 return ip6_route_add(&cfg);
1da177e4
LT
3024}
3025
19e42e45 3026static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3027{
3028 return NLMSG_ALIGN(sizeof(struct rtmsg))
3029 + nla_total_size(16) /* RTA_SRC */
3030 + nla_total_size(16) /* RTA_DST */
3031 + nla_total_size(16) /* RTA_GATEWAY */
3032 + nla_total_size(16) /* RTA_PREFSRC */
3033 + nla_total_size(4) /* RTA_TABLE */
3034 + nla_total_size(4) /* RTA_IIF */
3035 + nla_total_size(4) /* RTA_OIF */
3036 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3037 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3038 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3039 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3040 + nla_total_size(1) /* RTA_PREF */
61adedf3 3041 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3042}
3043
191cd582
BH
3044static int rt6_fill_node(struct net *net,
3045 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3046 struct in6_addr *dst, struct in6_addr *src,
15e47304 3047 int iif, int type, u32 portid, u32 seq,
7bc570c8 3048 int prefix, int nowait, unsigned int flags)
1da177e4 3049{
4b32b5ad 3050 u32 metrics[RTAX_MAX];
1da177e4 3051 struct rtmsg *rtm;
2d7202bf 3052 struct nlmsghdr *nlh;
e3703b3d 3053 long expires;
9e762a4a 3054 u32 table;
1da177e4
LT
3055
3056 if (prefix) { /* user wants prefix routes only */
3057 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3058 /* success since this is not a prefix route */
3059 return 1;
3060 }
3061 }
3062
15e47304 3063 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3064 if (!nlh)
26932566 3065 return -EMSGSIZE;
2d7202bf
TG
3066
3067 rtm = nlmsg_data(nlh);
1da177e4
LT
3068 rtm->rtm_family = AF_INET6;
3069 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3070 rtm->rtm_src_len = rt->rt6i_src.plen;
3071 rtm->rtm_tos = 0;
c71099ac 3072 if (rt->rt6i_table)
9e762a4a 3073 table = rt->rt6i_table->tb6_id;
c71099ac 3074 else
9e762a4a
PM
3075 table = RT6_TABLE_UNSPEC;
3076 rtm->rtm_table = table;
c78679e8
DM
3077 if (nla_put_u32(skb, RTA_TABLE, table))
3078 goto nla_put_failure;
ef2c7d7b
ND
3079 if (rt->rt6i_flags & RTF_REJECT) {
3080 switch (rt->dst.error) {
3081 case -EINVAL:
3082 rtm->rtm_type = RTN_BLACKHOLE;
3083 break;
3084 case -EACCES:
3085 rtm->rtm_type = RTN_PROHIBIT;
3086 break;
b4949ab2
ND
3087 case -EAGAIN:
3088 rtm->rtm_type = RTN_THROW;
3089 break;
ef2c7d7b
ND
3090 default:
3091 rtm->rtm_type = RTN_UNREACHABLE;
3092 break;
3093 }
3094 }
38308473 3095 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3096 rtm->rtm_type = RTN_LOCAL;
d1918542 3097 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3098 rtm->rtm_type = RTN_LOCAL;
3099 else
3100 rtm->rtm_type = RTN_UNICAST;
3101 rtm->rtm_flags = 0;
35103d11 3102 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3103 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3104 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3105 rtm->rtm_flags |= RTNH_F_DEAD;
3106 }
1da177e4
LT
3107 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3108 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3109 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3110 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3111 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3112 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3113 rtm->rtm_protocol = RTPROT_RA;
3114 else
3115 rtm->rtm_protocol = RTPROT_KERNEL;
3116 }
1da177e4 3117
38308473 3118 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3119 rtm->rtm_flags |= RTM_F_CLONED;
3120
3121 if (dst) {
930345ea 3122 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3123 goto nla_put_failure;
1ab1457c 3124 rtm->rtm_dst_len = 128;
1da177e4 3125 } else if (rtm->rtm_dst_len)
930345ea 3126 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3127 goto nla_put_failure;
1da177e4
LT
3128#ifdef CONFIG_IPV6_SUBTREES
3129 if (src) {
930345ea 3130 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3131 goto nla_put_failure;
1ab1457c 3132 rtm->rtm_src_len = 128;
c78679e8 3133 } else if (rtm->rtm_src_len &&
930345ea 3134 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3135 goto nla_put_failure;
1da177e4 3136#endif
7bc570c8
YH
3137 if (iif) {
3138#ifdef CONFIG_IPV6_MROUTE
3139 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3140 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3141 if (err <= 0) {
3142 if (!nowait) {
3143 if (err == 0)
3144 return 0;
3145 goto nla_put_failure;
3146 } else {
3147 if (err == -EMSGSIZE)
3148 goto nla_put_failure;
3149 }
3150 }
3151 } else
3152#endif
c78679e8
DM
3153 if (nla_put_u32(skb, RTA_IIF, iif))
3154 goto nla_put_failure;
7bc570c8 3155 } else if (dst) {
1da177e4 3156 struct in6_addr saddr_buf;
c78679e8 3157 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3158 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3159 goto nla_put_failure;
1da177e4 3160 }
2d7202bf 3161
c3968a85
DW
3162 if (rt->rt6i_prefsrc.plen) {
3163 struct in6_addr saddr_buf;
4e3fd7a0 3164 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3165 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3166 goto nla_put_failure;
c3968a85
DW
3167 }
3168
4b32b5ad
MKL
3169 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3170 if (rt->rt6i_pmtu)
3171 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3172 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3173 goto nla_put_failure;
3174
dd0cbf29 3175 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3176 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3177 goto nla_put_failure;
94f826b8 3178 }
2d7202bf 3179
c78679e8
DM
3180 if (rt->dst.dev &&
3181 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3182 goto nla_put_failure;
3183 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3184 goto nla_put_failure;
8253947e
LW
3185
3186 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3187
87a50699 3188 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3189 goto nla_put_failure;
2d7202bf 3190
c78ba6d6
LR
3191 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3192 goto nla_put_failure;
3193
61adedf3 3194 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3195
053c095a
JB
3196 nlmsg_end(skb, nlh);
3197 return 0;
2d7202bf
TG
3198
3199nla_put_failure:
26932566
PM
3200 nlmsg_cancel(skb, nlh);
3201 return -EMSGSIZE;
1da177e4
LT
3202}
3203
1b43af54 3204int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3205{
3206 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3207 int prefix;
3208
2d7202bf
TG
3209 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3210 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3211 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3212 } else
3213 prefix = 0;
3214
191cd582
BH
3215 return rt6_fill_node(arg->net,
3216 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3217 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3218 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3219}
3220
67ba4152 3221static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3222{
3b1e0a65 3223 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3224 struct nlattr *tb[RTA_MAX+1];
3225 struct rt6_info *rt;
1da177e4 3226 struct sk_buff *skb;
ab364a6f 3227 struct rtmsg *rtm;
4c9483b2 3228 struct flowi6 fl6;
72331bc0 3229 int err, iif = 0, oif = 0;
1da177e4 3230
ab364a6f
TG
3231 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3232 if (err < 0)
3233 goto errout;
1da177e4 3234
ab364a6f 3235 err = -EINVAL;
4c9483b2 3236 memset(&fl6, 0, sizeof(fl6));
1da177e4 3237
ab364a6f
TG
3238 if (tb[RTA_SRC]) {
3239 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3240 goto errout;
3241
4e3fd7a0 3242 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3243 }
3244
3245 if (tb[RTA_DST]) {
3246 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3247 goto errout;
3248
4e3fd7a0 3249 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3250 }
3251
3252 if (tb[RTA_IIF])
3253 iif = nla_get_u32(tb[RTA_IIF]);
3254
3255 if (tb[RTA_OIF])
72331bc0 3256 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3257
2e47b291
LC
3258 if (tb[RTA_MARK])
3259 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3260
1da177e4
LT
3261 if (iif) {
3262 struct net_device *dev;
72331bc0
SL
3263 int flags = 0;
3264
5578689a 3265 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3266 if (!dev) {
3267 err = -ENODEV;
ab364a6f 3268 goto errout;
1da177e4 3269 }
72331bc0
SL
3270
3271 fl6.flowi6_iif = iif;
3272
3273 if (!ipv6_addr_any(&fl6.saddr))
3274 flags |= RT6_LOOKUP_F_HAS_SADDR;
3275
3276 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3277 flags);
3278 } else {
3279 fl6.flowi6_oif = oif;
3280
3281 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3282 }
3283
ab364a6f 3284 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3285 if (!skb) {
94e187c0 3286 ip6_rt_put(rt);
ab364a6f
TG
3287 err = -ENOBUFS;
3288 goto errout;
3289 }
1da177e4 3290
ab364a6f
TG
3291 /* Reserve room for dummy headers, this skb can pass
3292 through good chunk of routing engine.
3293 */
459a98ed 3294 skb_reset_mac_header(skb);
ab364a6f 3295 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3296
d8d1f30b 3297 skb_dst_set(skb, &rt->dst);
1da177e4 3298
4c9483b2 3299 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3300 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3301 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3302 if (err < 0) {
ab364a6f
TG
3303 kfree_skb(skb);
3304 goto errout;
1da177e4
LT
3305 }
3306
15e47304 3307 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3308errout:
1da177e4 3309 return err;
1da177e4
LT
3310}
3311
37a1d361
RP
3312void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3313 unsigned int nlm_flags)
1da177e4
LT
3314{
3315 struct sk_buff *skb;
5578689a 3316 struct net *net = info->nl_net;
528c4ceb
DL
3317 u32 seq;
3318 int err;
3319
3320 err = -ENOBUFS;
38308473 3321 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3322
19e42e45 3323 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3324 if (!skb)
21713ebc
TG
3325 goto errout;
3326
191cd582 3327 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3328 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3329 if (err < 0) {
3330 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3331 WARN_ON(err == -EMSGSIZE);
3332 kfree_skb(skb);
3333 goto errout;
3334 }
15e47304 3335 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3336 info->nlh, gfp_any());
3337 return;
21713ebc
TG
3338errout:
3339 if (err < 0)
5578689a 3340 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3341}
3342
8ed67789 3343static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3344 unsigned long event, void *ptr)
8ed67789 3345{
351638e7 3346 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3347 struct net *net = dev_net(dev);
8ed67789
DL
3348
3349 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3350 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3351 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3352#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3353 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3354 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3355 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3356 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3357#endif
3358 }
3359
3360 return NOTIFY_OK;
3361}
3362
1da177e4
LT
3363/*
3364 * /proc
3365 */
3366
3367#ifdef CONFIG_PROC_FS
3368
33120b30
AD
3369static const struct file_operations ipv6_route_proc_fops = {
3370 .owner = THIS_MODULE,
3371 .open = ipv6_route_open,
3372 .read = seq_read,
3373 .llseek = seq_lseek,
8d2ca1d7 3374 .release = seq_release_net,
33120b30
AD
3375};
3376
1da177e4
LT
3377static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3378{
69ddb805 3379 struct net *net = (struct net *)seq->private;
1da177e4 3380 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3381 net->ipv6.rt6_stats->fib_nodes,
3382 net->ipv6.rt6_stats->fib_route_nodes,
3383 net->ipv6.rt6_stats->fib_rt_alloc,
3384 net->ipv6.rt6_stats->fib_rt_entries,
3385 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3386 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3387 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3388
3389 return 0;
3390}
3391
3392static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3393{
de05c557 3394 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3395}
3396
9a32144e 3397static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3398 .owner = THIS_MODULE,
3399 .open = rt6_stats_seq_open,
3400 .read = seq_read,
3401 .llseek = seq_lseek,
b6fcbdb4 3402 .release = single_release_net,
1da177e4
LT
3403};
3404#endif /* CONFIG_PROC_FS */
3405
3406#ifdef CONFIG_SYSCTL
3407
1da177e4 3408static
fe2c6338 3409int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3410 void __user *buffer, size_t *lenp, loff_t *ppos)
3411{
c486da34
LAG
3412 struct net *net;
3413 int delay;
3414 if (!write)
1da177e4 3415 return -EINVAL;
c486da34
LAG
3416
3417 net = (struct net *)ctl->extra1;
3418 delay = net->ipv6.sysctl.flush_delay;
3419 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3420 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3421 return 0;
1da177e4
LT
3422}
3423
fe2c6338 3424struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3425 {
1da177e4 3426 .procname = "flush",
4990509f 3427 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3428 .maxlen = sizeof(int),
89c8b3a1 3429 .mode = 0200,
6d9f239a 3430 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3431 },
3432 {
1da177e4 3433 .procname = "gc_thresh",
9a7ec3a9 3434 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3435 .maxlen = sizeof(int),
3436 .mode = 0644,
6d9f239a 3437 .proc_handler = proc_dointvec,
1da177e4
LT
3438 },
3439 {
1da177e4 3440 .procname = "max_size",
4990509f 3441 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3442 .maxlen = sizeof(int),
3443 .mode = 0644,
6d9f239a 3444 .proc_handler = proc_dointvec,
1da177e4
LT
3445 },
3446 {
1da177e4 3447 .procname = "gc_min_interval",
4990509f 3448 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3449 .maxlen = sizeof(int),
3450 .mode = 0644,
6d9f239a 3451 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3452 },
3453 {
1da177e4 3454 .procname = "gc_timeout",
4990509f 3455 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3456 .maxlen = sizeof(int),
3457 .mode = 0644,
6d9f239a 3458 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3459 },
3460 {
1da177e4 3461 .procname = "gc_interval",
4990509f 3462 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3463 .maxlen = sizeof(int),
3464 .mode = 0644,
6d9f239a 3465 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3466 },
3467 {
1da177e4 3468 .procname = "gc_elasticity",
4990509f 3469 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3470 .maxlen = sizeof(int),
3471 .mode = 0644,
f3d3f616 3472 .proc_handler = proc_dointvec,
1da177e4
LT
3473 },
3474 {
1da177e4 3475 .procname = "mtu_expires",
4990509f 3476 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3477 .maxlen = sizeof(int),
3478 .mode = 0644,
6d9f239a 3479 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3480 },
3481 {
1da177e4 3482 .procname = "min_adv_mss",
4990509f 3483 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3484 .maxlen = sizeof(int),
3485 .mode = 0644,
f3d3f616 3486 .proc_handler = proc_dointvec,
1da177e4
LT
3487 },
3488 {
1da177e4 3489 .procname = "gc_min_interval_ms",
4990509f 3490 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3491 .maxlen = sizeof(int),
3492 .mode = 0644,
6d9f239a 3493 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3494 },
f8572d8f 3495 { }
1da177e4
LT
3496};
3497
2c8c1e72 3498struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3499{
3500 struct ctl_table *table;
3501
3502 table = kmemdup(ipv6_route_table_template,
3503 sizeof(ipv6_route_table_template),
3504 GFP_KERNEL);
5ee09105
YH
3505
3506 if (table) {
3507 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3508 table[0].extra1 = net;
86393e52 3509 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3510 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3511 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3512 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3513 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3514 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3515 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3516 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3517 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3518
3519 /* Don't export sysctls to unprivileged users */
3520 if (net->user_ns != &init_user_ns)
3521 table[0].procname = NULL;
5ee09105
YH
3522 }
3523
760f2d01
DL
3524 return table;
3525}
1da177e4
LT
3526#endif
3527
2c8c1e72 3528static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3529{
633d424b 3530 int ret = -ENOMEM;
8ed67789 3531
86393e52
AD
3532 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3533 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3534
fc66f95c
ED
3535 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3536 goto out_ip6_dst_ops;
3537
8ed67789
DL
3538 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3539 sizeof(*net->ipv6.ip6_null_entry),
3540 GFP_KERNEL);
3541 if (!net->ipv6.ip6_null_entry)
fc66f95c 3542 goto out_ip6_dst_entries;
d8d1f30b 3543 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3544 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3545 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3546 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3547 ip6_template_metrics, true);
8ed67789
DL
3548
3549#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3550 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3551 sizeof(*net->ipv6.ip6_prohibit_entry),
3552 GFP_KERNEL);
68fffc67
PZ
3553 if (!net->ipv6.ip6_prohibit_entry)
3554 goto out_ip6_null_entry;
d8d1f30b 3555 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3556 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3557 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3558 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3559 ip6_template_metrics, true);
8ed67789
DL
3560
3561 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3562 sizeof(*net->ipv6.ip6_blk_hole_entry),
3563 GFP_KERNEL);
68fffc67
PZ
3564 if (!net->ipv6.ip6_blk_hole_entry)
3565 goto out_ip6_prohibit_entry;
d8d1f30b 3566 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3567 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3568 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3569 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3570 ip6_template_metrics, true);
8ed67789
DL
3571#endif
3572
b339a47c
PZ
3573 net->ipv6.sysctl.flush_delay = 0;
3574 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3575 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3576 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3577 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3578 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3579 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3580 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3581
6891a346
BT
3582 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3583
8ed67789
DL
3584 ret = 0;
3585out:
3586 return ret;
f2fc6a54 3587
68fffc67
PZ
3588#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3589out_ip6_prohibit_entry:
3590 kfree(net->ipv6.ip6_prohibit_entry);
3591out_ip6_null_entry:
3592 kfree(net->ipv6.ip6_null_entry);
3593#endif
fc66f95c
ED
3594out_ip6_dst_entries:
3595 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3596out_ip6_dst_ops:
f2fc6a54 3597 goto out;
cdb18761
DL
3598}
3599
2c8c1e72 3600static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3601{
8ed67789
DL
3602 kfree(net->ipv6.ip6_null_entry);
3603#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3604 kfree(net->ipv6.ip6_prohibit_entry);
3605 kfree(net->ipv6.ip6_blk_hole_entry);
3606#endif
41bb78b4 3607 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3608}
3609
d189634e
TG
3610static int __net_init ip6_route_net_init_late(struct net *net)
3611{
3612#ifdef CONFIG_PROC_FS
d4beaa66
G
3613 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3614 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3615#endif
3616 return 0;
3617}
3618
3619static void __net_exit ip6_route_net_exit_late(struct net *net)
3620{
3621#ifdef CONFIG_PROC_FS
ece31ffd
G
3622 remove_proc_entry("ipv6_route", net->proc_net);
3623 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3624#endif
3625}
3626
cdb18761
DL
3627static struct pernet_operations ip6_route_net_ops = {
3628 .init = ip6_route_net_init,
3629 .exit = ip6_route_net_exit,
3630};
3631
c3426b47
DM
3632static int __net_init ipv6_inetpeer_init(struct net *net)
3633{
3634 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3635
3636 if (!bp)
3637 return -ENOMEM;
3638 inet_peer_base_init(bp);
3639 net->ipv6.peers = bp;
3640 return 0;
3641}
3642
3643static void __net_exit ipv6_inetpeer_exit(struct net *net)
3644{
3645 struct inet_peer_base *bp = net->ipv6.peers;
3646
3647 net->ipv6.peers = NULL;
56a6b248 3648 inetpeer_invalidate_tree(bp);
c3426b47
DM
3649 kfree(bp);
3650}
3651
2b823f72 3652static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3653 .init = ipv6_inetpeer_init,
3654 .exit = ipv6_inetpeer_exit,
3655};
3656
d189634e
TG
3657static struct pernet_operations ip6_route_net_late_ops = {
3658 .init = ip6_route_net_init_late,
3659 .exit = ip6_route_net_exit_late,
3660};
3661
8ed67789
DL
3662static struct notifier_block ip6_route_dev_notifier = {
3663 .notifier_call = ip6_route_dev_notify,
3664 .priority = 0,
3665};
3666
433d49c3 3667int __init ip6_route_init(void)
1da177e4 3668{
433d49c3 3669 int ret;
8d0b94af 3670 int cpu;
433d49c3 3671
9a7ec3a9
DL
3672 ret = -ENOMEM;
3673 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3674 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3675 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3676 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3677 goto out;
14e50e57 3678
fc66f95c 3679 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3680 if (ret)
bdb3289f 3681 goto out_kmem_cache;
bdb3289f 3682
c3426b47
DM
3683 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3684 if (ret)
e8803b6c 3685 goto out_dst_entries;
2a0c451a 3686
7e52b33b
DM
3687 ret = register_pernet_subsys(&ip6_route_net_ops);
3688 if (ret)
3689 goto out_register_inetpeer;
c3426b47 3690
5dc121e9
AE
3691 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3692
8ed67789
DL
3693 /* Registering of the loopback is done before this portion of code,
3694 * the loopback reference in rt6_info will not be taken, do it
3695 * manually for init_net */
d8d1f30b 3696 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3697 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3698 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3699 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3700 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3701 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3702 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3703 #endif
e8803b6c 3704 ret = fib6_init();
433d49c3 3705 if (ret)
8ed67789 3706 goto out_register_subsys;
433d49c3 3707
433d49c3
DL
3708 ret = xfrm6_init();
3709 if (ret)
e8803b6c 3710 goto out_fib6_init;
c35b7e72 3711
433d49c3
DL
3712 ret = fib6_rules_init();
3713 if (ret)
3714 goto xfrm6_init;
7e5449c2 3715
d189634e
TG
3716 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3717 if (ret)
3718 goto fib6_rules_init;
3719
433d49c3 3720 ret = -ENOBUFS;
c7ac8679
GR
3721 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3722 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3723 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3724 goto out_register_late_subsys;
c127ea2c 3725
8ed67789 3726 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3727 if (ret)
d189634e 3728 goto out_register_late_subsys;
8ed67789 3729
8d0b94af
MKL
3730 for_each_possible_cpu(cpu) {
3731 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3732
3733 INIT_LIST_HEAD(&ul->head);
3734 spin_lock_init(&ul->lock);
3735 }
3736
433d49c3
DL
3737out:
3738 return ret;
3739
d189634e
TG
3740out_register_late_subsys:
3741 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3742fib6_rules_init:
433d49c3
DL
3743 fib6_rules_cleanup();
3744xfrm6_init:
433d49c3 3745 xfrm6_fini();
2a0c451a
TG
3746out_fib6_init:
3747 fib6_gc_cleanup();
8ed67789
DL
3748out_register_subsys:
3749 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3750out_register_inetpeer:
3751 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3752out_dst_entries:
3753 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3754out_kmem_cache:
f2fc6a54 3755 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3756 goto out;
1da177e4
LT
3757}
3758
3759void ip6_route_cleanup(void)
3760{
8ed67789 3761 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3762 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3763 fib6_rules_cleanup();
1da177e4 3764 xfrm6_fini();
1da177e4 3765 fib6_gc_cleanup();
c3426b47 3766 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3767 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3768 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3769 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3770}