net: Allow userns root to control ipv4
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
1716a961 68static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 69 const struct in6_addr *dest);
1da177e4 70static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 71static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 72static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
73static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74static void ip6_dst_destroy(struct dst_entry *);
75static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
569d3645 77static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
78
79static int ip6_pkt_discard(struct sk_buff *skb);
80static int ip6_pkt_discard_out(struct sk_buff *skb);
81static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
82static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
1da177e4 86
70ceb4f5 87#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 88static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
95c96174 91 unsigned int pref);
efa2cea0 92static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
95#endif
96
06582540
DM
97static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98{
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
101 u32 *p = NULL;
102
8e2ec639
YZ
103 if (!(rt->dst.flags & DST_HOST))
104 return NULL;
105
fbfe95a4 106 peer = rt6_get_peer_create(rt);
06582540
DM
107 if (peer) {
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
110
111 p = peer->metrics;
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
117
118 if (prev != old) {
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
121 p = NULL;
122 }
123 }
124 return p;
125}
126
f894cbf8
DM
127static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct sk_buff *skb,
129 const void *daddr)
39232973
DM
130{
131 struct in6_addr *p = &rt->rt6i_gateway;
132
a7563f34 133 if (!ipv6_addr_any(p))
39232973 134 return (const void *) p;
f894cbf8
DM
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
39232973
DM
137 return daddr;
138}
139
f894cbf8
DM
140static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct sk_buff *skb,
142 const void *daddr)
d3aaeb38 143{
39232973
DM
144 struct rt6_info *rt = (struct rt6_info *) dst;
145 struct neighbour *n;
146
f894cbf8 147 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
149 if (n)
150 return n;
151 return neigh_create(&nd_tbl, daddr, dst->dev);
152}
153
8ade06c6 154static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 155{
8ade06c6
DM
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157 if (!n) {
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159 if (IS_ERR(n))
160 return PTR_ERR(n);
161 }
97cac082 162 rt->n = n;
f83c7790
DM
163
164 return 0;
d3aaeb38
DM
165}
166
9a7ec3a9 167static struct dst_ops ip6_dst_ops_template = {
1da177e4 168 .family = AF_INET6,
09640e63 169 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
170 .gc = ip6_dst_gc,
171 .gc_thresh = 1024,
172 .check = ip6_dst_check,
0dbaee3b 173 .default_advmss = ip6_default_advmss,
ebb762f2 174 .mtu = ip6_mtu,
06582540 175 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
176 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 181 .redirect = rt6_do_redirect,
1ac06e03 182 .local_out = __ip6_local_out,
d3aaeb38 183 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
184};
185
ebb762f2 186static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 187{
618f9bc7
SK
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190 return mtu ? : dst->dev->mtu;
ec831ea7
RD
191}
192
6700c270
DM
193static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
14e50e57
DM
195{
196}
197
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
202
0972ddb2
HB
203static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204 unsigned long old)
205{
206 return NULL;
207}
208
14e50e57
DM
209static struct dst_ops ip6_dst_blackhole_ops = {
210 .family = AF_INET6,
09640e63 211 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
212 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check,
ebb762f2 214 .mtu = ip6_blackhole_mtu,
214f45c9 215 .default_advmss = ip6_default_advmss,
14e50e57 216 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 217 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 218 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 219 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
220};
221
62fa8a84 222static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 223 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
224};
225
fb0af4c7 226static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
2c20cbd7 230 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 231 .error = -ENETUNREACH,
d8d1f30b
CG
232 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out,
1da177e4
LT
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 236 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
101367c2
TG
241#ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
6723ab54
DM
243static int ip6_pkt_prohibit(struct sk_buff *skb);
244static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 245
fb0af4c7 246static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
247 .dst = {
248 .__refcnt = ATOMIC_INIT(1),
249 .__use = 1,
2c20cbd7 250 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 251 .error = -EACCES,
d8d1f30b
CG
252 .input = ip6_pkt_prohibit,
253 .output = ip6_pkt_prohibit_out,
101367c2
TG
254 },
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 256 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
259};
260
fb0af4c7 261static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
262 .dst = {
263 .__refcnt = ATOMIC_INIT(1),
264 .__use = 1,
2c20cbd7 265 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 266 .error = -EINVAL,
d8d1f30b
CG
267 .input = dst_discard,
268 .output = dst_discard,
101367c2
TG
269 },
270 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 271 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
272 .rt6i_metric = ~(u32) 0,
273 .rt6i_ref = ATOMIC_INIT(1),
274};
275
276#endif
277
1da177e4 278/* allocate dst with ip6_dst_ops */
97bab73f 279static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 280 struct net_device *dev,
8b96d22d
DM
281 int flags,
282 struct fib6_table *table)
1da177e4 283{
97bab73f 284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 285 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 286
97bab73f 287 if (rt) {
8104891b
SK
288 struct dst_entry *dst = &rt->dst;
289
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 292 rt->rt6i_genid = rt_genid(net);
51ebd318
ND
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
97bab73f 295 }
cf911662 296 return rt;
1da177e4
LT
297}
298
299static void ip6_dst_destroy(struct dst_entry *dst)
300{
301 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev;
303
97cac082
DM
304 if (rt->n)
305 neigh_release(rt->n);
306
8e2ec639
YZ
307 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst);
309
38308473 310 if (idev) {
1da177e4
LT
311 rt->rt6i_idev = NULL;
312 in6_dev_put(idev);
1ab1457c 313 }
1716a961
G
314
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316 dst_release(dst->from);
317
97bab73f
DM
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
320 inet_putpeer(peer);
321 }
322}
323
324void rt6_bind_peer(struct rt6_info *rt, int create)
325{
97bab73f 326 struct inet_peer_base *base;
b3419363
DM
327 struct inet_peer *peer;
328
97bab73f
DM
329 base = inetpeer_base_ptr(rt->_rt6i_peer);
330 if (!base)
331 return;
332
333 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
334 if (peer) {
335 if (!rt6_set_peer(rt, peer))
336 inet_putpeer(peer);
7b34ca2a 337 }
1da177e4
LT
338}
339
340static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341 int how)
342{
343 struct rt6_info *rt = (struct rt6_info *)dst;
344 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 345 struct net_device *loopback_dev =
c346dca1 346 dev_net(dev)->loopback_dev;
1da177e4 347
97cac082
DM
348 if (dev != loopback_dev) {
349 if (idev && idev->dev == dev) {
350 struct inet6_dev *loopback_idev =
351 in6_dev_get(loopback_dev);
352 if (loopback_idev) {
353 rt->rt6i_idev = loopback_idev;
354 in6_dev_put(idev);
355 }
356 }
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
360 dev_put(dev);
1da177e4
LT
361 }
362 }
363}
364
a50feda5 365static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 366{
1716a961
G
367 if (rt->rt6i_flags & RTF_EXPIRES) {
368 if (time_after(jiffies, rt->dst.expires))
a50feda5 369 return true;
1716a961 370 } else if (rt->dst.from) {
3fd91fb3 371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 372 }
a50feda5 373 return false;
1da177e4
LT
374}
375
a50feda5 376static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 377{
a02cec21
ED
378 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
380}
381
51ebd318
ND
382/* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
385 */
386static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
388{
389 unsigned int val = fl6->flowi6_proto;
390
b3ce5ae1
ND
391 val ^= (__force u32)fl6->daddr.s6_addr32[0];
392 val ^= (__force u32)fl6->daddr.s6_addr32[1];
393 val ^= (__force u32)fl6->daddr.s6_addr32[2];
394 val ^= (__force u32)fl6->daddr.s6_addr32[3];
51ebd318 395
b3ce5ae1
ND
396 val ^= (__force u32)fl6->saddr.s6_addr32[0];
397 val ^= (__force u32)fl6->saddr.s6_addr32[1];
398 val ^= (__force u32)fl6->saddr.s6_addr32[2];
399 val ^= (__force u32)fl6->saddr.s6_addr32[3];
51ebd318
ND
400
401 /* Work only if this not encapsulated */
402 switch (fl6->flowi6_proto) {
403 case IPPROTO_UDP:
404 case IPPROTO_TCP:
405 case IPPROTO_SCTP:
b3ce5ae1
ND
406 val ^= (__force u16)fl6->fl6_sport;
407 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
408 break;
409
410 case IPPROTO_ICMPV6:
b3ce5ae1
ND
411 val ^= (__force u16)fl6->fl6_icmp_type;
412 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
413 break;
414 }
415 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 416 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
417
418 /* Perhaps, we need to tune, this function? */
419 val = val ^ (val >> 7) ^ (val >> 12);
420 return val % candidate_count;
421}
422
423static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424 struct flowi6 *fl6)
425{
426 struct rt6_info *sibling, *next_sibling;
427 int route_choosen;
428
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route, if route_choosen == 0
431 * (siblings does not include ourself)
432 */
433 if (route_choosen)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
436 route_choosen--;
437 if (route_choosen == 0) {
438 match = sibling;
439 break;
440 }
441 }
442 return match;
443}
444
1da177e4 445/*
c71099ac 446 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
447 */
448
8ed67789
DL
449static inline struct rt6_info *rt6_device_match(struct net *net,
450 struct rt6_info *rt,
b71d1d42 451 const struct in6_addr *saddr,
1da177e4 452 int oif,
d420895e 453 int flags)
1da177e4
LT
454{
455 struct rt6_info *local = NULL;
456 struct rt6_info *sprt;
457
dd3abc4e
YH
458 if (!oif && ipv6_addr_any(saddr))
459 goto out;
460
d8d1f30b 461 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 462 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
463
464 if (oif) {
1da177e4
LT
465 if (dev->ifindex == oif)
466 return sprt;
467 if (dev->flags & IFF_LOOPBACK) {
38308473 468 if (!sprt->rt6i_idev ||
1da177e4 469 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 470 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 471 continue;
1ab1457c 472 if (local && (!oif ||
1da177e4
LT
473 local->rt6i_idev->dev->ifindex == oif))
474 continue;
475 }
476 local = sprt;
477 }
dd3abc4e
YH
478 } else {
479 if (ipv6_chk_addr(net, saddr, dev,
480 flags & RT6_LOOKUP_F_IFACE))
481 return sprt;
1da177e4 482 }
dd3abc4e 483 }
1da177e4 484
dd3abc4e 485 if (oif) {
1da177e4
LT
486 if (local)
487 return local;
488
d420895e 489 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 490 return net->ipv6.ip6_null_entry;
1da177e4 491 }
dd3abc4e 492out:
1da177e4
LT
493 return rt;
494}
495
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here a probe is sent only if the neighbour is not NUD_VALID and
 * more than the device's rtr_probe_interval has passed since
 * neigh->updated.  A NULL @rt, or a route without neighbour, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}

	/* Stamp the probe time, then solicit outside the lock. */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
1da177e4 533/*
554cfb7e 534 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 535 */
b6f99a21 536static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 537{
d1918542 538 struct net_device *dev = rt->dst.dev;
161980f4 539 if (!oif || dev->ifindex == oif)
554cfb7e 540 return 2;
161980f4
DM
541 if ((dev->flags & IFF_LOOPBACK) &&
542 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543 return 1;
544 return 0;
554cfb7e 545}
1da177e4 546
b6f99a21 547static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 548{
f2c31e32 549 struct neighbour *neigh;
398bcbeb 550 int m;
f2c31e32 551
97cac082 552 neigh = rt->n;
4d0c5911
YH
553 if (rt->rt6i_flags & RTF_NONEXTHOP ||
554 !(rt->rt6i_flags & RTF_GATEWAY))
555 m = 1;
556 else if (neigh) {
554cfb7e
YH
557 read_lock_bh(&neigh->lock);
558 if (neigh->nud_state & NUD_VALID)
4d0c5911 559 m = 2;
398bcbeb
YH
560#ifdef CONFIG_IPV6_ROUTER_PREF
561 else if (neigh->nud_state & NUD_FAILED)
562 m = 0;
563#endif
564 else
ea73ee23 565 m = 1;
554cfb7e 566 read_unlock_bh(&neigh->lock);
398bcbeb
YH
567 } else
568 m = 0;
554cfb7e 569 return m;
1da177e4
LT
570}
571
554cfb7e
YH
572static int rt6_score_route(struct rt6_info *rt, int oif,
573 int strict)
1da177e4 574{
4d0c5911 575 int m, n;
1ab1457c 576
4d0c5911 577 m = rt6_check_dev(rt, oif);
77d16f45 578 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 579 return -1;
ebacaaa0
YH
580#ifdef CONFIG_IPV6_ROUTER_PREF
581 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
582#endif
4d0c5911 583 n = rt6_check_neigh(rt);
557e92ef 584 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
585 return -1;
586 return m;
587}
588
f11e6659
DM
589static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
590 int *mpri, struct rt6_info *match)
554cfb7e 591{
f11e6659
DM
592 int m;
593
594 if (rt6_check_expired(rt))
595 goto out;
596
597 m = rt6_score_route(rt, oif, strict);
598 if (m < 0)
599 goto out;
600
601 if (m > *mpri) {
602 if (strict & RT6_LOOKUP_F_REACHABLE)
603 rt6_probe(match);
604 *mpri = m;
605 match = rt;
606 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
607 rt6_probe(rt);
608 }
609
610out:
611 return match;
612}
613
614static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
615 struct rt6_info *rr_head,
616 u32 metric, int oif, int strict)
617{
618 struct rt6_info *rt, *match;
554cfb7e 619 int mpri = -1;
1da177e4 620
f11e6659
DM
621 match = NULL;
622 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 623 rt = rt->dst.rt6_next)
f11e6659
DM
624 match = find_match(rt, oif, strict, &mpri, match);
625 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 626 rt = rt->dst.rt6_next)
f11e6659 627 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 628
f11e6659
DM
629 return match;
630}
1da177e4 631
f11e6659
DM
632static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
633{
634 struct rt6_info *match, *rt0;
8ed67789 635 struct net *net;
1da177e4 636
f11e6659
DM
637 rt0 = fn->rr_ptr;
638 if (!rt0)
639 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 640
f11e6659 641 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 642
554cfb7e 643 if (!match &&
f11e6659 644 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 645 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 646
554cfb7e 647 /* no entries matched; do round-robin */
f11e6659
DM
648 if (!next || next->rt6i_metric != rt0->rt6i_metric)
649 next = fn->leaf;
650
651 if (next != rt0)
652 fn->rr_ptr = next;
1da177e4 653 }
1da177e4 654
d1918542 655 net = dev_net(rt0->dst.dev);
a02cec21 656 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
657}
658
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime removes a matching existing route; a non-zero lifetime
 * adds the route if it is missing, or refreshes the preference bits of an
 * existing one.  The route's expiry is then set from the lifetime.
 *
 * Returns 0 on success, -EINVAL if the option is too short, its
 * length/prefix_len combination is inconsistent, or its preference is
 * ICMPV6_ROUTER_PREF_INVALID.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
731
/*
 * BACKTRACK(net, saddr) - climb the fib tree after a failed selection.
 *
 * Expands in a function that has local variables `rt` and `fn` and labels
 * `out` and `restart`.  If `rt` is the namespace's null entry, walk
 * towards the root: at each step jump to `out` when `fn` is a top-level
 * root (RTN_TL_ROOT); otherwise move to the parent, or -- when the parent
 * has a subtree other than `fn` -- to the result of looking @saddr up in
 * that subtree.  As soon as the node reached carries route info
 * (RTN_RTINFO), jump to `restart`.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 749
8ed67789
DL
750static struct rt6_info *ip6_pol_route_lookup(struct net *net,
751 struct fib6_table *table,
4c9483b2 752 struct flowi6 *fl6, int flags)
1da177e4
LT
753{
754 struct fib6_node *fn;
755 struct rt6_info *rt;
756
c71099ac 757 read_lock_bh(&table->tb6_lock);
4c9483b2 758 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
759restart:
760 rt = fn->leaf;
4c9483b2 761 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318
ND
762 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763 rt = rt6_multipath_select(rt, fl6);
4c9483b2 764 BACKTRACK(net, &fl6->saddr);
c71099ac 765out:
d8d1f30b 766 dst_use(&rt->dst, jiffies);
c71099ac 767 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
768 return rt;
769
770}
771
ea6e574e
FW
772struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
773 int flags)
774{
775 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
776}
777EXPORT_SYMBOL_GPL(ip6_route_lookup);
778
9acd9f3a
YH
779struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
780 const struct in6_addr *saddr, int oif, int strict)
c71099ac 781{
4c9483b2
DM
782 struct flowi6 fl6 = {
783 .flowi6_oif = oif,
784 .daddr = *daddr,
c71099ac
TG
785 };
786 struct dst_entry *dst;
77d16f45 787 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 788
adaa70bb 789 if (saddr) {
4c9483b2 790 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
791 flags |= RT6_LOOKUP_F_HAS_SADDR;
792 }
793
4c9483b2 794 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
795 if (dst->error == 0)
796 return (struct rt6_info *) dst;
797
798 dst_release(dst);
799
1da177e4
LT
800 return NULL;
801}
802
7159039a
YH
803EXPORT_SYMBOL(rt6_lookup);
804
c71099ac 805/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
806 It takes new route entry, the addition fails by any reason the
807 route is freed. In any case, if caller does not hold it, it may
808 be destroyed.
809 */
810
86872cb5 811static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
812{
813 int err;
c71099ac 814 struct fib6_table *table;
1da177e4 815
c71099ac
TG
816 table = rt->rt6i_table;
817 write_lock_bh(&table->tb6_lock);
86872cb5 818 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 819 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
820
821 return err;
822}
823
40e22e8f
TG
824int ip6_ins_rt(struct rt6_info *rt)
825{
4d1169c1 826 struct nl_info info = {
d1918542 827 .nl_net = dev_net(rt->dst.dev),
4d1169c1 828 };
528c4ceb 829 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
830}
831
1716a961 832static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 833 const struct in6_addr *daddr,
b71d1d42 834 const struct in6_addr *saddr)
1da177e4 835{
1da177e4
LT
836 struct rt6_info *rt;
837
838 /*
839 * Clone the route.
840 */
841
21efcfa0 842 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
843
844 if (rt) {
14deae41
DM
845 int attempts = !in_softirq();
846
38308473 847 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 848 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 849 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 850 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 851 rt->rt6i_gateway = *daddr;
58c4fb86 852 }
1da177e4 853
1da177e4 854 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
855
856#ifdef CONFIG_IPV6_SUBTREES
857 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 858 rt->rt6i_src.addr = *saddr;
1da177e4
LT
859 rt->rt6i_src.plen = 128;
860 }
861#endif
862
14deae41 863 retry:
8ade06c6 864 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 865 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
866 int saved_rt_min_interval =
867 net->ipv6.sysctl.ip6_rt_gc_min_interval;
868 int saved_rt_elasticity =
869 net->ipv6.sysctl.ip6_rt_gc_elasticity;
870
871 if (attempts-- > 0) {
872 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
873 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
874
86393e52 875 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
876
877 net->ipv6.sysctl.ip6_rt_gc_elasticity =
878 saved_rt_elasticity;
879 net->ipv6.sysctl.ip6_rt_gc_min_interval =
880 saved_rt_min_interval;
881 goto retry;
882 }
883
f3213831 884 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 885 dst_free(&rt->dst);
14deae41
DM
886 return NULL;
887 }
95a9a5ba 888 }
1da177e4 889
95a9a5ba
YH
890 return rt;
891}
1da177e4 892
21efcfa0
ED
893static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894 const struct in6_addr *daddr)
299d9939 895{
21efcfa0
ED
896 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
897
299d9939 898 if (rt) {
299d9939 899 rt->rt6i_flags |= RTF_CACHE;
97cac082 900 rt->n = neigh_clone(ort->n);
299d9939
YH
901 }
902 return rt;
903}
904
8ed67789 905static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 906 struct flowi6 *fl6, int flags)
1da177e4
LT
907{
908 struct fib6_node *fn;
519fbd87 909 struct rt6_info *rt, *nrt;
c71099ac 910 int strict = 0;
1da177e4 911 int attempts = 3;
519fbd87 912 int err;
53b7997f 913 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 914
77d16f45 915 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
916
917relookup:
c71099ac 918 read_lock_bh(&table->tb6_lock);
1da177e4 919
8238dd06 920restart_2:
4c9483b2 921 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
922
923restart:
4acad72d 924 rt = rt6_select(fn, oif, strict | reachable);
51ebd318
ND
925 if (rt->rt6i_nsiblings && oif == 0)
926 rt = rt6_multipath_select(rt, fl6);
4c9483b2 927 BACKTRACK(net, &fl6->saddr);
8ed67789 928 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 929 rt->rt6i_flags & RTF_CACHE)
1ddef044 930 goto out;
1da177e4 931
d8d1f30b 932 dst_hold(&rt->dst);
c71099ac 933 read_unlock_bh(&table->tb6_lock);
fb9de91e 934
97cac082 935 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 936 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 937 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 938 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
939 else
940 goto out2;
e40cf353 941
94e187c0 942 ip6_rt_put(rt);
8ed67789 943 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 944
d8d1f30b 945 dst_hold(&rt->dst);
519fbd87 946 if (nrt) {
40e22e8f 947 err = ip6_ins_rt(nrt);
519fbd87 948 if (!err)
1da177e4 949 goto out2;
1da177e4 950 }
1da177e4 951
519fbd87
YH
952 if (--attempts <= 0)
953 goto out2;
954
955 /*
c71099ac 956 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
957 * released someone could insert this route. Relookup.
958 */
94e187c0 959 ip6_rt_put(rt);
519fbd87
YH
960 goto relookup;
961
962out:
8238dd06
YH
963 if (reachable) {
964 reachable = 0;
965 goto restart_2;
966 }
d8d1f30b 967 dst_hold(&rt->dst);
c71099ac 968 read_unlock_bh(&table->tb6_lock);
1da177e4 969out2:
d8d1f30b
CG
970 rt->dst.lastuse = jiffies;
971 rt->dst.__use++;
c71099ac
TG
972
973 return rt;
1da177e4
LT
974}
975
8ed67789 976static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 977 struct flowi6 *fl6, int flags)
4acad72d 978{
4c9483b2 979 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
980}
981
72331bc0
SL
982static struct dst_entry *ip6_route_input_lookup(struct net *net,
983 struct net_device *dev,
984 struct flowi6 *fl6, int flags)
985{
986 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
987 flags |= RT6_LOOKUP_F_IFACE;
988
989 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
990}
991
c71099ac
TG
992void ip6_route_input(struct sk_buff *skb)
993{
b71d1d42 994 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 995 struct net *net = dev_net(skb->dev);
adaa70bb 996 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
997 struct flowi6 fl6 = {
998 .flowi6_iif = skb->dev->ifindex,
999 .daddr = iph->daddr,
1000 .saddr = iph->saddr,
38308473 1001 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
1002 .flowi6_mark = skb->mark,
1003 .flowi6_proto = iph->nexthdr,
c71099ac 1004 };
adaa70bb 1005
72331bc0 1006 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1007}
1008
8ed67789 1009static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1010 struct flowi6 *fl6, int flags)
1da177e4 1011{
4c9483b2 1012 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1013}
1014
9c7a4f9c 1015struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1016 struct flowi6 *fl6)
c71099ac
TG
1017{
1018 int flags = 0;
1019
1fb9489b 1020 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1021
4c9483b2 1022 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1023 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1024
4c9483b2 1025 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1026 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1027 else if (sk)
1028 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1029
4c9483b2 1030 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
1031}
1032
7159039a 1033EXPORT_SYMBOL(ip6_route_output);
1da177e4 1034
2774c131 1035struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1036{
5c1e6aa3 1037 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1038 struct dst_entry *new = NULL;
1039
f5b0a874 1040 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1041 if (rt) {
d8d1f30b 1042 new = &rt->dst;
14e50e57 1043
8104891b
SK
1044 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1045 rt6_init_peer(rt, net->ipv6.peers);
1046
14e50e57 1047 new->__use = 1;
352e512c
HX
1048 new->input = dst_discard;
1049 new->output = dst_discard;
14e50e57 1050
21efcfa0
ED
1051 if (dst_metrics_read_only(&ort->dst))
1052 new->_metrics = ort->dst._metrics;
1053 else
1054 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1055 rt->rt6i_idev = ort->rt6i_idev;
1056 if (rt->rt6i_idev)
1057 in6_dev_hold(rt->rt6i_idev);
14e50e57 1058
4e3fd7a0 1059 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1060 rt->rt6i_flags = ort->rt6i_flags;
1061 rt6_clean_expires(rt);
14e50e57
DM
1062 rt->rt6i_metric = 0;
1063
1064 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1065#ifdef CONFIG_IPV6_SUBTREES
1066 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1067#endif
1068
1069 dst_free(new);
1070 }
1071
69ead7af
DM
1072 dst_release(dst_orig);
1073 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1074}
14e50e57 1075
1da177e4
LT
1076/*
1077 * Destination cache support functions
1078 */
1079
1080static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1081{
1082 struct rt6_info *rt;
1083
1084 rt = (struct rt6_info *) dst;
1085
6f3118b5
ND
1086 /* All IPV6 dsts are created with ->obsolete set to the value
1087 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088 * into this function always.
1089 */
1090 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1091 return NULL;
1092
a4477c4d 1093 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4 1094 return dst;
a4477c4d 1095
1da177e4
LT
1096 return NULL;
1097}
1098
1099static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1100{
1101 struct rt6_info *rt = (struct rt6_info *) dst;
1102
1103 if (rt) {
54c1a859
YH
1104 if (rt->rt6i_flags & RTF_CACHE) {
1105 if (rt6_check_expired(rt)) {
1106 ip6_del_rt(rt);
1107 dst = NULL;
1108 }
1109 } else {
1da177e4 1110 dst_release(dst);
54c1a859
YH
1111 dst = NULL;
1112 }
1da177e4 1113 }
54c1a859 1114 return dst;
1da177e4
LT
1115}
1116
1117static void ip6_link_failure(struct sk_buff *skb)
1118{
1119 struct rt6_info *rt;
1120
3ffe533c 1121 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1122
adf30907 1123 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1124 if (rt) {
1716a961
G
1125 if (rt->rt6i_flags & RTF_CACHE)
1126 rt6_update_expires(rt, 0);
1127 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1128 rt->rt6i_node->fn_sernum = -1;
1129 }
1130}
1131
6700c270
DM
1132static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1133 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1134{
1135 struct rt6_info *rt6 = (struct rt6_info*)dst;
1136
81aded24 1137 dst_confirm(dst);
1da177e4 1138 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1139 struct net *net = dev_net(dst->dev);
1140
1da177e4
LT
1141 rt6->rt6i_flags |= RTF_MODIFIED;
1142 if (mtu < IPV6_MIN_MTU) {
defb3519 1143 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1144 mtu = IPV6_MIN_MTU;
defb3519
DM
1145 features |= RTAX_FEATURE_ALLFRAG;
1146 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1147 }
defb3519 1148 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1149 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1150 }
1151}
1152
42ae66c8
DM
1153void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1154 int oif, u32 mark)
81aded24
DM
1155{
1156 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1157 struct dst_entry *dst;
1158 struct flowi6 fl6;
1159
1160 memset(&fl6, 0, sizeof(fl6));
1161 fl6.flowi6_oif = oif;
1162 fl6.flowi6_mark = mark;
3e12939a 1163 fl6.flowi6_flags = 0;
81aded24
DM
1164 fl6.daddr = iph->daddr;
1165 fl6.saddr = iph->saddr;
1166 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1167
1168 dst = ip6_route_output(net, NULL, &fl6);
1169 if (!dst->error)
6700c270 1170 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1171 dst_release(dst);
1172}
1173EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1174
1175void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1176{
1177 ip6_update_pmtu(skb, sock_net(sk), mtu,
1178 sk->sk_bound_dev_if, sk->sk_mark);
1179}
1180EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1181
3a5ad2ee
DM
1182void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1183{
1184 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1185 struct dst_entry *dst;
1186 struct flowi6 fl6;
1187
1188 memset(&fl6, 0, sizeof(fl6));
1189 fl6.flowi6_oif = oif;
1190 fl6.flowi6_mark = mark;
1191 fl6.flowi6_flags = 0;
1192 fl6.daddr = iph->daddr;
1193 fl6.saddr = iph->saddr;
1194 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1195
1196 dst = ip6_route_output(net, NULL, &fl6);
1197 if (!dst->error)
6700c270 1198 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1199 dst_release(dst);
1200}
1201EXPORT_SYMBOL_GPL(ip6_redirect);
1202
1203void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1204{
1205 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1206}
1207EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1208
0dbaee3b 1209static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1210{
0dbaee3b
DM
1211 struct net_device *dev = dst->dev;
1212 unsigned int mtu = dst_mtu(dst);
1213 struct net *net = dev_net(dev);
1214
1da177e4
LT
1215 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1216
5578689a
DL
1217 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1219
1220 /*
1ab1457c
YH
1221 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1224 * rely only on pmtu discovery"
1225 */
1226 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1227 mtu = IPV6_MAXPLEN;
1228 return mtu;
1229}
1230
ebb762f2 1231static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1232{
d33e4553 1233 struct inet6_dev *idev;
618f9bc7
SK
1234 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1235
1236 if (mtu)
1237 return mtu;
1238
1239 mtu = IPV6_MIN_MTU;
d33e4553
DM
1240
1241 rcu_read_lock();
1242 idev = __in6_dev_get(dst->dev);
1243 if (idev)
1244 mtu = idev->cnf.mtu6;
1245 rcu_read_unlock();
1246
1247 return mtu;
1248}
1249
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * icmp6_dst_alloc(), and the spinlock taken around every traversal or
 * modification of that list in this file.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1252
3b00944c 1253struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1254 struct neighbour *neigh,
87a11578 1255 struct flowi6 *fl6)
1da177e4 1256{
87a11578 1257 struct dst_entry *dst;
1da177e4
LT
1258 struct rt6_info *rt;
1259 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1260 struct net *net = dev_net(dev);
1da177e4 1261
38308473 1262 if (unlikely(!idev))
122bdf67 1263 return ERR_PTR(-ENODEV);
1da177e4 1264
8b96d22d 1265 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1266 if (unlikely(!rt)) {
1da177e4 1267 in6_dev_put(idev);
87a11578 1268 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1269 goto out;
1270 }
1271
1da177e4
LT
1272 if (neigh)
1273 neigh_hold(neigh);
14deae41 1274 else {
f894cbf8 1275 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1276 if (IS_ERR(neigh)) {
252c3d84 1277 in6_dev_put(idev);
b43faac6
DM
1278 dst_free(&rt->dst);
1279 return ERR_CAST(neigh);
1280 }
14deae41 1281 }
1da177e4 1282
8e2ec639
YZ
1283 rt->dst.flags |= DST_HOST;
1284 rt->dst.output = ip6_output;
97cac082 1285 rt->n = neigh;
d8d1f30b 1286 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1287 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1288 rt->rt6i_dst.plen = 128;
1289 rt->rt6i_idev = idev;
14edd87d 1290 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1291
3b00944c 1292 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1293 rt->dst.next = icmp6_dst_gc_list;
1294 icmp6_dst_gc_list = &rt->dst;
3b00944c 1295 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1296
5578689a 1297 fib6_force_start_gc(net);
1da177e4 1298
87a11578
DM
1299 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1300
1da177e4 1301out:
87a11578 1302 return dst;
1da177e4
LT
1303}
1304
3d0f24a7 1305int icmp6_dst_gc(void)
1da177e4 1306{
e9476e95 1307 struct dst_entry *dst, **pprev;
3d0f24a7 1308 int more = 0;
1da177e4 1309
3b00944c
YH
1310 spin_lock_bh(&icmp6_dst_lock);
1311 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1312
1da177e4
LT
1313 while ((dst = *pprev) != NULL) {
1314 if (!atomic_read(&dst->__refcnt)) {
1315 *pprev = dst->next;
1316 dst_free(dst);
1da177e4
LT
1317 } else {
1318 pprev = &dst->next;
3d0f24a7 1319 ++more;
1da177e4
LT
1320 }
1321 }
1322
3b00944c 1323 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1324
3d0f24a7 1325 return more;
1da177e4
LT
1326}
1327
1e493d19
DM
1328static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1329 void *arg)
1330{
1331 struct dst_entry *dst, **pprev;
1332
1333 spin_lock_bh(&icmp6_dst_lock);
1334 pprev = &icmp6_dst_gc_list;
1335 while ((dst = *pprev) != NULL) {
1336 struct rt6_info *rt = (struct rt6_info *) dst;
1337 if (func(rt, arg)) {
1338 *pprev = dst->next;
1339 dst_free(dst);
1340 } else {
1341 pprev = &dst->next;
1342 }
1343 }
1344 spin_unlock_bh(&icmp6_dst_lock);
1345}
1346
569d3645 1347static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1348{
1da177e4 1349 unsigned long now = jiffies;
86393e52 1350 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1351 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1352 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1353 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1354 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1355 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1356 int entries;
7019b78e 1357
fc66f95c 1358 entries = dst_entries_get_fast(ops);
7019b78e 1359 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1360 entries <= rt_max_size)
1da177e4
LT
1361 goto out;
1362
6891a346
BT
1363 net->ipv6.ip6_rt_gc_expire++;
1364 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1365 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1366 entries = dst_entries_get_slow(ops);
1367 if (entries < ops->gc_thresh)
7019b78e 1368 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1369out:
7019b78e 1370 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1371 return entries > rt_max_size;
1da177e4
LT
1372}
1373
6b75d090 1374int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1375{
5170ae82 1376 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1377 if (hoplimit == 0) {
6b75d090 1378 struct net_device *dev = dst->dev;
c68f24cc
ED
1379 struct inet6_dev *idev;
1380
1381 rcu_read_lock();
1382 idev = __in6_dev_get(dev);
1383 if (idev)
6b75d090 1384 hoplimit = idev->cnf.hop_limit;
c68f24cc 1385 else
53b7997f 1386 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1387 rcu_read_unlock();
1da177e4
LT
1388 }
1389 return hoplimit;
1390}
abbf46ae 1391EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1392
1393/*
1394 *
1395 */
1396
86872cb5 1397int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1398{
1399 int err;
5578689a 1400 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1401 struct rt6_info *rt = NULL;
1402 struct net_device *dev = NULL;
1403 struct inet6_dev *idev = NULL;
c71099ac 1404 struct fib6_table *table;
1da177e4
LT
1405 int addr_type;
1406
86872cb5 1407 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1408 return -EINVAL;
1409#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1410 if (cfg->fc_src_len)
1da177e4
LT
1411 return -EINVAL;
1412#endif
86872cb5 1413 if (cfg->fc_ifindex) {
1da177e4 1414 err = -ENODEV;
5578689a 1415 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1416 if (!dev)
1417 goto out;
1418 idev = in6_dev_get(dev);
1419 if (!idev)
1420 goto out;
1421 }
1422
86872cb5
TG
1423 if (cfg->fc_metric == 0)
1424 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1425
d71314b4 1426 err = -ENOBUFS;
38308473
DM
1427 if (cfg->fc_nlinfo.nlh &&
1428 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1429 table = fib6_get_table(net, cfg->fc_table);
38308473 1430 if (!table) {
f3213831 1431 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1432 table = fib6_new_table(net, cfg->fc_table);
1433 }
1434 } else {
1435 table = fib6_new_table(net, cfg->fc_table);
1436 }
38308473
DM
1437
1438 if (!table)
c71099ac 1439 goto out;
c71099ac 1440
8b96d22d 1441 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1442
38308473 1443 if (!rt) {
1da177e4
LT
1444 err = -ENOMEM;
1445 goto out;
1446 }
1447
1716a961
G
1448 if (cfg->fc_flags & RTF_EXPIRES)
1449 rt6_set_expires(rt, jiffies +
1450 clock_t_to_jiffies(cfg->fc_expires));
1451 else
1452 rt6_clean_expires(rt);
1da177e4 1453
86872cb5
TG
1454 if (cfg->fc_protocol == RTPROT_UNSPEC)
1455 cfg->fc_protocol = RTPROT_BOOT;
1456 rt->rt6i_protocol = cfg->fc_protocol;
1457
1458 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1459
1460 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1461 rt->dst.input = ip6_mc_input;
ab79ad14
1462 else if (cfg->fc_flags & RTF_LOCAL)
1463 rt->dst.input = ip6_input;
1da177e4 1464 else
d8d1f30b 1465 rt->dst.input = ip6_forward;
1da177e4 1466
d8d1f30b 1467 rt->dst.output = ip6_output;
1da177e4 1468
86872cb5
TG
1469 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1470 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1471 if (rt->rt6i_dst.plen == 128)
11d53b49 1472 rt->dst.flags |= DST_HOST;
1da177e4 1473
8e2ec639
YZ
1474 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1475 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1476 if (!metrics) {
1477 err = -ENOMEM;
1478 goto out;
1479 }
1480 dst_init_metrics(&rt->dst, metrics, 0);
1481 }
1da177e4 1482#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1483 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1484 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1485#endif
1486
86872cb5 1487 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1488
1489 /* We cannot add true routes via loopback here,
1490 they would result in kernel looping; promote them to reject routes
1491 */
86872cb5 1492 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1493 (dev && (dev->flags & IFF_LOOPBACK) &&
1494 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1495 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1496 /* hold loopback dev/idev if we haven't done so. */
5578689a 1497 if (dev != net->loopback_dev) {
1da177e4
LT
1498 if (dev) {
1499 dev_put(dev);
1500 in6_dev_put(idev);
1501 }
5578689a 1502 dev = net->loopback_dev;
1da177e4
LT
1503 dev_hold(dev);
1504 idev = in6_dev_get(dev);
1505 if (!idev) {
1506 err = -ENODEV;
1507 goto out;
1508 }
1509 }
d8d1f30b
CG
1510 rt->dst.output = ip6_pkt_discard_out;
1511 rt->dst.input = ip6_pkt_discard;
1da177e4 1512 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1513 switch (cfg->fc_type) {
1514 case RTN_BLACKHOLE:
1515 rt->dst.error = -EINVAL;
1516 break;
1517 case RTN_PROHIBIT:
1518 rt->dst.error = -EACCES;
1519 break;
b4949ab2
ND
1520 case RTN_THROW:
1521 rt->dst.error = -EAGAIN;
1522 break;
ef2c7d7b
ND
1523 default:
1524 rt->dst.error = -ENETUNREACH;
1525 break;
1526 }
1da177e4
LT
1527 goto install_route;
1528 }
1529
86872cb5 1530 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1531 const struct in6_addr *gw_addr;
1da177e4
LT
1532 int gwa_type;
1533
86872cb5 1534 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1535 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1536 gwa_type = ipv6_addr_type(gw_addr);
1537
1538 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1539 struct rt6_info *grt;
1540
1541 /* IPv6 strictly inhibits using not link-local
1542 addresses as nexthop address.
1543 Otherwise, router will not able to send redirects.
1544 It is very good, but in some (rare!) circumstances
1545 (SIT, PtP, NBMA NOARP links) it is handy to allow
1546 some exceptions. --ANK
1547 */
1548 err = -EINVAL;
38308473 1549 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1550 goto out;
1551
5578689a 1552 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1553
1554 err = -EHOSTUNREACH;
38308473 1555 if (!grt)
1da177e4
LT
1556 goto out;
1557 if (dev) {
d1918542 1558 if (dev != grt->dst.dev) {
94e187c0 1559 ip6_rt_put(grt);
1da177e4
LT
1560 goto out;
1561 }
1562 } else {
d1918542 1563 dev = grt->dst.dev;
1da177e4
LT
1564 idev = grt->rt6i_idev;
1565 dev_hold(dev);
1566 in6_dev_hold(grt->rt6i_idev);
1567 }
38308473 1568 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1569 err = 0;
94e187c0 1570 ip6_rt_put(grt);
1da177e4
LT
1571
1572 if (err)
1573 goto out;
1574 }
1575 err = -EINVAL;
38308473 1576 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1577 goto out;
1578 }
1579
1580 err = -ENODEV;
38308473 1581 if (!dev)
1da177e4
LT
1582 goto out;
1583
c3968a85
DW
1584 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1585 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1586 err = -EINVAL;
1587 goto out;
1588 }
4e3fd7a0 1589 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1590 rt->rt6i_prefsrc.plen = 128;
1591 } else
1592 rt->rt6i_prefsrc.plen = 0;
1593
86872cb5 1594 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1595 err = rt6_bind_neighbour(rt, dev);
f83c7790 1596 if (err)
1da177e4 1597 goto out;
1da177e4
LT
1598 }
1599
86872cb5 1600 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1601
1602install_route:
86872cb5
TG
1603 if (cfg->fc_mx) {
1604 struct nlattr *nla;
1605 int remaining;
1606
1607 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1608 int type = nla_type(nla);
86872cb5
TG
1609
1610 if (type) {
1611 if (type > RTAX_MAX) {
1da177e4
LT
1612 err = -EINVAL;
1613 goto out;
1614 }
86872cb5 1615
defb3519 1616 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1617 }
1da177e4
LT
1618 }
1619 }
1620
d8d1f30b 1621 rt->dst.dev = dev;
1da177e4 1622 rt->rt6i_idev = idev;
c71099ac 1623 rt->rt6i_table = table;
63152fc0 1624
c346dca1 1625 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1626
86872cb5 1627 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1628
1629out:
1630 if (dev)
1631 dev_put(dev);
1632 if (idev)
1633 in6_dev_put(idev);
1634 if (rt)
d8d1f30b 1635 dst_free(&rt->dst);
1da177e4
LT
1636 return err;
1637}
1638
86872cb5 1639static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1640{
1641 int err;
c71099ac 1642 struct fib6_table *table;
d1918542 1643 struct net *net = dev_net(rt->dst.dev);
1da177e4 1644
6825a26c
G
1645 if (rt == net->ipv6.ip6_null_entry) {
1646 err = -ENOENT;
1647 goto out;
1648 }
6c813a72 1649
c71099ac
TG
1650 table = rt->rt6i_table;
1651 write_lock_bh(&table->tb6_lock);
86872cb5 1652 err = fib6_del(rt, info);
c71099ac 1653 write_unlock_bh(&table->tb6_lock);
1da177e4 1654
6825a26c 1655out:
94e187c0 1656 ip6_rt_put(rt);
1da177e4
LT
1657 return err;
1658}
1659
e0a1ad73
TG
1660int ip6_del_rt(struct rt6_info *rt)
1661{
4d1169c1 1662 struct nl_info info = {
d1918542 1663 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1664 };
528c4ceb 1665 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1666}
1667
86872cb5 1668static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1669{
c71099ac 1670 struct fib6_table *table;
1da177e4
LT
1671 struct fib6_node *fn;
1672 struct rt6_info *rt;
1673 int err = -ESRCH;
1674
5578689a 1675 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1676 if (!table)
c71099ac
TG
1677 return err;
1678
1679 read_lock_bh(&table->tb6_lock);
1da177e4 1680
c71099ac 1681 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1682 &cfg->fc_dst, cfg->fc_dst_len,
1683 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1684
1da177e4 1685 if (fn) {
d8d1f30b 1686 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1687 if (cfg->fc_ifindex &&
d1918542
DM
1688 (!rt->dst.dev ||
1689 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1690 continue;
86872cb5
TG
1691 if (cfg->fc_flags & RTF_GATEWAY &&
1692 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1693 continue;
86872cb5 1694 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1695 continue;
d8d1f30b 1696 dst_hold(&rt->dst);
c71099ac 1697 read_unlock_bh(&table->tb6_lock);
1da177e4 1698
86872cb5 1699 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1700 }
1701 }
c71099ac 1702 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1703
1704 return err;
1705}
1706
6700c270 1707static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1708{
e8599ff4 1709 struct net *net = dev_net(skb->dev);
a6279458 1710 struct netevent_redirect netevent;
e8599ff4
DM
1711 struct rt6_info *rt, *nrt = NULL;
1712 const struct in6_addr *target;
e8599ff4 1713 struct ndisc_options ndopts;
6e157b6a
DM
1714 const struct in6_addr *dest;
1715 struct neighbour *old_neigh;
e8599ff4
DM
1716 struct inet6_dev *in6_dev;
1717 struct neighbour *neigh;
1718 struct icmp6hdr *icmph;
6e157b6a
DM
1719 int optlen, on_link;
1720 u8 *lladdr;
e8599ff4
DM
1721
1722 optlen = skb->tail - skb->transport_header;
1723 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1724
1725 if (optlen < 0) {
6e157b6a 1726 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1727 return;
1728 }
1729
1730 icmph = icmp6_hdr(skb);
1731 target = (const struct in6_addr *) (icmph + 1);
1732 dest = target + 1;
1733
1734 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1735 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1736 return;
1737 }
1738
6e157b6a 1739 on_link = 0;
e8599ff4
DM
1740 if (ipv6_addr_equal(dest, target)) {
1741 on_link = 1;
1742 } else if (ipv6_addr_type(target) !=
1743 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1744 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1745 return;
1746 }
1747
1748 in6_dev = __in6_dev_get(skb->dev);
1749 if (!in6_dev)
1750 return;
1751 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1752 return;
1753
1754 /* RFC2461 8.1:
1755 * The IP source address of the Redirect MUST be the same as the current
1756 * first-hop router for the specified ICMP Destination Address.
1757 */
1758
1759 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1760 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1761 return;
1762 }
6e157b6a
DM
1763
1764 lladdr = NULL;
e8599ff4
DM
1765 if (ndopts.nd_opts_tgt_lladdr) {
1766 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1767 skb->dev);
1768 if (!lladdr) {
1769 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1770 return;
1771 }
1772 }
1773
6e157b6a
DM
1774 rt = (struct rt6_info *) dst;
1775 if (rt == net->ipv6.ip6_null_entry) {
1776 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1777 return;
6e157b6a 1778 }
e8599ff4 1779
6e157b6a
DM
1780 /* Redirect received -> path was valid.
1781 * Look, redirects are sent only in response to data packets,
1782 * so that this nexthop apparently is reachable. --ANK
1783 */
1784 dst_confirm(&rt->dst);
a6279458 1785
6e157b6a
DM
1786 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1787 if (!neigh)
1788 return;
a6279458 1789
6e157b6a
DM
1790 /* Duplicate redirect: silently ignore. */
1791 old_neigh = rt->n;
1792 if (neigh == old_neigh)
a6279458 1793 goto out;
1da177e4 1794
1da177e4
LT
1795 /*
1796 * We have finally decided to accept it.
1797 */
1798
1ab1457c 1799 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1800 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1801 NEIGH_UPDATE_F_OVERRIDE|
1802 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1803 NEIGH_UPDATE_F_ISROUTER))
1804 );
1805
21efcfa0 1806 nrt = ip6_rt_copy(rt, dest);
38308473 1807 if (!nrt)
1da177e4
LT
1808 goto out;
1809
1810 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1811 if (on_link)
1812 nrt->rt6i_flags &= ~RTF_GATEWAY;
1813
4e3fd7a0 1814 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1815 nrt->n = neigh_clone(neigh);
1da177e4 1816
40e22e8f 1817 if (ip6_ins_rt(nrt))
1da177e4
LT
1818 goto out;
1819
d8d1f30b 1820 netevent.old = &rt->dst;
1d248b1c 1821 netevent.old_neigh = old_neigh;
d8d1f30b 1822 netevent.new = &nrt->dst;
1d248b1c
DM
1823 netevent.new_neigh = neigh;
1824 netevent.daddr = dest;
8d71740c
TT
1825 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1826
38308473 1827 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1828 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1829 ip6_del_rt(rt);
1da177e4
LT
1830 }
1831
1832out:
e8599ff4 1833 neigh_release(neigh);
6e157b6a
DM
1834}
1835
1da177e4
LT
1836/*
1837 * Misc support functions
1838 */
1839
1716a961 1840static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1841 const struct in6_addr *dest)
1da177e4 1842{
d1918542 1843 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1844 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1845 ort->rt6i_table);
1da177e4
LT
1846
1847 if (rt) {
d8d1f30b
CG
1848 rt->dst.input = ort->dst.input;
1849 rt->dst.output = ort->dst.output;
8e2ec639 1850 rt->dst.flags |= DST_HOST;
d8d1f30b 1851
4e3fd7a0 1852 rt->rt6i_dst.addr = *dest;
8e2ec639 1853 rt->rt6i_dst.plen = 128;
defb3519 1854 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1855 rt->dst.error = ort->dst.error;
1da177e4
LT
1856 rt->rt6i_idev = ort->rt6i_idev;
1857 if (rt->rt6i_idev)
1858 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1859 rt->dst.lastuse = jiffies;
1da177e4 1860
4e3fd7a0 1861 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1862 rt->rt6i_flags = ort->rt6i_flags;
1863 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1864 (RTF_DEFAULT | RTF_ADDRCONF))
1865 rt6_set_from(rt, ort);
1866 else
1867 rt6_clean_expires(rt);
1da177e4
LT
1868 rt->rt6i_metric = 0;
1869
1da177e4
LT
1870#ifdef CONFIG_IPV6_SUBTREES
1871 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1872#endif
0f6c6392 1873 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1874 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1875 }
1876 return rt;
1877}
1878
70ceb4f5 1879#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1880static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1881 const struct in6_addr *prefix, int prefixlen,
1882 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1883{
1884 struct fib6_node *fn;
1885 struct rt6_info *rt = NULL;
c71099ac
TG
1886 struct fib6_table *table;
1887
efa2cea0 1888 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1889 if (!table)
c71099ac 1890 return NULL;
70ceb4f5 1891
5744dd9b 1892 read_lock_bh(&table->tb6_lock);
c71099ac 1893 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1894 if (!fn)
1895 goto out;
1896
d8d1f30b 1897 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1898 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1899 continue;
1900 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1901 continue;
1902 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1903 continue;
d8d1f30b 1904 dst_hold(&rt->dst);
70ceb4f5
YH
1905 break;
1906 }
1907out:
5744dd9b 1908 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1909 return rt;
1910}
1911
efa2cea0 1912static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1913 const struct in6_addr *prefix, int prefixlen,
1914 const struct in6_addr *gwaddr, int ifindex,
95c96174 1915 unsigned int pref)
70ceb4f5 1916{
86872cb5
TG
1917 struct fib6_config cfg = {
1918 .fc_table = RT6_TABLE_INFO,
238fc7ea 1919 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1920 .fc_ifindex = ifindex,
1921 .fc_dst_len = prefixlen,
1922 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1923 RTF_UP | RTF_PREF(pref),
15e47304 1924 .fc_nlinfo.portid = 0,
efa2cea0
DL
1925 .fc_nlinfo.nlh = NULL,
1926 .fc_nlinfo.nl_net = net,
86872cb5
TG
1927 };
1928
4e3fd7a0
AD
1929 cfg.fc_dst = *prefix;
1930 cfg.fc_gateway = *gwaddr;
70ceb4f5 1931
e317da96
YH
1932 /* We should treat it as a default route if prefix length is 0. */
1933 if (!prefixlen)
86872cb5 1934 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1935
86872cb5 1936 ip6_route_add(&cfg);
70ceb4f5 1937
efa2cea0 1938 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1939}
1940#endif
1941
b71d1d42 1942struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1943{
1da177e4 1944 struct rt6_info *rt;
c71099ac 1945 struct fib6_table *table;
1da177e4 1946
c346dca1 1947 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1948 if (!table)
c71099ac 1949 return NULL;
1da177e4 1950
5744dd9b 1951 read_lock_bh(&table->tb6_lock);
d8d1f30b 1952 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1953 if (dev == rt->dst.dev &&
045927ff 1954 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1955 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1956 break;
1957 }
1958 if (rt)
d8d1f30b 1959 dst_hold(&rt->dst);
5744dd9b 1960 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1961 return rt;
1962}
1963
b71d1d42 1964struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1965 struct net_device *dev,
1966 unsigned int pref)
1da177e4 1967{
86872cb5
TG
1968 struct fib6_config cfg = {
1969 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1970 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1971 .fc_ifindex = dev->ifindex,
1972 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1973 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1974 .fc_nlinfo.portid = 0,
5578689a 1975 .fc_nlinfo.nlh = NULL,
c346dca1 1976 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1977 };
1da177e4 1978
4e3fd7a0 1979 cfg.fc_gateway = *gwaddr;
1da177e4 1980
86872cb5 1981 ip6_route_add(&cfg);
1da177e4 1982
1da177e4
LT
1983 return rt6_get_dflt_router(gwaddr, dev);
1984}
1985
7b4da532 1986void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1987{
1988 struct rt6_info *rt;
c71099ac
TG
1989 struct fib6_table *table;
1990
1991 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1992 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1993 if (!table)
c71099ac 1994 return;
1da177e4
LT
1995
1996restart:
c71099ac 1997 read_lock_bh(&table->tb6_lock);
d8d1f30b 1998 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1999 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 2000 dst_hold(&rt->dst);
c71099ac 2001 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2002 ip6_del_rt(rt);
1da177e4
LT
2003 goto restart;
2004 }
2005 }
c71099ac 2006 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2007}
2008
5578689a
DL
2009static void rtmsg_to_fib6_config(struct net *net,
2010 struct in6_rtmsg *rtmsg,
86872cb5
TG
2011 struct fib6_config *cfg)
2012{
2013 memset(cfg, 0, sizeof(*cfg));
2014
2015 cfg->fc_table = RT6_TABLE_MAIN;
2016 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2017 cfg->fc_metric = rtmsg->rtmsg_metric;
2018 cfg->fc_expires = rtmsg->rtmsg_info;
2019 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2020 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2021 cfg->fc_flags = rtmsg->rtmsg_flags;
2022
5578689a 2023 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2024
4e3fd7a0
AD
2025 cfg->fc_dst = rtmsg->rtmsg_dst;
2026 cfg->fc_src = rtmsg->rtmsg_src;
2027 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2028}
2029
5578689a 2030int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2031{
86872cb5 2032 struct fib6_config cfg;
1da177e4
LT
2033 struct in6_rtmsg rtmsg;
2034 int err;
2035
2036 switch(cmd) {
2037 case SIOCADDRT: /* Add a route */
2038 case SIOCDELRT: /* Delete a route */
2039 if (!capable(CAP_NET_ADMIN))
2040 return -EPERM;
2041 err = copy_from_user(&rtmsg, arg,
2042 sizeof(struct in6_rtmsg));
2043 if (err)
2044 return -EFAULT;
86872cb5 2045
5578689a 2046 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2047
1da177e4
LT
2048 rtnl_lock();
2049 switch (cmd) {
2050 case SIOCADDRT:
86872cb5 2051 err = ip6_route_add(&cfg);
1da177e4
LT
2052 break;
2053 case SIOCDELRT:
86872cb5 2054 err = ip6_route_del(&cfg);
1da177e4
LT
2055 break;
2056 default:
2057 err = -EINVAL;
2058 }
2059 rtnl_unlock();
2060
2061 return err;
3ff50b79 2062 }
1da177e4
LT
2063
2064 return -EINVAL;
2065}
2066
2067/*
2068 * Drop the packet on the floor
2069 */
2070
d5fdd6ba 2071static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2072{
612f09e8 2073 int type;
adf30907 2074 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2075 switch (ipstats_mib_noroutes) {
2076 case IPSTATS_MIB_INNOROUTES:
0660e03f 2077 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2078 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2079 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2080 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2081 break;
2082 }
2083 /* FALLTHROUGH */
2084 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2085 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2086 ipstats_mib_noroutes);
612f09e8
YH
2087 break;
2088 }
3ffe533c 2089 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2090 kfree_skb(skb);
2091 return 0;
2092}
2093
9ce8ade0
TG
2094static int ip6_pkt_discard(struct sk_buff *skb)
2095{
612f09e8 2096 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2097}
2098
20380731 2099static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2100{
adf30907 2101 skb->dev = skb_dst(skb)->dev;
612f09e8 2102 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2103}
2104
6723ab54
DM
2105#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2106
9ce8ade0
TG
2107static int ip6_pkt_prohibit(struct sk_buff *skb)
2108{
612f09e8 2109 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2110}
2111
2112static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2113{
adf30907 2114 skb->dev = skb_dst(skb)->dev;
612f09e8 2115 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2116}
2117
6723ab54
DM
2118#endif
2119
1da177e4
LT
2120/*
2121 * Allocate a dst for local (unicast / anycast) address.
2122 */
2123
2124struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2125 const struct in6_addr *addr,
8f031519 2126 bool anycast)
1da177e4 2127{
c346dca1 2128 struct net *net = dev_net(idev->dev);
8b96d22d 2129 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2130 int err;
1da177e4 2131
38308473 2132 if (!rt) {
f3213831 2133 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2134 return ERR_PTR(-ENOMEM);
40385653 2135 }
1da177e4 2136
1da177e4
LT
2137 in6_dev_hold(idev);
2138
11d53b49 2139 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2140 rt->dst.input = ip6_input;
2141 rt->dst.output = ip6_output;
1da177e4 2142 rt->rt6i_idev = idev;
1da177e4
LT
2143
2144 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2145 if (anycast)
2146 rt->rt6i_flags |= RTF_ANYCAST;
2147 else
1da177e4 2148 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2149 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2150 if (err) {
d8d1f30b 2151 dst_free(&rt->dst);
f83c7790 2152 return ERR_PTR(err);
1da177e4
LT
2153 }
2154
4e3fd7a0 2155 rt->rt6i_dst.addr = *addr;
1da177e4 2156 rt->rt6i_dst.plen = 128;
5578689a 2157 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2158
d8d1f30b 2159 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2160
2161 return rt;
2162}
2163
c3968a85
DW
2164int ip6_route_get_saddr(struct net *net,
2165 struct rt6_info *rt,
b71d1d42 2166 const struct in6_addr *daddr,
c3968a85
DW
2167 unsigned int prefs,
2168 struct in6_addr *saddr)
2169{
2170 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2171 int err = 0;
2172 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2173 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2174 else
2175 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2176 daddr, prefs, saddr);
2177 return err;
2178}
2179
/* remove deleted ip from prefsrc entries */

/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device filter; NULL matches any device */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address compared against each prefsrc */
};
2186
2187static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2188{
2189 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2190 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2191 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2192
d1918542 2193 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2194 rt != net->ipv6.ip6_null_entry &&
2195 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2196 /* remove prefsrc entry */
2197 rt->rt6i_prefsrc.plen = 0;
2198 }
2199 return 0;
2200}
2201
2202void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2203{
2204 struct net *net = dev_net(ifp->idev->dev);
2205 struct arg_dev_net_ip adni = {
2206 .dev = ifp->idev->dev,
2207 .net = net,
2208 .addr = &ifp->addr,
2209 };
2210 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2211}
2212
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device filter; NULL matches any device */
	struct net		*net;	/* namespace whose null entry is skipped */
};
2217
1da177e4
LT
2218static int fib6_ifdown(struct rt6_info *rt, void *arg)
2219{
bc3ef660 2220 const struct arg_dev_net *adn = arg;
2221 const struct net_device *dev = adn->dev;
8ed67789 2222
d1918542 2223 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2224 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2225 return -1;
c159d30c 2226
1da177e4
LT
2227 return 0;
2228}
2229
f3db4851 2230void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2231{
8ed67789
DL
2232 struct arg_dev_net adn = {
2233 .dev = dev,
2234 .net = net,
2235 };
2236
2237 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2238 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2239}
2240
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2245
2246static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2247{
2248 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2249 struct inet6_dev *idev;
2250
2251 /* In IPv6 pmtu discovery is not optional,
2252 so that RTAX_MTU lock cannot disable it.
2253 We still use this lock to block changes
2254 caused by addrconf/ndisc.
2255 */
2256
2257 idev = __in6_dev_get(arg->dev);
38308473 2258 if (!idev)
1da177e4
LT
2259 return 0;
2260
2261 /* For administrative MTU increase, there is no way to discover
2262 IPv6 PMTU increase, so PMTU increase should be updated here.
2263 Since RFC 1981 doesn't include administrative MTU increase
2264 update PMTU increase is a MUST. (i.e. jumbo frame)
2265 */
2266 /*
2267 If new MTU is less than route PMTU, this new MTU will be the
2268 lowest MTU in the path, update the route PMTU to reflect PMTU
2269 decreases; if new MTU is greater than route PMTU, and the
2270 old MTU is the lowest MTU in the path, update the route PMTU
2271 to reflect the increase. In this case if the other nodes' MTU
2272 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2273 PMTU discouvery.
2274 */
d1918542 2275 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2276 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2277 (dst_mtu(&rt->dst) >= arg->mtu ||
2278 (dst_mtu(&rt->dst) < arg->mtu &&
2279 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2280 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2281 }
1da177e4
LT
2282 return 0;
2283}
2284
95c96174 2285void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2286{
c71099ac
TG
2287 struct rt6_mtu_change_arg arg = {
2288 .dev = dev,
2289 .mtu = mtu,
2290 };
1da177e4 2291
c346dca1 2292 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2293}
2294
ef7c79ed 2295static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2296 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2297 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2298 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2299 [RTA_PRIORITY] = { .type = NLA_U32 },
2300 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2301 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
86872cb5
TG
2302};
2303
2304static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2305 struct fib6_config *cfg)
1da177e4 2306{
86872cb5
TG
2307 struct rtmsg *rtm;
2308 struct nlattr *tb[RTA_MAX+1];
2309 int err;
1da177e4 2310
86872cb5
TG
2311 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2312 if (err < 0)
2313 goto errout;
1da177e4 2314
86872cb5
TG
2315 err = -EINVAL;
2316 rtm = nlmsg_data(nlh);
2317 memset(cfg, 0, sizeof(*cfg));
2318
2319 cfg->fc_table = rtm->rtm_table;
2320 cfg->fc_dst_len = rtm->rtm_dst_len;
2321 cfg->fc_src_len = rtm->rtm_src_len;
2322 cfg->fc_flags = RTF_UP;
2323 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2324 cfg->fc_type = rtm->rtm_type;
86872cb5 2325
ef2c7d7b
ND
2326 if (rtm->rtm_type == RTN_UNREACHABLE ||
2327 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2328 rtm->rtm_type == RTN_PROHIBIT ||
2329 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2330 cfg->fc_flags |= RTF_REJECT;
2331
ab79ad14
2332 if (rtm->rtm_type == RTN_LOCAL)
2333 cfg->fc_flags |= RTF_LOCAL;
2334
15e47304 2335 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2336 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2337 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2338
2339 if (tb[RTA_GATEWAY]) {
2340 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2341 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2342 }
86872cb5
TG
2343
2344 if (tb[RTA_DST]) {
2345 int plen = (rtm->rtm_dst_len + 7) >> 3;
2346
2347 if (nla_len(tb[RTA_DST]) < plen)
2348 goto errout;
2349
2350 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2351 }
86872cb5
TG
2352
2353 if (tb[RTA_SRC]) {
2354 int plen = (rtm->rtm_src_len + 7) >> 3;
2355
2356 if (nla_len(tb[RTA_SRC]) < plen)
2357 goto errout;
2358
2359 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2360 }
86872cb5 2361
c3968a85
DW
2362 if (tb[RTA_PREFSRC])
2363 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2364
86872cb5
TG
2365 if (tb[RTA_OIF])
2366 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2367
2368 if (tb[RTA_PRIORITY])
2369 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2370
2371 if (tb[RTA_METRICS]) {
2372 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2373 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2374 }
86872cb5
TG
2375
2376 if (tb[RTA_TABLE])
2377 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2378
51ebd318
ND
2379 if (tb[RTA_MULTIPATH]) {
2380 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2381 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2382 }
2383
86872cb5
TG
2384 err = 0;
2385errout:
2386 return err;
1da177e4
LT
2387}
2388
51ebd318
ND
2389static int ip6_route_multipath(struct fib6_config *cfg, int add)
2390{
2391 struct fib6_config r_cfg;
2392 struct rtnexthop *rtnh;
2393 int remaining;
2394 int attrlen;
2395 int err = 0, last_err = 0;
2396
2397beginning:
2398 rtnh = (struct rtnexthop *)cfg->fc_mp;
2399 remaining = cfg->fc_mp_len;
2400
2401 /* Parse a Multipath Entry */
2402 while (rtnh_ok(rtnh, remaining)) {
2403 memcpy(&r_cfg, cfg, sizeof(*cfg));
2404 if (rtnh->rtnh_ifindex)
2405 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2406
2407 attrlen = rtnh_attrlen(rtnh);
2408 if (attrlen > 0) {
2409 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2410
2411 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2412 if (nla) {
2413 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2414 r_cfg.fc_flags |= RTF_GATEWAY;
2415 }
2416 }
2417 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2418 if (err) {
2419 last_err = err;
2420 /* If we are trying to remove a route, do not stop the
2421 * loop when ip6_route_del() fails (because next hop is
2422 * already gone), we should try to remove all next hops.
2423 */
2424 if (add) {
2425 /* If add fails, we should try to delete all
2426 * next hops that have been already added.
2427 */
2428 add = 0;
2429 goto beginning;
2430 }
2431 }
1a72418b
ND
2432 /* Because each route is added like a single route we remove
2433 * this flag after the first nexthop (if there is a collision,
2434 * we have already fail to add the first nexthop:
2435 * fib6_add_rt2node() has reject it).
2436 */
2437 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
51ebd318
ND
2438 rtnh = rtnh_next(rtnh, &remaining);
2439 }
2440
2441 return last_err;
2442}
2443
c127ea2c 2444static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2445{
86872cb5
TG
2446 struct fib6_config cfg;
2447 int err;
1da177e4 2448
dfc47ef8
EB
2449 if (!capable(CAP_NET_ADMIN))
2450 return -EPERM;
2451
86872cb5
TG
2452 err = rtm_to_fib6_config(skb, nlh, &cfg);
2453 if (err < 0)
2454 return err;
2455
51ebd318
ND
2456 if (cfg.fc_mp)
2457 return ip6_route_multipath(&cfg, 0);
2458 else
2459 return ip6_route_del(&cfg);
1da177e4
LT
2460}
2461
c127ea2c 2462static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2463{
86872cb5
TG
2464 struct fib6_config cfg;
2465 int err;
1da177e4 2466
dfc47ef8
EB
2467 if (!capable(CAP_NET_ADMIN))
2468 return -EPERM;
2469
86872cb5
TG
2470 err = rtm_to_fib6_config(skb, nlh, &cfg);
2471 if (err < 0)
2472 return err;
2473
51ebd318
ND
2474 if (cfg.fc_mp)
2475 return ip6_route_multipath(&cfg, 1);
2476 else
2477 return ip6_route_add(&cfg);
1da177e4
LT
2478}
2479
339bf98f
TG
2480static inline size_t rt6_nlmsg_size(void)
2481{
2482 return NLMSG_ALIGN(sizeof(struct rtmsg))
2483 + nla_total_size(16) /* RTA_SRC */
2484 + nla_total_size(16) /* RTA_DST */
2485 + nla_total_size(16) /* RTA_GATEWAY */
2486 + nla_total_size(16) /* RTA_PREFSRC */
2487 + nla_total_size(4) /* RTA_TABLE */
2488 + nla_total_size(4) /* RTA_IIF */
2489 + nla_total_size(4) /* RTA_OIF */
2490 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2491 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2492 + nla_total_size(sizeof(struct rta_cacheinfo));
2493}
2494
191cd582
BH
2495static int rt6_fill_node(struct net *net,
2496 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2497 struct in6_addr *dst, struct in6_addr *src,
15e47304 2498 int iif, int type, u32 portid, u32 seq,
7bc570c8 2499 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2500{
2501 struct rtmsg *rtm;
2d7202bf 2502 struct nlmsghdr *nlh;
e3703b3d 2503 long expires;
9e762a4a 2504 u32 table;
f2c31e32 2505 struct neighbour *n;
1da177e4
LT
2506
2507 if (prefix) { /* user wants prefix routes only */
2508 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2509 /* success since this is not a prefix route */
2510 return 1;
2511 }
2512 }
2513
15e47304 2514 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2515 if (!nlh)
26932566 2516 return -EMSGSIZE;
2d7202bf
TG
2517
2518 rtm = nlmsg_data(nlh);
1da177e4
LT
2519 rtm->rtm_family = AF_INET6;
2520 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2521 rtm->rtm_src_len = rt->rt6i_src.plen;
2522 rtm->rtm_tos = 0;
c71099ac 2523 if (rt->rt6i_table)
9e762a4a 2524 table = rt->rt6i_table->tb6_id;
c71099ac 2525 else
9e762a4a
PM
2526 table = RT6_TABLE_UNSPEC;
2527 rtm->rtm_table = table;
c78679e8
DM
2528 if (nla_put_u32(skb, RTA_TABLE, table))
2529 goto nla_put_failure;
ef2c7d7b
ND
2530 if (rt->rt6i_flags & RTF_REJECT) {
2531 switch (rt->dst.error) {
2532 case -EINVAL:
2533 rtm->rtm_type = RTN_BLACKHOLE;
2534 break;
2535 case -EACCES:
2536 rtm->rtm_type = RTN_PROHIBIT;
2537 break;
b4949ab2
ND
2538 case -EAGAIN:
2539 rtm->rtm_type = RTN_THROW;
2540 break;
ef2c7d7b
ND
2541 default:
2542 rtm->rtm_type = RTN_UNREACHABLE;
2543 break;
2544 }
2545 }
38308473 2546 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2547 rtm->rtm_type = RTN_LOCAL;
d1918542 2548 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2549 rtm->rtm_type = RTN_LOCAL;
2550 else
2551 rtm->rtm_type = RTN_UNICAST;
2552 rtm->rtm_flags = 0;
2553 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2554 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2555 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2556 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2557 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2558 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2559 rtm->rtm_protocol = RTPROT_RA;
2560 else
2561 rtm->rtm_protocol = RTPROT_KERNEL;
2562 }
1da177e4 2563
38308473 2564 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2565 rtm->rtm_flags |= RTM_F_CLONED;
2566
2567 if (dst) {
c78679e8
DM
2568 if (nla_put(skb, RTA_DST, 16, dst))
2569 goto nla_put_failure;
1ab1457c 2570 rtm->rtm_dst_len = 128;
1da177e4 2571 } else if (rtm->rtm_dst_len)
c78679e8
DM
2572 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2573 goto nla_put_failure;
1da177e4
LT
2574#ifdef CONFIG_IPV6_SUBTREES
2575 if (src) {
c78679e8
DM
2576 if (nla_put(skb, RTA_SRC, 16, src))
2577 goto nla_put_failure;
1ab1457c 2578 rtm->rtm_src_len = 128;
c78679e8
DM
2579 } else if (rtm->rtm_src_len &&
2580 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2581 goto nla_put_failure;
1da177e4 2582#endif
7bc570c8
YH
2583 if (iif) {
2584#ifdef CONFIG_IPV6_MROUTE
2585 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2586 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2587 if (err <= 0) {
2588 if (!nowait) {
2589 if (err == 0)
2590 return 0;
2591 goto nla_put_failure;
2592 } else {
2593 if (err == -EMSGSIZE)
2594 goto nla_put_failure;
2595 }
2596 }
2597 } else
2598#endif
c78679e8
DM
2599 if (nla_put_u32(skb, RTA_IIF, iif))
2600 goto nla_put_failure;
7bc570c8 2601 } else if (dst) {
1da177e4 2602 struct in6_addr saddr_buf;
c78679e8
DM
2603 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2604 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2605 goto nla_put_failure;
1da177e4 2606 }
2d7202bf 2607
c3968a85
DW
2608 if (rt->rt6i_prefsrc.plen) {
2609 struct in6_addr saddr_buf;
4e3fd7a0 2610 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2611 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2612 goto nla_put_failure;
c3968a85
DW
2613 }
2614
defb3519 2615 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2616 goto nla_put_failure;
2617
97cac082 2618 n = rt->n;
94f826b8 2619 if (n) {
fdd6681d 2620 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2621 goto nla_put_failure;
94f826b8 2622 }
2d7202bf 2623
c78679e8
DM
2624 if (rt->dst.dev &&
2625 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2626 goto nla_put_failure;
2627 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2628 goto nla_put_failure;
8253947e
LW
2629
2630 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2631
87a50699 2632 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2633 goto nla_put_failure;
2d7202bf
TG
2634
2635 return nlmsg_end(skb, nlh);
2636
2637nla_put_failure:
26932566
PM
2638 nlmsg_cancel(skb, nlh);
2639 return -EMSGSIZE;
1da177e4
LT
2640}
2641
1b43af54 2642int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2643{
2644 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2645 int prefix;
2646
2d7202bf
TG
2647 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2648 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2649 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2650 } else
2651 prefix = 0;
2652
191cd582
BH
2653 return rt6_fill_node(arg->net,
2654 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2655 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2656 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2657}
2658
c127ea2c 2659static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2660{
3b1e0a65 2661 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2662 struct nlattr *tb[RTA_MAX+1];
2663 struct rt6_info *rt;
1da177e4 2664 struct sk_buff *skb;
ab364a6f 2665 struct rtmsg *rtm;
4c9483b2 2666 struct flowi6 fl6;
72331bc0 2667 int err, iif = 0, oif = 0;
1da177e4 2668
ab364a6f
TG
2669 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2670 if (err < 0)
2671 goto errout;
1da177e4 2672
ab364a6f 2673 err = -EINVAL;
4c9483b2 2674 memset(&fl6, 0, sizeof(fl6));
1da177e4 2675
ab364a6f
TG
2676 if (tb[RTA_SRC]) {
2677 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2678 goto errout;
2679
4e3fd7a0 2680 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2681 }
2682
2683 if (tb[RTA_DST]) {
2684 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2685 goto errout;
2686
4e3fd7a0 2687 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2688 }
2689
2690 if (tb[RTA_IIF])
2691 iif = nla_get_u32(tb[RTA_IIF]);
2692
2693 if (tb[RTA_OIF])
72331bc0 2694 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2695
2696 if (iif) {
2697 struct net_device *dev;
72331bc0
SL
2698 int flags = 0;
2699
5578689a 2700 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2701 if (!dev) {
2702 err = -ENODEV;
ab364a6f 2703 goto errout;
1da177e4 2704 }
72331bc0
SL
2705
2706 fl6.flowi6_iif = iif;
2707
2708 if (!ipv6_addr_any(&fl6.saddr))
2709 flags |= RT6_LOOKUP_F_HAS_SADDR;
2710
2711 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2712 flags);
2713 } else {
2714 fl6.flowi6_oif = oif;
2715
2716 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2717 }
2718
ab364a6f 2719 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2720 if (!skb) {
94e187c0 2721 ip6_rt_put(rt);
ab364a6f
TG
2722 err = -ENOBUFS;
2723 goto errout;
2724 }
1da177e4 2725
ab364a6f
TG
2726 /* Reserve room for dummy headers, this skb can pass
2727 through good chunk of routing engine.
2728 */
459a98ed 2729 skb_reset_mac_header(skb);
ab364a6f 2730 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2731
d8d1f30b 2732 skb_dst_set(skb, &rt->dst);
1da177e4 2733
4c9483b2 2734 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2735 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2736 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2737 if (err < 0) {
ab364a6f
TG
2738 kfree_skb(skb);
2739 goto errout;
1da177e4
LT
2740 }
2741
15e47304 2742 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2743errout:
1da177e4 2744 return err;
1da177e4
LT
2745}
2746
86872cb5 2747void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2748{
2749 struct sk_buff *skb;
5578689a 2750 struct net *net = info->nl_net;
528c4ceb
DL
2751 u32 seq;
2752 int err;
2753
2754 err = -ENOBUFS;
38308473 2755 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2756
339bf98f 2757 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2758 if (!skb)
21713ebc
TG
2759 goto errout;
2760
191cd582 2761 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2762 event, info->portid, seq, 0, 0, 0);
26932566
PM
2763 if (err < 0) {
2764 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2765 WARN_ON(err == -EMSGSIZE);
2766 kfree_skb(skb);
2767 goto errout;
2768 }
15e47304 2769 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2770 info->nlh, gfp_any());
2771 return;
21713ebc
TG
2772errout:
2773 if (err < 0)
5578689a 2774 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2775}
2776
8ed67789
DL
2777static int ip6_route_dev_notify(struct notifier_block *this,
2778 unsigned long event, void *data)
2779{
2780 struct net_device *dev = (struct net_device *)data;
c346dca1 2781 struct net *net = dev_net(dev);
8ed67789
DL
2782
2783 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2784 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2785 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2786#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2787 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2788 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2789 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2790 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2791#endif
2792 }
2793
2794 return NOTIFY_OK;
2795}
2796
1da177e4
LT
2797/*
2798 * /proc
2799 */
2800
2801#ifdef CONFIG_PROC_FS
2802
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2811
2812static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2813{
33120b30 2814 struct seq_file *m = p_arg;
69cce1d1 2815 struct neighbour *n;
1da177e4 2816
4b7a4274 2817 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2818
2819#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2820 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2821#else
33120b30 2822 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2823#endif
97cac082 2824 n = rt->n;
69cce1d1
DM
2825 if (n) {
2826 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2827 } else {
33120b30 2828 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2829 }
33120b30 2830 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2831 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2832 rt->dst.__use, rt->rt6i_flags,
d1918542 2833 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2834 return 0;
2835}
2836
33120b30 2837static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2838{
f3db4851 2839 struct net *net = (struct net *)m->private;
32b293a5 2840 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2841 return 0;
2842}
1da177e4 2843
33120b30
AD
2844static int ipv6_route_open(struct inode *inode, struct file *file)
2845{
de05c557 2846 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2847}
2848
33120b30
AD
2849static const struct file_operations ipv6_route_proc_fops = {
2850 .owner = THIS_MODULE,
2851 .open = ipv6_route_open,
2852 .read = seq_read,
2853 .llseek = seq_lseek,
b6fcbdb4 2854 .release = single_release_net,
33120b30
AD
2855};
2856
1da177e4
LT
2857static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2858{
69ddb805 2859 struct net *net = (struct net *)seq->private;
1da177e4 2860 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2861 net->ipv6.rt6_stats->fib_nodes,
2862 net->ipv6.rt6_stats->fib_route_nodes,
2863 net->ipv6.rt6_stats->fib_rt_alloc,
2864 net->ipv6.rt6_stats->fib_rt_entries,
2865 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2866 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2867 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2868
2869 return 0;
2870}
2871
2872static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2873{
de05c557 2874 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2875}
2876
9a32144e 2877static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2878 .owner = THIS_MODULE,
2879 .open = rt6_stats_seq_open,
2880 .read = seq_read,
2881 .llseek = seq_lseek,
b6fcbdb4 2882 .release = single_release_net,
1da177e4
LT
2883};
2884#endif /* CONFIG_PROC_FS */
2885
2886#ifdef CONFIG_SYSCTL
2887
1da177e4 2888static
8d65af78 2889int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2890 void __user *buffer, size_t *lenp, loff_t *ppos)
2891{
c486da34
LAG
2892 struct net *net;
2893 int delay;
2894 if (!write)
1da177e4 2895 return -EINVAL;
c486da34
LAG
2896
2897 net = (struct net *)ctl->extra1;
2898 delay = net->ipv6.sysctl.flush_delay;
2899 proc_dointvec(ctl, write, buffer, lenp, ppos);
2900 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2901 return 0;
1da177e4
LT
2902}
2903
760f2d01 2904ctl_table ipv6_route_table_template[] = {
1ab1457c 2905 {
1da177e4 2906 .procname = "flush",
4990509f 2907 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2908 .maxlen = sizeof(int),
89c8b3a1 2909 .mode = 0200,
6d9f239a 2910 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2911 },
2912 {
1da177e4 2913 .procname = "gc_thresh",
9a7ec3a9 2914 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2915 .maxlen = sizeof(int),
2916 .mode = 0644,
6d9f239a 2917 .proc_handler = proc_dointvec,
1da177e4
LT
2918 },
2919 {
1da177e4 2920 .procname = "max_size",
4990509f 2921 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2922 .maxlen = sizeof(int),
2923 .mode = 0644,
6d9f239a 2924 .proc_handler = proc_dointvec,
1da177e4
LT
2925 },
2926 {
1da177e4 2927 .procname = "gc_min_interval",
4990509f 2928 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2929 .maxlen = sizeof(int),
2930 .mode = 0644,
6d9f239a 2931 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2932 },
2933 {
1da177e4 2934 .procname = "gc_timeout",
4990509f 2935 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2936 .maxlen = sizeof(int),
2937 .mode = 0644,
6d9f239a 2938 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2939 },
2940 {
1da177e4 2941 .procname = "gc_interval",
4990509f 2942 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2943 .maxlen = sizeof(int),
2944 .mode = 0644,
6d9f239a 2945 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2946 },
2947 {
1da177e4 2948 .procname = "gc_elasticity",
4990509f 2949 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2950 .maxlen = sizeof(int),
2951 .mode = 0644,
f3d3f616 2952 .proc_handler = proc_dointvec,
1da177e4
LT
2953 },
2954 {
1da177e4 2955 .procname = "mtu_expires",
4990509f 2956 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2957 .maxlen = sizeof(int),
2958 .mode = 0644,
6d9f239a 2959 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2960 },
2961 {
1da177e4 2962 .procname = "min_adv_mss",
4990509f 2963 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2964 .maxlen = sizeof(int),
2965 .mode = 0644,
f3d3f616 2966 .proc_handler = proc_dointvec,
1da177e4
LT
2967 },
2968 {
1da177e4 2969 .procname = "gc_min_interval_ms",
4990509f 2970 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2971 .maxlen = sizeof(int),
2972 .mode = 0644,
6d9f239a 2973 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2974 },
f8572d8f 2975 { }
1da177e4
LT
2976};
2977
2c8c1e72 2978struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2979{
2980 struct ctl_table *table;
2981
2982 table = kmemdup(ipv6_route_table_template,
2983 sizeof(ipv6_route_table_template),
2984 GFP_KERNEL);
5ee09105
YH
2985
2986 if (table) {
2987 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2988 table[0].extra1 = net;
86393e52 2989 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2990 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2991 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2993 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2994 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2995 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2996 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2997 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
2998
2999 /* Don't export sysctls to unprivileged users */
3000 if (net->user_ns != &init_user_ns)
3001 table[0].procname = NULL;
5ee09105
YH
3002 }
3003
760f2d01
DL
3004 return table;
3005}
1da177e4
LT
3006#endif
3007
2c8c1e72 3008static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3009{
633d424b 3010 int ret = -ENOMEM;
8ed67789 3011
86393e52
AD
3012 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3013 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3014
fc66f95c
ED
3015 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3016 goto out_ip6_dst_ops;
3017
8ed67789
DL
3018 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3019 sizeof(*net->ipv6.ip6_null_entry),
3020 GFP_KERNEL);
3021 if (!net->ipv6.ip6_null_entry)
fc66f95c 3022 goto out_ip6_dst_entries;
d8d1f30b 3023 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3024 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3025 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3026 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3027 ip6_template_metrics, true);
8ed67789
DL
3028
3029#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3030 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3031 sizeof(*net->ipv6.ip6_prohibit_entry),
3032 GFP_KERNEL);
68fffc67
PZ
3033 if (!net->ipv6.ip6_prohibit_entry)
3034 goto out_ip6_null_entry;
d8d1f30b 3035 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3036 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3037 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3038 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3039 ip6_template_metrics, true);
8ed67789
DL
3040
3041 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3042 sizeof(*net->ipv6.ip6_blk_hole_entry),
3043 GFP_KERNEL);
68fffc67
PZ
3044 if (!net->ipv6.ip6_blk_hole_entry)
3045 goto out_ip6_prohibit_entry;
d8d1f30b 3046 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3047 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3048 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3049 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3050 ip6_template_metrics, true);
8ed67789
DL
3051#endif
3052
b339a47c
PZ
3053 net->ipv6.sysctl.flush_delay = 0;
3054 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3055 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3056 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3057 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3058 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3059 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3060 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3061
6891a346
BT
3062 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3063
8ed67789
DL
3064 ret = 0;
3065out:
3066 return ret;
f2fc6a54 3067
68fffc67
PZ
3068#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3069out_ip6_prohibit_entry:
3070 kfree(net->ipv6.ip6_prohibit_entry);
3071out_ip6_null_entry:
3072 kfree(net->ipv6.ip6_null_entry);
3073#endif
fc66f95c
ED
3074out_ip6_dst_entries:
3075 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3076out_ip6_dst_ops:
f2fc6a54 3077 goto out;
cdb18761
DL
3078}
3079
2c8c1e72 3080static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3081{
8ed67789
DL
3082 kfree(net->ipv6.ip6_null_entry);
3083#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3084 kfree(net->ipv6.ip6_prohibit_entry);
3085 kfree(net->ipv6.ip6_blk_hole_entry);
3086#endif
41bb78b4 3087 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3088}
3089
d189634e
TG
3090static int __net_init ip6_route_net_init_late(struct net *net)
3091{
3092#ifdef CONFIG_PROC_FS
3093 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3094 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3095#endif
3096 return 0;
3097}
3098
3099static void __net_exit ip6_route_net_exit_late(struct net *net)
3100{
3101#ifdef CONFIG_PROC_FS
3102 proc_net_remove(net, "ipv6_route");
3103 proc_net_remove(net, "rt6_stats");
3104#endif
3105}
3106
cdb18761
DL
3107static struct pernet_operations ip6_route_net_ops = {
3108 .init = ip6_route_net_init,
3109 .exit = ip6_route_net_exit,
3110};
3111
c3426b47
DM
3112static int __net_init ipv6_inetpeer_init(struct net *net)
3113{
3114 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3115
3116 if (!bp)
3117 return -ENOMEM;
3118 inet_peer_base_init(bp);
3119 net->ipv6.peers = bp;
3120 return 0;
3121}
3122
3123static void __net_exit ipv6_inetpeer_exit(struct net *net)
3124{
3125 struct inet_peer_base *bp = net->ipv6.peers;
3126
3127 net->ipv6.peers = NULL;
56a6b248 3128 inetpeer_invalidate_tree(bp);
c3426b47
DM
3129 kfree(bp);
3130}
3131
2b823f72 3132static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3133 .init = ipv6_inetpeer_init,
3134 .exit = ipv6_inetpeer_exit,
3135};
3136
d189634e
TG
3137static struct pernet_operations ip6_route_net_late_ops = {
3138 .init = ip6_route_net_init_late,
3139 .exit = ip6_route_net_exit_late,
3140};
3141
8ed67789
DL
3142static struct notifier_block ip6_route_dev_notifier = {
3143 .notifier_call = ip6_route_dev_notify,
3144 .priority = 0,
3145};
3146
433d49c3 3147int __init ip6_route_init(void)
1da177e4 3148{
433d49c3
DL
3149 int ret;
3150
9a7ec3a9
DL
3151 ret = -ENOMEM;
3152 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3153 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3154 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3155 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3156 goto out;
14e50e57 3157
fc66f95c 3158 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3159 if (ret)
bdb3289f 3160 goto out_kmem_cache;
bdb3289f 3161
c3426b47
DM
3162 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3163 if (ret)
e8803b6c 3164 goto out_dst_entries;
2a0c451a 3165
7e52b33b
DM
3166 ret = register_pernet_subsys(&ip6_route_net_ops);
3167 if (ret)
3168 goto out_register_inetpeer;
c3426b47 3169
5dc121e9
AE
3170 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3171
8ed67789
DL
3172 /* Registering of the loopback is done before this portion of code,
3173 * the loopback reference in rt6_info will not be taken, do it
3174 * manually for init_net */
d8d1f30b 3175 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3176 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3177 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3178 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3179 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3180 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3181 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3182 #endif
e8803b6c 3183 ret = fib6_init();
433d49c3 3184 if (ret)
8ed67789 3185 goto out_register_subsys;
433d49c3 3186
433d49c3
DL
3187 ret = xfrm6_init();
3188 if (ret)
e8803b6c 3189 goto out_fib6_init;
c35b7e72 3190
433d49c3
DL
3191 ret = fib6_rules_init();
3192 if (ret)
3193 goto xfrm6_init;
7e5449c2 3194
d189634e
TG
3195 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3196 if (ret)
3197 goto fib6_rules_init;
3198
433d49c3 3199 ret = -ENOBUFS;
c7ac8679
GR
3200 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3201 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3202 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3203 goto out_register_late_subsys;
c127ea2c 3204
8ed67789 3205 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3206 if (ret)
d189634e 3207 goto out_register_late_subsys;
8ed67789 3208
433d49c3
DL
3209out:
3210 return ret;
3211
d189634e
TG
3212out_register_late_subsys:
3213 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3214fib6_rules_init:
433d49c3
DL
3215 fib6_rules_cleanup();
3216xfrm6_init:
433d49c3 3217 xfrm6_fini();
2a0c451a
TG
3218out_fib6_init:
3219 fib6_gc_cleanup();
8ed67789
DL
3220out_register_subsys:
3221 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3222out_register_inetpeer:
3223 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3224out_dst_entries:
3225 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3226out_kmem_cache:
f2fc6a54 3227 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3228 goto out;
1da177e4
LT
3229}
3230
3231void ip6_route_cleanup(void)
3232{
8ed67789 3233 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3234 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3235 fib6_rules_cleanup();
1da177e4 3236 xfrm6_fini();
1da177e4 3237 fib6_gc_cleanup();
c3426b47 3238 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3239 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3240 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3241 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3242}