ipv6: Introduce ip6_flow_hdr() to fill version, tclass and flowlabel.
[linux-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
1716a961 68static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 69 const struct in6_addr *dest);
1da177e4 70static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 71static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 72static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
73static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74static void ip6_dst_destroy(struct dst_entry *);
75static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
569d3645 77static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
78
79static int ip6_pkt_discard(struct sk_buff *skb);
80static int ip6_pkt_discard_out(struct sk_buff *skb);
81static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
82static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
1da177e4 86
70ceb4f5 87#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 88static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
95c96174 91 unsigned int pref);
efa2cea0 92static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
95#endif
96
06582540
DM
97static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98{
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
101 u32 *p = NULL;
102
8e2ec639
YZ
103 if (!(rt->dst.flags & DST_HOST))
104 return NULL;
105
fbfe95a4 106 peer = rt6_get_peer_create(rt);
06582540
DM
107 if (peer) {
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
110
111 p = peer->metrics;
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
117
118 if (prev != old) {
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
121 p = NULL;
122 }
123 }
124 return p;
125}
126
f894cbf8
DM
127static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct sk_buff *skb,
129 const void *daddr)
39232973
DM
130{
131 struct in6_addr *p = &rt->rt6i_gateway;
132
a7563f34 133 if (!ipv6_addr_any(p))
39232973 134 return (const void *) p;
f894cbf8
DM
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
39232973
DM
137 return daddr;
138}
139
f894cbf8
DM
140static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct sk_buff *skb,
142 const void *daddr)
d3aaeb38 143{
39232973
DM
144 struct rt6_info *rt = (struct rt6_info *) dst;
145 struct neighbour *n;
146
f894cbf8 147 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
149 if (n)
150 return n;
151 return neigh_create(&nd_tbl, daddr, dst->dev);
152}
153
8ade06c6 154static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 155{
8ade06c6
DM
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157 if (!n) {
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159 if (IS_ERR(n))
160 return PTR_ERR(n);
161 }
97cac082 162 rt->n = n;
f83c7790
DM
163
164 return 0;
d3aaeb38
DM
165}
166
9a7ec3a9 167static struct dst_ops ip6_dst_ops_template = {
1da177e4 168 .family = AF_INET6,
09640e63 169 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
170 .gc = ip6_dst_gc,
171 .gc_thresh = 1024,
172 .check = ip6_dst_check,
0dbaee3b 173 .default_advmss = ip6_default_advmss,
ebb762f2 174 .mtu = ip6_mtu,
06582540 175 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
176 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 181 .redirect = rt6_do_redirect,
1ac06e03 182 .local_out = __ip6_local_out,
d3aaeb38 183 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
184};
185
ebb762f2 186static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 187{
618f9bc7
SK
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190 return mtu ? : dst->dev->mtu;
ec831ea7
RD
191}
192
6700c270
DM
193static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
14e50e57
DM
195{
196}
197
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
202
0972ddb2
HB
203static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204 unsigned long old)
205{
206 return NULL;
207}
208
14e50e57
DM
209static struct dst_ops ip6_dst_blackhole_ops = {
210 .family = AF_INET6,
09640e63 211 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
212 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check,
ebb762f2 214 .mtu = ip6_blackhole_mtu,
214f45c9 215 .default_advmss = ip6_default_advmss,
14e50e57 216 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 217 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 218 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 219 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
220};
221
62fa8a84 222static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 223 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
224};
225
fb0af4c7 226static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
2c20cbd7 230 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 231 .error = -ENETUNREACH,
d8d1f30b
CG
232 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out,
1da177e4
LT
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 236 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
101367c2
TG
241#ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
6723ab54
DM
243static int ip6_pkt_prohibit(struct sk_buff *skb);
244static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 245
fb0af4c7 246static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
247 .dst = {
248 .__refcnt = ATOMIC_INIT(1),
249 .__use = 1,
2c20cbd7 250 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 251 .error = -EACCES,
d8d1f30b
CG
252 .input = ip6_pkt_prohibit,
253 .output = ip6_pkt_prohibit_out,
101367c2
TG
254 },
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 256 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
259};
260
fb0af4c7 261static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
262 .dst = {
263 .__refcnt = ATOMIC_INIT(1),
264 .__use = 1,
2c20cbd7 265 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 266 .error = -EINVAL,
d8d1f30b
CG
267 .input = dst_discard,
268 .output = dst_discard,
101367c2
TG
269 },
270 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 271 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
272 .rt6i_metric = ~(u32) 0,
273 .rt6i_ref = ATOMIC_INIT(1),
274};
275
276#endif
277
1da177e4 278/* allocate dst with ip6_dst_ops */
97bab73f 279static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 280 struct net_device *dev,
8b96d22d
DM
281 int flags,
282 struct fib6_table *table)
1da177e4 283{
97bab73f 284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 285 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 286
97bab73f 287 if (rt) {
8104891b
SK
288 struct dst_entry *dst = &rt->dst;
289
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 292 rt->rt6i_genid = rt_genid(net);
51ebd318
ND
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
97bab73f 295 }
cf911662 296 return rt;
1da177e4
LT
297}
298
299static void ip6_dst_destroy(struct dst_entry *dst)
300{
301 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev;
303
97cac082
DM
304 if (rt->n)
305 neigh_release(rt->n);
306
8e2ec639
YZ
307 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst);
309
38308473 310 if (idev) {
1da177e4
LT
311 rt->rt6i_idev = NULL;
312 in6_dev_put(idev);
1ab1457c 313 }
1716a961
G
314
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316 dst_release(dst->from);
317
97bab73f
DM
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
320 inet_putpeer(peer);
321 }
322}
323
324void rt6_bind_peer(struct rt6_info *rt, int create)
325{
97bab73f 326 struct inet_peer_base *base;
b3419363
DM
327 struct inet_peer *peer;
328
97bab73f
DM
329 base = inetpeer_base_ptr(rt->_rt6i_peer);
330 if (!base)
331 return;
332
333 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
334 if (peer) {
335 if (!rt6_set_peer(rt, peer))
336 inet_putpeer(peer);
7b34ca2a 337 }
1da177e4
LT
338}
339
340static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341 int how)
342{
343 struct rt6_info *rt = (struct rt6_info *)dst;
344 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 345 struct net_device *loopback_dev =
c346dca1 346 dev_net(dev)->loopback_dev;
1da177e4 347
97cac082
DM
348 if (dev != loopback_dev) {
349 if (idev && idev->dev == dev) {
350 struct inet6_dev *loopback_idev =
351 in6_dev_get(loopback_dev);
352 if (loopback_idev) {
353 rt->rt6i_idev = loopback_idev;
354 in6_dev_put(idev);
355 }
356 }
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
360 dev_put(dev);
1da177e4
LT
361 }
362 }
363}
364
a50feda5 365static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 366{
1716a961
G
367 if (rt->rt6i_flags & RTF_EXPIRES) {
368 if (time_after(jiffies, rt->dst.expires))
a50feda5 369 return true;
1716a961 370 } else if (rt->dst.from) {
3fd91fb3 371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 372 }
a50feda5 373 return false;
1da177e4
LT
374}
375
a50feda5 376static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 377{
a02cec21
ED
378 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
380}
381
51ebd318
ND
382/* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
385 */
386static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
388{
389 unsigned int val = fl6->flowi6_proto;
390
b3ce5ae1
ND
391 val ^= (__force u32)fl6->daddr.s6_addr32[0];
392 val ^= (__force u32)fl6->daddr.s6_addr32[1];
393 val ^= (__force u32)fl6->daddr.s6_addr32[2];
394 val ^= (__force u32)fl6->daddr.s6_addr32[3];
51ebd318 395
b3ce5ae1
ND
396 val ^= (__force u32)fl6->saddr.s6_addr32[0];
397 val ^= (__force u32)fl6->saddr.s6_addr32[1];
398 val ^= (__force u32)fl6->saddr.s6_addr32[2];
399 val ^= (__force u32)fl6->saddr.s6_addr32[3];
51ebd318
ND
400
401 /* Work only if this not encapsulated */
402 switch (fl6->flowi6_proto) {
403 case IPPROTO_UDP:
404 case IPPROTO_TCP:
405 case IPPROTO_SCTP:
b3ce5ae1
ND
406 val ^= (__force u16)fl6->fl6_sport;
407 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
408 break;
409
410 case IPPROTO_ICMPV6:
b3ce5ae1
ND
411 val ^= (__force u16)fl6->fl6_icmp_type;
412 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
413 break;
414 }
415 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 416 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
417
418 /* Perhaps, we need to tune, this function? */
419 val = val ^ (val >> 7) ^ (val >> 12);
420 return val % candidate_count;
421}
422
423static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424 struct flowi6 *fl6)
425{
426 struct rt6_info *sibling, *next_sibling;
427 int route_choosen;
428
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route, if route_choosen == 0
431 * (siblings does not include ourself)
432 */
433 if (route_choosen)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
436 route_choosen--;
437 if (route_choosen == 0) {
438 match = sibling;
439 break;
440 }
441 }
442 return match;
443}
444
1da177e4 445/*
c71099ac 446 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
447 */
448
8ed67789
DL
449static inline struct rt6_info *rt6_device_match(struct net *net,
450 struct rt6_info *rt,
b71d1d42 451 const struct in6_addr *saddr,
1da177e4 452 int oif,
d420895e 453 int flags)
1da177e4
LT
454{
455 struct rt6_info *local = NULL;
456 struct rt6_info *sprt;
457
dd3abc4e
YH
458 if (!oif && ipv6_addr_any(saddr))
459 goto out;
460
d8d1f30b 461 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 462 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
463
464 if (oif) {
1da177e4
LT
465 if (dev->ifindex == oif)
466 return sprt;
467 if (dev->flags & IFF_LOOPBACK) {
38308473 468 if (!sprt->rt6i_idev ||
1da177e4 469 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 470 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 471 continue;
1ab1457c 472 if (local && (!oif ||
1da177e4
LT
473 local->rt6i_idev->dev->ifindex == oif))
474 continue;
475 }
476 local = sprt;
477 }
dd3abc4e
YH
478 } else {
479 if (ipv6_chk_addr(net, saddr, dev,
480 flags & RT6_LOOKUP_F_IFACE))
481 return sprt;
1da177e4 482 }
dd3abc4e 483 }
1da177e4 484
dd3abc4e 485 if (oif) {
1da177e4
LT
486 if (local)
487 return local;
488
d420895e 489 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 490 return net->ipv6.ip6_null_entry;
1da177e4 491 }
dd3abc4e 492out:
1da177e4
LT
493 return rt;
494}
495
27097255
YH
496#ifdef CONFIG_IPV6_ROUTER_PREF
497static void rt6_probe(struct rt6_info *rt)
498{
f2c31e32 499 struct neighbour *neigh;
27097255
YH
500 /*
501 * Okay, this does not seem to be appropriate
502 * for now, however, we need to check if it
503 * is really so; aka Router Reachability Probing.
504 *
505 * Router Reachability Probe MUST be rate-limited
506 * to no more than one per minute.
507 */
97cac082 508 neigh = rt ? rt->n : NULL;
27097255 509 if (!neigh || (neigh->nud_state & NUD_VALID))
fdd6681d 510 return;
27097255
YH
511 read_lock_bh(&neigh->lock);
512 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 513 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
514 struct in6_addr mcaddr;
515 struct in6_addr *target;
516
517 neigh->updated = jiffies;
518 read_unlock_bh(&neigh->lock);
519
520 target = (struct in6_addr *)&neigh->primary_key;
521 addrconf_addr_solict_mult(target, &mcaddr);
d1918542 522 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
f2c31e32 523 } else {
27097255 524 read_unlock_bh(&neigh->lock);
f2c31e32 525 }
27097255
YH
526}
527#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
531#endif
532
1da177e4 533/*
554cfb7e 534 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 535 */
b6f99a21 536static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 537{
d1918542 538 struct net_device *dev = rt->dst.dev;
161980f4 539 if (!oif || dev->ifindex == oif)
554cfb7e 540 return 2;
161980f4
DM
541 if ((dev->flags & IFF_LOOPBACK) &&
542 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543 return 1;
544 return 0;
554cfb7e 545}
1da177e4 546
a5a81f0b 547static inline bool rt6_check_neigh(struct rt6_info *rt)
1da177e4 548{
f2c31e32 549 struct neighbour *neigh;
a5a81f0b 550 bool ret = false;
f2c31e32 551
97cac082 552 neigh = rt->n;
4d0c5911
YH
553 if (rt->rt6i_flags & RTF_NONEXTHOP ||
554 !(rt->rt6i_flags & RTF_GATEWAY))
a5a81f0b 555 ret = true;
4d0c5911 556 else if (neigh) {
554cfb7e
YH
557 read_lock_bh(&neigh->lock);
558 if (neigh->nud_state & NUD_VALID)
a5a81f0b 559 ret = true;
398bcbeb 560#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b
PM
561 else if (!(neigh->nud_state & NUD_FAILED))
562 ret = true;
398bcbeb 563#endif
554cfb7e 564 read_unlock_bh(&neigh->lock);
a5a81f0b
PM
565 }
566 return ret;
1da177e4
LT
567}
568
554cfb7e
YH
569static int rt6_score_route(struct rt6_info *rt, int oif,
570 int strict)
1da177e4 571{
a5a81f0b 572 int m;
1ab1457c 573
4d0c5911 574 m = rt6_check_dev(rt, oif);
77d16f45 575 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 576 return -1;
ebacaaa0
YH
577#ifdef CONFIG_IPV6_ROUTER_PREF
578 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579#endif
a5a81f0b 580 if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
581 return -1;
582 return m;
583}
584
f11e6659
DM
585static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
586 int *mpri, struct rt6_info *match)
554cfb7e 587{
f11e6659
DM
588 int m;
589
590 if (rt6_check_expired(rt))
591 goto out;
592
593 m = rt6_score_route(rt, oif, strict);
594 if (m < 0)
595 goto out;
596
597 if (m > *mpri) {
598 if (strict & RT6_LOOKUP_F_REACHABLE)
599 rt6_probe(match);
600 *mpri = m;
601 match = rt;
602 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
603 rt6_probe(rt);
604 }
605
606out:
607 return match;
608}
609
610static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
611 struct rt6_info *rr_head,
612 u32 metric, int oif, int strict)
613{
614 struct rt6_info *rt, *match;
554cfb7e 615 int mpri = -1;
1da177e4 616
f11e6659
DM
617 match = NULL;
618 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 619 rt = rt->dst.rt6_next)
f11e6659
DM
620 match = find_match(rt, oif, strict, &mpri, match);
621 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 622 rt = rt->dst.rt6_next)
f11e6659 623 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 624
f11e6659
DM
625 return match;
626}
1da177e4 627
f11e6659
DM
628static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
629{
630 struct rt6_info *match, *rt0;
8ed67789 631 struct net *net;
1da177e4 632
f11e6659
DM
633 rt0 = fn->rr_ptr;
634 if (!rt0)
635 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 636
f11e6659 637 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 638
554cfb7e 639 if (!match &&
f11e6659 640 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 641 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 642
554cfb7e 643 /* no entries matched; do round-robin */
f11e6659
DM
644 if (!next || next->rt6i_metric != rt0->rt6i_metric)
645 next = fn->leaf;
646
647 if (next != rt0)
648 fn->rr_ptr = next;
1da177e4 649 }
1da177e4 650
d1918542 651 net = dev_net(rt0->dst.dev);
a02cec21 652 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
653}
654
70ceb4f5
YH
655#ifdef CONFIG_IPV6_ROUTE_INFO
656int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 657 const struct in6_addr *gwaddr)
70ceb4f5 658{
c346dca1 659 struct net *net = dev_net(dev);
70ceb4f5
YH
660 struct route_info *rinfo = (struct route_info *) opt;
661 struct in6_addr prefix_buf, *prefix;
662 unsigned int pref;
4bed72e4 663 unsigned long lifetime;
70ceb4f5
YH
664 struct rt6_info *rt;
665
666 if (len < sizeof(struct route_info)) {
667 return -EINVAL;
668 }
669
670 /* Sanity check for prefix_len and length */
671 if (rinfo->length > 3) {
672 return -EINVAL;
673 } else if (rinfo->prefix_len > 128) {
674 return -EINVAL;
675 } else if (rinfo->prefix_len > 64) {
676 if (rinfo->length < 2) {
677 return -EINVAL;
678 }
679 } else if (rinfo->prefix_len > 0) {
680 if (rinfo->length < 1) {
681 return -EINVAL;
682 }
683 }
684
685 pref = rinfo->route_pref;
686 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 687 return -EINVAL;
70ceb4f5 688
4bed72e4 689 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
690
691 if (rinfo->length == 3)
692 prefix = (struct in6_addr *)rinfo->prefix;
693 else {
694 /* this function is safe */
695 ipv6_addr_prefix(&prefix_buf,
696 (struct in6_addr *)rinfo->prefix,
697 rinfo->prefix_len);
698 prefix = &prefix_buf;
699 }
700
efa2cea0
DL
701 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
702 dev->ifindex);
70ceb4f5
YH
703
704 if (rt && !lifetime) {
e0a1ad73 705 ip6_del_rt(rt);
70ceb4f5
YH
706 rt = NULL;
707 }
708
709 if (!rt && lifetime)
efa2cea0 710 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
711 pref);
712 else if (rt)
713 rt->rt6i_flags = RTF_ROUTEINFO |
714 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
715
716 if (rt) {
1716a961
G
717 if (!addrconf_finite_timeout(lifetime))
718 rt6_clean_expires(rt);
719 else
720 rt6_set_expires(rt, jiffies + HZ * lifetime);
721
94e187c0 722 ip6_rt_put(rt);
70ceb4f5
YH
723 }
724 return 0;
725}
726#endif
727
/*
 * BACKTRACK - retry a failed lookup further up the fib tree.
 *
 * Not self-contained: the macro reads and writes the caller's local
 * variables "rt" and "fn" and jumps to the caller's labels "out" and
 * "restart".
 *
 * When rt is the namespace's null entry, climb from fn towards the root,
 * descending into a parent's source-address subtree (looked up with
 * saddr) when there is one that fn did not come from.  Jump to "restart"
 * at the first node carrying route information (RTN_RTINFO), or to "out"
 * once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 745
8ed67789
DL
746static struct rt6_info *ip6_pol_route_lookup(struct net *net,
747 struct fib6_table *table,
4c9483b2 748 struct flowi6 *fl6, int flags)
1da177e4
LT
749{
750 struct fib6_node *fn;
751 struct rt6_info *rt;
752
c71099ac 753 read_lock_bh(&table->tb6_lock);
4c9483b2 754 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
755restart:
756 rt = fn->leaf;
4c9483b2 757 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318
ND
758 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
759 rt = rt6_multipath_select(rt, fl6);
4c9483b2 760 BACKTRACK(net, &fl6->saddr);
c71099ac 761out:
d8d1f30b 762 dst_use(&rt->dst, jiffies);
c71099ac 763 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
764 return rt;
765
766}
767
ea6e574e
FW
768struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
769 int flags)
770{
771 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
772}
773EXPORT_SYMBOL_GPL(ip6_route_lookup);
774
9acd9f3a
YH
775struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
776 const struct in6_addr *saddr, int oif, int strict)
c71099ac 777{
4c9483b2
DM
778 struct flowi6 fl6 = {
779 .flowi6_oif = oif,
780 .daddr = *daddr,
c71099ac
TG
781 };
782 struct dst_entry *dst;
77d16f45 783 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 784
adaa70bb 785 if (saddr) {
4c9483b2 786 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
787 flags |= RT6_LOOKUP_F_HAS_SADDR;
788 }
789
4c9483b2 790 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
791 if (dst->error == 0)
792 return (struct rt6_info *) dst;
793
794 dst_release(dst);
795
1da177e4
LT
796 return NULL;
797}
798
7159039a
YH
799EXPORT_SYMBOL(rt6_lookup);
800
c71099ac 801/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
802 It takes new route entry, the addition fails by any reason the
803 route is freed. In any case, if caller does not hold it, it may
804 be destroyed.
805 */
806
86872cb5 807static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
808{
809 int err;
c71099ac 810 struct fib6_table *table;
1da177e4 811
c71099ac
TG
812 table = rt->rt6i_table;
813 write_lock_bh(&table->tb6_lock);
86872cb5 814 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 815 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
816
817 return err;
818}
819
40e22e8f
TG
820int ip6_ins_rt(struct rt6_info *rt)
821{
4d1169c1 822 struct nl_info info = {
d1918542 823 .nl_net = dev_net(rt->dst.dev),
4d1169c1 824 };
528c4ceb 825 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
826}
827
1716a961 828static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 829 const struct in6_addr *daddr,
b71d1d42 830 const struct in6_addr *saddr)
1da177e4 831{
1da177e4
LT
832 struct rt6_info *rt;
833
834 /*
835 * Clone the route.
836 */
837
21efcfa0 838 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
839
840 if (rt) {
14deae41
DM
841 int attempts = !in_softirq();
842
38308473 843 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 844 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 845 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 846 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 847 rt->rt6i_gateway = *daddr;
58c4fb86 848 }
1da177e4 849
1da177e4 850 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
851
852#ifdef CONFIG_IPV6_SUBTREES
853 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 854 rt->rt6i_src.addr = *saddr;
1da177e4
LT
855 rt->rt6i_src.plen = 128;
856 }
857#endif
858
14deae41 859 retry:
8ade06c6 860 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 861 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
862 int saved_rt_min_interval =
863 net->ipv6.sysctl.ip6_rt_gc_min_interval;
864 int saved_rt_elasticity =
865 net->ipv6.sysctl.ip6_rt_gc_elasticity;
866
867 if (attempts-- > 0) {
868 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
869 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
870
86393e52 871 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
872
873 net->ipv6.sysctl.ip6_rt_gc_elasticity =
874 saved_rt_elasticity;
875 net->ipv6.sysctl.ip6_rt_gc_min_interval =
876 saved_rt_min_interval;
877 goto retry;
878 }
879
f3213831 880 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 881 dst_free(&rt->dst);
14deae41
DM
882 return NULL;
883 }
95a9a5ba 884 }
1da177e4 885
95a9a5ba
YH
886 return rt;
887}
1da177e4 888
21efcfa0
ED
889static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
890 const struct in6_addr *daddr)
299d9939 891{
21efcfa0
ED
892 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
893
299d9939 894 if (rt) {
299d9939 895 rt->rt6i_flags |= RTF_CACHE;
97cac082 896 rt->n = neigh_clone(ort->n);
299d9939
YH
897 }
898 return rt;
899}
900
8ed67789 901static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 902 struct flowi6 *fl6, int flags)
1da177e4
LT
903{
904 struct fib6_node *fn;
519fbd87 905 struct rt6_info *rt, *nrt;
c71099ac 906 int strict = 0;
1da177e4 907 int attempts = 3;
519fbd87 908 int err;
53b7997f 909 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 910
77d16f45 911 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
912
913relookup:
c71099ac 914 read_lock_bh(&table->tb6_lock);
1da177e4 915
8238dd06 916restart_2:
4c9483b2 917 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
918
919restart:
4acad72d 920 rt = rt6_select(fn, oif, strict | reachable);
51ebd318
ND
921 if (rt->rt6i_nsiblings && oif == 0)
922 rt = rt6_multipath_select(rt, fl6);
4c9483b2 923 BACKTRACK(net, &fl6->saddr);
8ed67789 924 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 925 rt->rt6i_flags & RTF_CACHE)
1ddef044 926 goto out;
1da177e4 927
d8d1f30b 928 dst_hold(&rt->dst);
c71099ac 929 read_unlock_bh(&table->tb6_lock);
fb9de91e 930
97cac082 931 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 932 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 933 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 934 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
935 else
936 goto out2;
e40cf353 937
94e187c0 938 ip6_rt_put(rt);
8ed67789 939 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 940
d8d1f30b 941 dst_hold(&rt->dst);
519fbd87 942 if (nrt) {
40e22e8f 943 err = ip6_ins_rt(nrt);
519fbd87 944 if (!err)
1da177e4 945 goto out2;
1da177e4 946 }
1da177e4 947
519fbd87
YH
948 if (--attempts <= 0)
949 goto out2;
950
951 /*
c71099ac 952 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
953 * released someone could insert this route. Relookup.
954 */
94e187c0 955 ip6_rt_put(rt);
519fbd87
YH
956 goto relookup;
957
958out:
8238dd06
YH
959 if (reachable) {
960 reachable = 0;
961 goto restart_2;
962 }
d8d1f30b 963 dst_hold(&rt->dst);
c71099ac 964 read_unlock_bh(&table->tb6_lock);
1da177e4 965out2:
d8d1f30b
CG
966 rt->dst.lastuse = jiffies;
967 rt->dst.__use++;
c71099ac
TG
968
969 return rt;
1da177e4
LT
970}
971
8ed67789 972static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 973 struct flowi6 *fl6, int flags)
4acad72d 974{
4c9483b2 975 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
976}
977
72331bc0
SL
978static struct dst_entry *ip6_route_input_lookup(struct net *net,
979 struct net_device *dev,
980 struct flowi6 *fl6, int flags)
981{
982 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
983 flags |= RT6_LOOKUP_F_IFACE;
984
985 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
986}
987
c71099ac
TG
988void ip6_route_input(struct sk_buff *skb)
989{
b71d1d42 990 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 991 struct net *net = dev_net(skb->dev);
adaa70bb 992 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
993 struct flowi6 fl6 = {
994 .flowi6_iif = skb->dev->ifindex,
995 .daddr = iph->daddr,
996 .saddr = iph->saddr,
38308473 997 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
998 .flowi6_mark = skb->mark,
999 .flowi6_proto = iph->nexthdr,
c71099ac 1000 };
adaa70bb 1001
72331bc0 1002 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1003}
1004
8ed67789 1005static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1006 struct flowi6 *fl6, int flags)
1da177e4 1007{
4c9483b2 1008 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1009}
1010
9c7a4f9c 1011struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1012 struct flowi6 *fl6)
c71099ac
TG
1013{
1014 int flags = 0;
1015
1fb9489b 1016 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1017
4c9483b2 1018 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1019 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1020
4c9483b2 1021 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1022 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1023 else if (sk)
1024 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1025
4c9483b2 1026 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
1027}
1028
7159039a 1029EXPORT_SYMBOL(ip6_route_output);
1da177e4 1030
2774c131 1031struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1032{
5c1e6aa3 1033 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1034 struct dst_entry *new = NULL;
1035
f5b0a874 1036 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1037 if (rt) {
d8d1f30b 1038 new = &rt->dst;
14e50e57 1039
8104891b
SK
1040 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1041 rt6_init_peer(rt, net->ipv6.peers);
1042
14e50e57 1043 new->__use = 1;
352e512c
HX
1044 new->input = dst_discard;
1045 new->output = dst_discard;
14e50e57 1046
21efcfa0
ED
1047 if (dst_metrics_read_only(&ort->dst))
1048 new->_metrics = ort->dst._metrics;
1049 else
1050 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1051 rt->rt6i_idev = ort->rt6i_idev;
1052 if (rt->rt6i_idev)
1053 in6_dev_hold(rt->rt6i_idev);
14e50e57 1054
4e3fd7a0 1055 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1056 rt->rt6i_flags = ort->rt6i_flags;
1057 rt6_clean_expires(rt);
14e50e57
DM
1058 rt->rt6i_metric = 0;
1059
1060 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061#ifdef CONFIG_IPV6_SUBTREES
1062 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063#endif
1064
1065 dst_free(new);
1066 }
1067
69ead7af
DM
1068 dst_release(dst_orig);
1069 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1070}
14e50e57 1071
1da177e4
LT
1072/*
1073 * Destination cache support functions
1074 */
1075
1076static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077{
1078 struct rt6_info *rt;
1079
1080 rt = (struct rt6_info *) dst;
1081
6f3118b5
ND
1082 /* All IPV6 dsts are created with ->obsolete set to the value
1083 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084 * into this function always.
1085 */
1086 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087 return NULL;
1088
a4477c4d 1089 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4 1090 return dst;
a4477c4d 1091
1da177e4
LT
1092 return NULL;
1093}
1094
1095static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1096{
1097 struct rt6_info *rt = (struct rt6_info *) dst;
1098
1099 if (rt) {
54c1a859
YH
1100 if (rt->rt6i_flags & RTF_CACHE) {
1101 if (rt6_check_expired(rt)) {
1102 ip6_del_rt(rt);
1103 dst = NULL;
1104 }
1105 } else {
1da177e4 1106 dst_release(dst);
54c1a859
YH
1107 dst = NULL;
1108 }
1da177e4 1109 }
54c1a859 1110 return dst;
1da177e4
LT
1111}
1112
1113static void ip6_link_failure(struct sk_buff *skb)
1114{
1115 struct rt6_info *rt;
1116
3ffe533c 1117 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1118
adf30907 1119 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1120 if (rt) {
1716a961
G
1121 if (rt->rt6i_flags & RTF_CACHE)
1122 rt6_update_expires(rt, 0);
1123 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1124 rt->rt6i_node->fn_sernum = -1;
1125 }
1126}
1127
6700c270
DM
1128static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1129 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1130{
1131 struct rt6_info *rt6 = (struct rt6_info*)dst;
1132
81aded24 1133 dst_confirm(dst);
1da177e4 1134 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1135 struct net *net = dev_net(dst->dev);
1136
1da177e4
LT
1137 rt6->rt6i_flags |= RTF_MODIFIED;
1138 if (mtu < IPV6_MIN_MTU) {
defb3519 1139 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1140 mtu = IPV6_MIN_MTU;
defb3519
DM
1141 features |= RTAX_FEATURE_ALLFRAG;
1142 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1143 }
defb3519 1144 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1145 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1146 }
1147}
1148
42ae66c8
DM
1149void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1150 int oif, u32 mark)
81aded24
DM
1151{
1152 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1153 struct dst_entry *dst;
1154 struct flowi6 fl6;
1155
1156 memset(&fl6, 0, sizeof(fl6));
1157 fl6.flowi6_oif = oif;
1158 fl6.flowi6_mark = mark;
3e12939a 1159 fl6.flowi6_flags = 0;
81aded24
DM
1160 fl6.daddr = iph->daddr;
1161 fl6.saddr = iph->saddr;
1162 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1163
1164 dst = ip6_route_output(net, NULL, &fl6);
1165 if (!dst->error)
6700c270 1166 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1167 dst_release(dst);
1168}
1169EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1170
1171void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1172{
1173 ip6_update_pmtu(skb, sock_net(sk), mtu,
1174 sk->sk_bound_dev_if, sk->sk_mark);
1175}
1176EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1177
3a5ad2ee
DM
1178void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1179{
1180 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1181 struct dst_entry *dst;
1182 struct flowi6 fl6;
1183
1184 memset(&fl6, 0, sizeof(fl6));
1185 fl6.flowi6_oif = oif;
1186 fl6.flowi6_mark = mark;
1187 fl6.flowi6_flags = 0;
1188 fl6.daddr = iph->daddr;
1189 fl6.saddr = iph->saddr;
1190 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1191
1192 dst = ip6_route_output(net, NULL, &fl6);
1193 if (!dst->error)
6700c270 1194 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1195 dst_release(dst);
1196}
1197EXPORT_SYMBOL_GPL(ip6_redirect);
1198
1199void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1200{
1201 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1202}
1203EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1204
0dbaee3b 1205static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1206{
0dbaee3b
DM
1207 struct net_device *dev = dst->dev;
1208 unsigned int mtu = dst_mtu(dst);
1209 struct net *net = dev_net(dev);
1210
1da177e4
LT
1211 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1212
5578689a
DL
1213 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1214 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1215
1216 /*
1ab1457c
YH
1217 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1218 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1219 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1220 * rely only on pmtu discovery"
1221 */
1222 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1223 mtu = IPV6_MAXPLEN;
1224 return mtu;
1225}
1226
ebb762f2 1227static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1228{
d33e4553 1229 struct inet6_dev *idev;
618f9bc7
SK
1230 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1231
1232 if (mtu)
1233 return mtu;
1234
1235 mtu = IPV6_MIN_MTU;
d33e4553
DM
1236
1237 rcu_read_lock();
1238 idev = __in6_dev_get(dst->dev);
1239 if (idev)
1240 mtu = idev->cnf.mtu6;
1241 rcu_read_unlock();
1242
1243 return mtu;
1244}
1245
/*
 * Singly linked list (chained through dst.next) of the dst entries created
 * by icmp6_dst_alloc(), and the spinlock that guards it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1248
3b00944c 1249struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1250 struct neighbour *neigh,
87a11578 1251 struct flowi6 *fl6)
1da177e4 1252{
87a11578 1253 struct dst_entry *dst;
1da177e4
LT
1254 struct rt6_info *rt;
1255 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1256 struct net *net = dev_net(dev);
1da177e4 1257
38308473 1258 if (unlikely(!idev))
122bdf67 1259 return ERR_PTR(-ENODEV);
1da177e4 1260
8b96d22d 1261 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1262 if (unlikely(!rt)) {
1da177e4 1263 in6_dev_put(idev);
87a11578 1264 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1265 goto out;
1266 }
1267
1da177e4
LT
1268 if (neigh)
1269 neigh_hold(neigh);
14deae41 1270 else {
f894cbf8 1271 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1272 if (IS_ERR(neigh)) {
252c3d84 1273 in6_dev_put(idev);
b43faac6
DM
1274 dst_free(&rt->dst);
1275 return ERR_CAST(neigh);
1276 }
14deae41 1277 }
1da177e4 1278
8e2ec639
YZ
1279 rt->dst.flags |= DST_HOST;
1280 rt->dst.output = ip6_output;
97cac082 1281 rt->n = neigh;
d8d1f30b 1282 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1283 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1284 rt->rt6i_dst.plen = 128;
1285 rt->rt6i_idev = idev;
14edd87d 1286 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1287
3b00944c 1288 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1289 rt->dst.next = icmp6_dst_gc_list;
1290 icmp6_dst_gc_list = &rt->dst;
3b00944c 1291 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1292
5578689a 1293 fib6_force_start_gc(net);
1da177e4 1294
87a11578
DM
1295 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1296
1da177e4 1297out:
87a11578 1298 return dst;
1da177e4
LT
1299}
1300
3d0f24a7 1301int icmp6_dst_gc(void)
1da177e4 1302{
e9476e95 1303 struct dst_entry *dst, **pprev;
3d0f24a7 1304 int more = 0;
1da177e4 1305
3b00944c
YH
1306 spin_lock_bh(&icmp6_dst_lock);
1307 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1308
1da177e4
LT
1309 while ((dst = *pprev) != NULL) {
1310 if (!atomic_read(&dst->__refcnt)) {
1311 *pprev = dst->next;
1312 dst_free(dst);
1da177e4
LT
1313 } else {
1314 pprev = &dst->next;
3d0f24a7 1315 ++more;
1da177e4
LT
1316 }
1317 }
1318
3b00944c 1319 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1320
3d0f24a7 1321 return more;
1da177e4
LT
1322}
1323
1e493d19
DM
1324static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1325 void *arg)
1326{
1327 struct dst_entry *dst, **pprev;
1328
1329 spin_lock_bh(&icmp6_dst_lock);
1330 pprev = &icmp6_dst_gc_list;
1331 while ((dst = *pprev) != NULL) {
1332 struct rt6_info *rt = (struct rt6_info *) dst;
1333 if (func(rt, arg)) {
1334 *pprev = dst->next;
1335 dst_free(dst);
1336 } else {
1337 pprev = &dst->next;
1338 }
1339 }
1340 spin_unlock_bh(&icmp6_dst_lock);
1341}
1342
569d3645 1343static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1344{
1da177e4 1345 unsigned long now = jiffies;
86393e52 1346 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1347 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1348 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1349 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1350 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1351 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1352 int entries;
7019b78e 1353
fc66f95c 1354 entries = dst_entries_get_fast(ops);
7019b78e 1355 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1356 entries <= rt_max_size)
1da177e4
LT
1357 goto out;
1358
6891a346
BT
1359 net->ipv6.ip6_rt_gc_expire++;
1360 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1361 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1362 entries = dst_entries_get_slow(ops);
1363 if (entries < ops->gc_thresh)
7019b78e 1364 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1365out:
7019b78e 1366 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1367 return entries > rt_max_size;
1da177e4
LT
1368}
1369
6b75d090 1370int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1371{
5170ae82 1372 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1373 if (hoplimit == 0) {
6b75d090 1374 struct net_device *dev = dst->dev;
c68f24cc
ED
1375 struct inet6_dev *idev;
1376
1377 rcu_read_lock();
1378 idev = __in6_dev_get(dev);
1379 if (idev)
6b75d090 1380 hoplimit = idev->cnf.hop_limit;
c68f24cc 1381 else
53b7997f 1382 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1383 rcu_read_unlock();
1da177e4
LT
1384 }
1385 return hoplimit;
1386}
abbf46ae 1387EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1388
1389/*
1390 *
1391 */
1392
86872cb5 1393int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1394{
1395 int err;
5578689a 1396 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1397 struct rt6_info *rt = NULL;
1398 struct net_device *dev = NULL;
1399 struct inet6_dev *idev = NULL;
c71099ac 1400 struct fib6_table *table;
1da177e4
LT
1401 int addr_type;
1402
86872cb5 1403 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1404 return -EINVAL;
1405#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1406 if (cfg->fc_src_len)
1da177e4
LT
1407 return -EINVAL;
1408#endif
86872cb5 1409 if (cfg->fc_ifindex) {
1da177e4 1410 err = -ENODEV;
5578689a 1411 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1412 if (!dev)
1413 goto out;
1414 idev = in6_dev_get(dev);
1415 if (!idev)
1416 goto out;
1417 }
1418
86872cb5
TG
1419 if (cfg->fc_metric == 0)
1420 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1421
d71314b4 1422 err = -ENOBUFS;
38308473
DM
1423 if (cfg->fc_nlinfo.nlh &&
1424 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1425 table = fib6_get_table(net, cfg->fc_table);
38308473 1426 if (!table) {
f3213831 1427 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1428 table = fib6_new_table(net, cfg->fc_table);
1429 }
1430 } else {
1431 table = fib6_new_table(net, cfg->fc_table);
1432 }
38308473
DM
1433
1434 if (!table)
c71099ac 1435 goto out;
c71099ac 1436
8b96d22d 1437 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1438
38308473 1439 if (!rt) {
1da177e4
LT
1440 err = -ENOMEM;
1441 goto out;
1442 }
1443
1716a961
G
1444 if (cfg->fc_flags & RTF_EXPIRES)
1445 rt6_set_expires(rt, jiffies +
1446 clock_t_to_jiffies(cfg->fc_expires));
1447 else
1448 rt6_clean_expires(rt);
1da177e4 1449
86872cb5
TG
1450 if (cfg->fc_protocol == RTPROT_UNSPEC)
1451 cfg->fc_protocol = RTPROT_BOOT;
1452 rt->rt6i_protocol = cfg->fc_protocol;
1453
1454 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1455
1456 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1457 rt->dst.input = ip6_mc_input;
ab79ad14
1458 else if (cfg->fc_flags & RTF_LOCAL)
1459 rt->dst.input = ip6_input;
1da177e4 1460 else
d8d1f30b 1461 rt->dst.input = ip6_forward;
1da177e4 1462
d8d1f30b 1463 rt->dst.output = ip6_output;
1da177e4 1464
86872cb5
TG
1465 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1466 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1467 if (rt->rt6i_dst.plen == 128)
11d53b49 1468 rt->dst.flags |= DST_HOST;
1da177e4 1469
8e2ec639
YZ
1470 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1471 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1472 if (!metrics) {
1473 err = -ENOMEM;
1474 goto out;
1475 }
1476 dst_init_metrics(&rt->dst, metrics, 0);
1477 }
1da177e4 1478#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1479 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1480 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1481#endif
1482
86872cb5 1483 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1484
1485 /* We cannot add true routes via loopback here,
1486 they would result in kernel looping; promote them to reject routes
1487 */
86872cb5 1488 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1489 (dev && (dev->flags & IFF_LOOPBACK) &&
1490 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1491 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1492 /* hold loopback dev/idev if we haven't done so. */
5578689a 1493 if (dev != net->loopback_dev) {
1da177e4
LT
1494 if (dev) {
1495 dev_put(dev);
1496 in6_dev_put(idev);
1497 }
5578689a 1498 dev = net->loopback_dev;
1da177e4
LT
1499 dev_hold(dev);
1500 idev = in6_dev_get(dev);
1501 if (!idev) {
1502 err = -ENODEV;
1503 goto out;
1504 }
1505 }
d8d1f30b
CG
1506 rt->dst.output = ip6_pkt_discard_out;
1507 rt->dst.input = ip6_pkt_discard;
1da177e4 1508 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1509 switch (cfg->fc_type) {
1510 case RTN_BLACKHOLE:
1511 rt->dst.error = -EINVAL;
1512 break;
1513 case RTN_PROHIBIT:
1514 rt->dst.error = -EACCES;
1515 break;
b4949ab2
ND
1516 case RTN_THROW:
1517 rt->dst.error = -EAGAIN;
1518 break;
ef2c7d7b
ND
1519 default:
1520 rt->dst.error = -ENETUNREACH;
1521 break;
1522 }
1da177e4
LT
1523 goto install_route;
1524 }
1525
86872cb5 1526 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1527 const struct in6_addr *gw_addr;
1da177e4
LT
1528 int gwa_type;
1529
86872cb5 1530 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1531 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1532 gwa_type = ipv6_addr_type(gw_addr);
1533
1534 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1535 struct rt6_info *grt;
1536
1537 /* IPv6 strictly inhibits using not link-local
1538 addresses as nexthop address.
1539 Otherwise, router will not able to send redirects.
1540 It is very good, but in some (rare!) circumstances
1541 (SIT, PtP, NBMA NOARP links) it is handy to allow
1542 some exceptions. --ANK
1543 */
1544 err = -EINVAL;
38308473 1545 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1546 goto out;
1547
5578689a 1548 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1549
1550 err = -EHOSTUNREACH;
38308473 1551 if (!grt)
1da177e4
LT
1552 goto out;
1553 if (dev) {
d1918542 1554 if (dev != grt->dst.dev) {
94e187c0 1555 ip6_rt_put(grt);
1da177e4
LT
1556 goto out;
1557 }
1558 } else {
d1918542 1559 dev = grt->dst.dev;
1da177e4
LT
1560 idev = grt->rt6i_idev;
1561 dev_hold(dev);
1562 in6_dev_hold(grt->rt6i_idev);
1563 }
38308473 1564 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1565 err = 0;
94e187c0 1566 ip6_rt_put(grt);
1da177e4
LT
1567
1568 if (err)
1569 goto out;
1570 }
1571 err = -EINVAL;
38308473 1572 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1573 goto out;
1574 }
1575
1576 err = -ENODEV;
38308473 1577 if (!dev)
1da177e4
LT
1578 goto out;
1579
c3968a85
DW
1580 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1581 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1582 err = -EINVAL;
1583 goto out;
1584 }
4e3fd7a0 1585 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1586 rt->rt6i_prefsrc.plen = 128;
1587 } else
1588 rt->rt6i_prefsrc.plen = 0;
1589
86872cb5 1590 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1591 err = rt6_bind_neighbour(rt, dev);
f83c7790 1592 if (err)
1da177e4 1593 goto out;
1da177e4
LT
1594 }
1595
86872cb5 1596 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1597
1598install_route:
86872cb5
TG
1599 if (cfg->fc_mx) {
1600 struct nlattr *nla;
1601 int remaining;
1602
1603 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1604 int type = nla_type(nla);
86872cb5
TG
1605
1606 if (type) {
1607 if (type > RTAX_MAX) {
1da177e4
LT
1608 err = -EINVAL;
1609 goto out;
1610 }
86872cb5 1611
defb3519 1612 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1613 }
1da177e4
LT
1614 }
1615 }
1616
d8d1f30b 1617 rt->dst.dev = dev;
1da177e4 1618 rt->rt6i_idev = idev;
c71099ac 1619 rt->rt6i_table = table;
63152fc0 1620
c346dca1 1621 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1622
86872cb5 1623 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1624
1625out:
1626 if (dev)
1627 dev_put(dev);
1628 if (idev)
1629 in6_dev_put(idev);
1630 if (rt)
d8d1f30b 1631 dst_free(&rt->dst);
1da177e4
LT
1632 return err;
1633}
1634
86872cb5 1635static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1636{
1637 int err;
c71099ac 1638 struct fib6_table *table;
d1918542 1639 struct net *net = dev_net(rt->dst.dev);
1da177e4 1640
6825a26c
G
1641 if (rt == net->ipv6.ip6_null_entry) {
1642 err = -ENOENT;
1643 goto out;
1644 }
6c813a72 1645
c71099ac
TG
1646 table = rt->rt6i_table;
1647 write_lock_bh(&table->tb6_lock);
86872cb5 1648 err = fib6_del(rt, info);
c71099ac 1649 write_unlock_bh(&table->tb6_lock);
1da177e4 1650
6825a26c 1651out:
94e187c0 1652 ip6_rt_put(rt);
1da177e4
LT
1653 return err;
1654}
1655
e0a1ad73
TG
1656int ip6_del_rt(struct rt6_info *rt)
1657{
4d1169c1 1658 struct nl_info info = {
d1918542 1659 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1660 };
528c4ceb 1661 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1662}
1663
86872cb5 1664static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1665{
c71099ac 1666 struct fib6_table *table;
1da177e4
LT
1667 struct fib6_node *fn;
1668 struct rt6_info *rt;
1669 int err = -ESRCH;
1670
5578689a 1671 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1672 if (!table)
c71099ac
TG
1673 return err;
1674
1675 read_lock_bh(&table->tb6_lock);
1da177e4 1676
c71099ac 1677 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1678 &cfg->fc_dst, cfg->fc_dst_len,
1679 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1680
1da177e4 1681 if (fn) {
d8d1f30b 1682 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1683 if (cfg->fc_ifindex &&
d1918542
DM
1684 (!rt->dst.dev ||
1685 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1686 continue;
86872cb5
TG
1687 if (cfg->fc_flags & RTF_GATEWAY &&
1688 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1689 continue;
86872cb5 1690 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1691 continue;
d8d1f30b 1692 dst_hold(&rt->dst);
c71099ac 1693 read_unlock_bh(&table->tb6_lock);
1da177e4 1694
86872cb5 1695 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1696 }
1697 }
c71099ac 1698 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1699
1700 return err;
1701}
1702
6700c270 1703static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1704{
e8599ff4 1705 struct net *net = dev_net(skb->dev);
a6279458 1706 struct netevent_redirect netevent;
e8599ff4 1707 struct rt6_info *rt, *nrt = NULL;
e8599ff4 1708 struct ndisc_options ndopts;
6e157b6a 1709 struct neighbour *old_neigh;
e8599ff4
DM
1710 struct inet6_dev *in6_dev;
1711 struct neighbour *neigh;
71bcdba0 1712 struct rd_msg *msg;
6e157b6a
DM
1713 int optlen, on_link;
1714 u8 *lladdr;
e8599ff4
DM
1715
1716 optlen = skb->tail - skb->transport_header;
71bcdba0 1717 optlen -= sizeof(*msg);
e8599ff4
DM
1718
1719 if (optlen < 0) {
6e157b6a 1720 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1721 return;
1722 }
1723
71bcdba0 1724 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 1725
71bcdba0 1726 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 1727 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1728 return;
1729 }
1730
6e157b6a 1731 on_link = 0;
71bcdba0 1732 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 1733 on_link = 1;
71bcdba0 1734 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 1735 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1736 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1737 return;
1738 }
1739
1740 in6_dev = __in6_dev_get(skb->dev);
1741 if (!in6_dev)
1742 return;
1743 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1744 return;
1745
1746 /* RFC2461 8.1:
1747 * The IP source address of the Redirect MUST be the same as the current
1748 * first-hop router for the specified ICMP Destination Address.
1749 */
1750
71bcdba0 1751 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
1752 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1753 return;
1754 }
6e157b6a
DM
1755
1756 lladdr = NULL;
e8599ff4
DM
1757 if (ndopts.nd_opts_tgt_lladdr) {
1758 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1759 skb->dev);
1760 if (!lladdr) {
1761 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1762 return;
1763 }
1764 }
1765
6e157b6a
DM
1766 rt = (struct rt6_info *) dst;
1767 if (rt == net->ipv6.ip6_null_entry) {
1768 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1769 return;
6e157b6a 1770 }
e8599ff4 1771
6e157b6a
DM
1772 /* Redirect received -> path was valid.
1773 * Look, redirects are sent only in response to data packets,
1774 * so that this nexthop apparently is reachable. --ANK
1775 */
1776 dst_confirm(&rt->dst);
a6279458 1777
71bcdba0 1778 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
1779 if (!neigh)
1780 return;
a6279458 1781
6e157b6a
DM
1782 /* Duplicate redirect: silently ignore. */
1783 old_neigh = rt->n;
1784 if (neigh == old_neigh)
a6279458 1785 goto out;
1da177e4 1786
1da177e4
LT
1787 /*
1788 * We have finally decided to accept it.
1789 */
1790
1ab1457c 1791 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1792 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1793 NEIGH_UPDATE_F_OVERRIDE|
1794 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1795 NEIGH_UPDATE_F_ISROUTER))
1796 );
1797
71bcdba0 1798 nrt = ip6_rt_copy(rt, &msg->dest);
38308473 1799 if (!nrt)
1da177e4
LT
1800 goto out;
1801
1802 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1803 if (on_link)
1804 nrt->rt6i_flags &= ~RTF_GATEWAY;
1805
4e3fd7a0 1806 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1807 nrt->n = neigh_clone(neigh);
1da177e4 1808
40e22e8f 1809 if (ip6_ins_rt(nrt))
1da177e4
LT
1810 goto out;
1811
d8d1f30b 1812 netevent.old = &rt->dst;
1d248b1c 1813 netevent.old_neigh = old_neigh;
d8d1f30b 1814 netevent.new = &nrt->dst;
1d248b1c 1815 netevent.new_neigh = neigh;
71bcdba0 1816 netevent.daddr = &msg->dest;
8d71740c
TT
1817 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1818
38308473 1819 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1820 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1821 ip6_del_rt(rt);
1da177e4
LT
1822 }
1823
1824out:
e8599ff4 1825 neigh_release(neigh);
6e157b6a
DM
1826}
1827
1da177e4
LT
1828/*
1829 * Misc support functions
1830 */
1831
1716a961 1832static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1833 const struct in6_addr *dest)
1da177e4 1834{
d1918542 1835 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1836 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1837 ort->rt6i_table);
1da177e4
LT
1838
1839 if (rt) {
d8d1f30b
CG
1840 rt->dst.input = ort->dst.input;
1841 rt->dst.output = ort->dst.output;
8e2ec639 1842 rt->dst.flags |= DST_HOST;
d8d1f30b 1843
4e3fd7a0 1844 rt->rt6i_dst.addr = *dest;
8e2ec639 1845 rt->rt6i_dst.plen = 128;
defb3519 1846 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1847 rt->dst.error = ort->dst.error;
1da177e4
LT
1848 rt->rt6i_idev = ort->rt6i_idev;
1849 if (rt->rt6i_idev)
1850 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1851 rt->dst.lastuse = jiffies;
1da177e4 1852
4e3fd7a0 1853 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1854 rt->rt6i_flags = ort->rt6i_flags;
1855 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1856 (RTF_DEFAULT | RTF_ADDRCONF))
1857 rt6_set_from(rt, ort);
1858 else
1859 rt6_clean_expires(rt);
1da177e4
LT
1860 rt->rt6i_metric = 0;
1861
1da177e4
LT
1862#ifdef CONFIG_IPV6_SUBTREES
1863 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1864#endif
0f6c6392 1865 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1866 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1867 }
1868 return rt;
1869}
1870
70ceb4f5 1871#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1872static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1873 const struct in6_addr *prefix, int prefixlen,
1874 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1875{
1876 struct fib6_node *fn;
1877 struct rt6_info *rt = NULL;
c71099ac
TG
1878 struct fib6_table *table;
1879
efa2cea0 1880 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1881 if (!table)
c71099ac 1882 return NULL;
70ceb4f5 1883
5744dd9b 1884 read_lock_bh(&table->tb6_lock);
c71099ac 1885 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1886 if (!fn)
1887 goto out;
1888
d8d1f30b 1889 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1890 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1891 continue;
1892 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1893 continue;
1894 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1895 continue;
d8d1f30b 1896 dst_hold(&rt->dst);
70ceb4f5
YH
1897 break;
1898 }
1899out:
5744dd9b 1900 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1901 return rt;
1902}
1903
efa2cea0 1904static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1905 const struct in6_addr *prefix, int prefixlen,
1906 const struct in6_addr *gwaddr, int ifindex,
95c96174 1907 unsigned int pref)
70ceb4f5 1908{
86872cb5
TG
1909 struct fib6_config cfg = {
1910 .fc_table = RT6_TABLE_INFO,
238fc7ea 1911 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1912 .fc_ifindex = ifindex,
1913 .fc_dst_len = prefixlen,
1914 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1915 RTF_UP | RTF_PREF(pref),
15e47304 1916 .fc_nlinfo.portid = 0,
efa2cea0
DL
1917 .fc_nlinfo.nlh = NULL,
1918 .fc_nlinfo.nl_net = net,
86872cb5
TG
1919 };
1920
4e3fd7a0
AD
1921 cfg.fc_dst = *prefix;
1922 cfg.fc_gateway = *gwaddr;
70ceb4f5 1923
e317da96
YH
1924 /* We should treat it as a default route if prefix length is 0. */
1925 if (!prefixlen)
86872cb5 1926 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1927
86872cb5 1928 ip6_route_add(&cfg);
70ceb4f5 1929
efa2cea0 1930 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1931}
1932#endif
1933
b71d1d42 1934struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1935{
1da177e4 1936 struct rt6_info *rt;
c71099ac 1937 struct fib6_table *table;
1da177e4 1938
c346dca1 1939 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1940 if (!table)
c71099ac 1941 return NULL;
1da177e4 1942
5744dd9b 1943 read_lock_bh(&table->tb6_lock);
d8d1f30b 1944 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1945 if (dev == rt->dst.dev &&
045927ff 1946 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1947 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1948 break;
1949 }
1950 if (rt)
d8d1f30b 1951 dst_hold(&rt->dst);
5744dd9b 1952 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1953 return rt;
1954}
1955
b71d1d42 1956struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1957 struct net_device *dev,
1958 unsigned int pref)
1da177e4 1959{
86872cb5
TG
1960 struct fib6_config cfg = {
1961 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1962 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1963 .fc_ifindex = dev->ifindex,
1964 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1965 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1966 .fc_nlinfo.portid = 0,
5578689a 1967 .fc_nlinfo.nlh = NULL,
c346dca1 1968 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1969 };
1da177e4 1970
4e3fd7a0 1971 cfg.fc_gateway = *gwaddr;
1da177e4 1972
86872cb5 1973 ip6_route_add(&cfg);
1da177e4 1974
1da177e4
LT
1975 return rt6_get_dflt_router(gwaddr, dev);
1976}
1977
7b4da532 1978void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1979{
1980 struct rt6_info *rt;
c71099ac
TG
1981 struct fib6_table *table;
1982
1983 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1984 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1985 if (!table)
c71099ac 1986 return;
1da177e4
LT
1987
1988restart:
c71099ac 1989 read_lock_bh(&table->tb6_lock);
d8d1f30b 1990 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1991 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1992 dst_hold(&rt->dst);
c71099ac 1993 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1994 ip6_del_rt(rt);
1da177e4
LT
1995 goto restart;
1996 }
1997 }
c71099ac 1998 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1999}
2000
5578689a
DL
2001static void rtmsg_to_fib6_config(struct net *net,
2002 struct in6_rtmsg *rtmsg,
86872cb5
TG
2003 struct fib6_config *cfg)
2004{
2005 memset(cfg, 0, sizeof(*cfg));
2006
2007 cfg->fc_table = RT6_TABLE_MAIN;
2008 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2009 cfg->fc_metric = rtmsg->rtmsg_metric;
2010 cfg->fc_expires = rtmsg->rtmsg_info;
2011 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2012 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2013 cfg->fc_flags = rtmsg->rtmsg_flags;
2014
5578689a 2015 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2016
4e3fd7a0
AD
2017 cfg->fc_dst = rtmsg->rtmsg_dst;
2018 cfg->fc_src = rtmsg->rtmsg_src;
2019 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2020}
2021
5578689a 2022int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2023{
86872cb5 2024 struct fib6_config cfg;
1da177e4
LT
2025 struct in6_rtmsg rtmsg;
2026 int err;
2027
2028 switch(cmd) {
2029 case SIOCADDRT: /* Add a route */
2030 case SIOCDELRT: /* Delete a route */
af31f412 2031 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2032 return -EPERM;
2033 err = copy_from_user(&rtmsg, arg,
2034 sizeof(struct in6_rtmsg));
2035 if (err)
2036 return -EFAULT;
86872cb5 2037
5578689a 2038 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2039
1da177e4
LT
2040 rtnl_lock();
2041 switch (cmd) {
2042 case SIOCADDRT:
86872cb5 2043 err = ip6_route_add(&cfg);
1da177e4
LT
2044 break;
2045 case SIOCDELRT:
86872cb5 2046 err = ip6_route_del(&cfg);
1da177e4
LT
2047 break;
2048 default:
2049 err = -EINVAL;
2050 }
2051 rtnl_unlock();
2052
2053 return err;
3ff50b79 2054 }
1da177e4
LT
2055
2056 return -EINVAL;
2057}
2058
2059/*
2060 * Drop the packet on the floor
2061 */
2062
d5fdd6ba 2063static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2064{
612f09e8 2065 int type;
adf30907 2066 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2067 switch (ipstats_mib_noroutes) {
2068 case IPSTATS_MIB_INNOROUTES:
0660e03f 2069 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2070 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2071 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2072 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2073 break;
2074 }
2075 /* FALLTHROUGH */
2076 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2077 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2078 ipstats_mib_noroutes);
612f09e8
YH
2079 break;
2080 }
3ffe533c 2081 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2082 kfree_skb(skb);
2083 return 0;
2084}
2085
9ce8ade0
TG
2086static int ip6_pkt_discard(struct sk_buff *skb)
2087{
612f09e8 2088 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2089}
2090
20380731 2091static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2092{
adf30907 2093 skb->dev = skb_dst(skb)->dev;
612f09e8 2094 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2095}
2096
6723ab54
DM
2097#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2098
9ce8ade0
TG
2099static int ip6_pkt_prohibit(struct sk_buff *skb)
2100{
612f09e8 2101 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2102}
2103
2104static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2105{
adf30907 2106 skb->dev = skb_dst(skb)->dev;
612f09e8 2107 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2108}
2109
6723ab54
DM
2110#endif
2111
1da177e4
LT
2112/*
2113 * Allocate a dst for local (unicast / anycast) address.
2114 */
2115
2116struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2117 const struct in6_addr *addr,
8f031519 2118 bool anycast)
1da177e4 2119{
c346dca1 2120 struct net *net = dev_net(idev->dev);
8b96d22d 2121 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2122 int err;
1da177e4 2123
38308473 2124 if (!rt) {
f3213831 2125 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2126 return ERR_PTR(-ENOMEM);
40385653 2127 }
1da177e4 2128
1da177e4
LT
2129 in6_dev_hold(idev);
2130
11d53b49 2131 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2132 rt->dst.input = ip6_input;
2133 rt->dst.output = ip6_output;
1da177e4 2134 rt->rt6i_idev = idev;
1da177e4
LT
2135
2136 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2137 if (anycast)
2138 rt->rt6i_flags |= RTF_ANYCAST;
2139 else
1da177e4 2140 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2141 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2142 if (err) {
d8d1f30b 2143 dst_free(&rt->dst);
f83c7790 2144 return ERR_PTR(err);
1da177e4
LT
2145 }
2146
4e3fd7a0 2147 rt->rt6i_dst.addr = *addr;
1da177e4 2148 rt->rt6i_dst.plen = 128;
5578689a 2149 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2150
d8d1f30b 2151 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2152
2153 return rt;
2154}
2155
c3968a85
DW
2156int ip6_route_get_saddr(struct net *net,
2157 struct rt6_info *rt,
b71d1d42 2158 const struct in6_addr *daddr,
c3968a85
DW
2159 unsigned int prefs,
2160 struct in6_addr *saddr)
2161{
2162 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2163 int err = 0;
2164 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2165 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2166 else
2167 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2168 daddr, prefs, saddr);
2169 return err;
2170}
2171
2172/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against rt6i_prefsrc */
};
2178
2179static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2180{
2181 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2182 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2183 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2184
d1918542 2185 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2186 rt != net->ipv6.ip6_null_entry &&
2187 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2188 /* remove prefsrc entry */
2189 rt->rt6i_prefsrc.plen = 0;
2190 }
2191 return 0;
2192}
2193
2194void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2195{
2196 struct net *net = dev_net(ifp->idev->dev);
2197 struct arg_dev_net_ip adni = {
2198 .dev = ifp->idev->dev,
2199 .net = net,
2200 .addr = &ifp->addr,
2201 };
2202 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2203}
2204
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2209
1da177e4
LT
2210static int fib6_ifdown(struct rt6_info *rt, void *arg)
2211{
bc3ef660 2212 const struct arg_dev_net *adn = arg;
2213 const struct net_device *dev = adn->dev;
8ed67789 2214
d1918542 2215 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2216 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2217 return -1;
c159d30c 2218
1da177e4
LT
2219 return 0;
2220}
2221
f3db4851 2222void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2223{
8ed67789
DL
2224 struct arg_dev_net adn = {
2225 .dev = dev,
2226 .net = net,
2227 };
2228
2229 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2230 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2231}
2232
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2237
2238static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2239{
2240 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2241 struct inet6_dev *idev;
2242
2243 /* In IPv6 pmtu discovery is not optional,
2244 so that RTAX_MTU lock cannot disable it.
2245 We still use this lock to block changes
2246 caused by addrconf/ndisc.
2247 */
2248
2249 idev = __in6_dev_get(arg->dev);
38308473 2250 if (!idev)
1da177e4
LT
2251 return 0;
2252
2253 /* For administrative MTU increase, there is no way to discover
2254 IPv6 PMTU increase, so PMTU increase should be updated here.
2255 Since RFC 1981 doesn't include administrative MTU increase
2256 update PMTU increase is a MUST. (i.e. jumbo frame)
2257 */
2258 /*
2259 If new MTU is less than route PMTU, this new MTU will be the
2260 lowest MTU in the path, update the route PMTU to reflect PMTU
2261 decreases; if new MTU is greater than route PMTU, and the
2262 old MTU is the lowest MTU in the path, update the route PMTU
2263 to reflect the increase. In this case if the other nodes' MTU
2264 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2265 PMTU discouvery.
2266 */
d1918542 2267 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2268 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2269 (dst_mtu(&rt->dst) >= arg->mtu ||
2270 (dst_mtu(&rt->dst) < arg->mtu &&
2271 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2272 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2273 }
1da177e4
LT
2274 return 0;
2275}
2276
95c96174 2277void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2278{
c71099ac
TG
2279 struct rt6_mtu_change_arg arg = {
2280 .dev = dev,
2281 .mtu = mtu,
2282 };
1da177e4 2283
c346dca1 2284 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2285}
2286
ef7c79ed 2287static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2288 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2289 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2290 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2291 [RTA_PRIORITY] = { .type = NLA_U32 },
2292 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2293 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
86872cb5
TG
2294};
2295
2296static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2297 struct fib6_config *cfg)
1da177e4 2298{
86872cb5
TG
2299 struct rtmsg *rtm;
2300 struct nlattr *tb[RTA_MAX+1];
2301 int err;
1da177e4 2302
86872cb5
TG
2303 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2304 if (err < 0)
2305 goto errout;
1da177e4 2306
86872cb5
TG
2307 err = -EINVAL;
2308 rtm = nlmsg_data(nlh);
2309 memset(cfg, 0, sizeof(*cfg));
2310
2311 cfg->fc_table = rtm->rtm_table;
2312 cfg->fc_dst_len = rtm->rtm_dst_len;
2313 cfg->fc_src_len = rtm->rtm_src_len;
2314 cfg->fc_flags = RTF_UP;
2315 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2316 cfg->fc_type = rtm->rtm_type;
86872cb5 2317
ef2c7d7b
ND
2318 if (rtm->rtm_type == RTN_UNREACHABLE ||
2319 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2320 rtm->rtm_type == RTN_PROHIBIT ||
2321 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2322 cfg->fc_flags |= RTF_REJECT;
2323
ab79ad14
2324 if (rtm->rtm_type == RTN_LOCAL)
2325 cfg->fc_flags |= RTF_LOCAL;
2326
15e47304 2327 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2328 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2329 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2330
2331 if (tb[RTA_GATEWAY]) {
2332 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2333 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2334 }
86872cb5
TG
2335
2336 if (tb[RTA_DST]) {
2337 int plen = (rtm->rtm_dst_len + 7) >> 3;
2338
2339 if (nla_len(tb[RTA_DST]) < plen)
2340 goto errout;
2341
2342 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2343 }
86872cb5
TG
2344
2345 if (tb[RTA_SRC]) {
2346 int plen = (rtm->rtm_src_len + 7) >> 3;
2347
2348 if (nla_len(tb[RTA_SRC]) < plen)
2349 goto errout;
2350
2351 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2352 }
86872cb5 2353
c3968a85
DW
2354 if (tb[RTA_PREFSRC])
2355 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2356
86872cb5
TG
2357 if (tb[RTA_OIF])
2358 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2359
2360 if (tb[RTA_PRIORITY])
2361 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2362
2363 if (tb[RTA_METRICS]) {
2364 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2365 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2366 }
86872cb5
TG
2367
2368 if (tb[RTA_TABLE])
2369 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2370
51ebd318
ND
2371 if (tb[RTA_MULTIPATH]) {
2372 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2373 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2374 }
2375
86872cb5
TG
2376 err = 0;
2377errout:
2378 return err;
1da177e4
LT
2379}
2380
51ebd318
ND
2381static int ip6_route_multipath(struct fib6_config *cfg, int add)
2382{
2383 struct fib6_config r_cfg;
2384 struct rtnexthop *rtnh;
2385 int remaining;
2386 int attrlen;
2387 int err = 0, last_err = 0;
2388
2389beginning:
2390 rtnh = (struct rtnexthop *)cfg->fc_mp;
2391 remaining = cfg->fc_mp_len;
2392
2393 /* Parse a Multipath Entry */
2394 while (rtnh_ok(rtnh, remaining)) {
2395 memcpy(&r_cfg, cfg, sizeof(*cfg));
2396 if (rtnh->rtnh_ifindex)
2397 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2398
2399 attrlen = rtnh_attrlen(rtnh);
2400 if (attrlen > 0) {
2401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2402
2403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2404 if (nla) {
2405 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2406 r_cfg.fc_flags |= RTF_GATEWAY;
2407 }
2408 }
2409 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2410 if (err) {
2411 last_err = err;
2412 /* If we are trying to remove a route, do not stop the
2413 * loop when ip6_route_del() fails (because next hop is
2414 * already gone), we should try to remove all next hops.
2415 */
2416 if (add) {
2417 /* If add fails, we should try to delete all
2418 * next hops that have been already added.
2419 */
2420 add = 0;
2421 goto beginning;
2422 }
2423 }
1a72418b
ND
2424 /* Because each route is added like a single route we remove
2425 * this flag after the first nexthop (if there is a collision,
2426 * we have already fail to add the first nexthop:
2427 * fib6_add_rt2node() has reject it).
2428 */
2429 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
51ebd318
ND
2430 rtnh = rtnh_next(rtnh, &remaining);
2431 }
2432
2433 return last_err;
2434}
2435
c127ea2c 2436static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2437{
86872cb5
TG
2438 struct fib6_config cfg;
2439 int err;
1da177e4 2440
86872cb5
TG
2441 err = rtm_to_fib6_config(skb, nlh, &cfg);
2442 if (err < 0)
2443 return err;
2444
51ebd318
ND
2445 if (cfg.fc_mp)
2446 return ip6_route_multipath(&cfg, 0);
2447 else
2448 return ip6_route_del(&cfg);
1da177e4
LT
2449}
2450
c127ea2c 2451static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2452{
86872cb5
TG
2453 struct fib6_config cfg;
2454 int err;
1da177e4 2455
86872cb5
TG
2456 err = rtm_to_fib6_config(skb, nlh, &cfg);
2457 if (err < 0)
2458 return err;
2459
51ebd318
ND
2460 if (cfg.fc_mp)
2461 return ip6_route_multipath(&cfg, 1);
2462 else
2463 return ip6_route_add(&cfg);
1da177e4
LT
2464}
2465
339bf98f
TG
2466static inline size_t rt6_nlmsg_size(void)
2467{
2468 return NLMSG_ALIGN(sizeof(struct rtmsg))
2469 + nla_total_size(16) /* RTA_SRC */
2470 + nla_total_size(16) /* RTA_DST */
2471 + nla_total_size(16) /* RTA_GATEWAY */
2472 + nla_total_size(16) /* RTA_PREFSRC */
2473 + nla_total_size(4) /* RTA_TABLE */
2474 + nla_total_size(4) /* RTA_IIF */
2475 + nla_total_size(4) /* RTA_OIF */
2476 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2477 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2478 + nla_total_size(sizeof(struct rta_cacheinfo));
2479}
2480
191cd582
BH
2481static int rt6_fill_node(struct net *net,
2482 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2483 struct in6_addr *dst, struct in6_addr *src,
15e47304 2484 int iif, int type, u32 portid, u32 seq,
7bc570c8 2485 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2486{
2487 struct rtmsg *rtm;
2d7202bf 2488 struct nlmsghdr *nlh;
e3703b3d 2489 long expires;
9e762a4a 2490 u32 table;
f2c31e32 2491 struct neighbour *n;
1da177e4
LT
2492
2493 if (prefix) { /* user wants prefix routes only */
2494 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2495 /* success since this is not a prefix route */
2496 return 1;
2497 }
2498 }
2499
15e47304 2500 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2501 if (!nlh)
26932566 2502 return -EMSGSIZE;
2d7202bf
TG
2503
2504 rtm = nlmsg_data(nlh);
1da177e4
LT
2505 rtm->rtm_family = AF_INET6;
2506 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2507 rtm->rtm_src_len = rt->rt6i_src.plen;
2508 rtm->rtm_tos = 0;
c71099ac 2509 if (rt->rt6i_table)
9e762a4a 2510 table = rt->rt6i_table->tb6_id;
c71099ac 2511 else
9e762a4a
PM
2512 table = RT6_TABLE_UNSPEC;
2513 rtm->rtm_table = table;
c78679e8
DM
2514 if (nla_put_u32(skb, RTA_TABLE, table))
2515 goto nla_put_failure;
ef2c7d7b
ND
2516 if (rt->rt6i_flags & RTF_REJECT) {
2517 switch (rt->dst.error) {
2518 case -EINVAL:
2519 rtm->rtm_type = RTN_BLACKHOLE;
2520 break;
2521 case -EACCES:
2522 rtm->rtm_type = RTN_PROHIBIT;
2523 break;
b4949ab2
ND
2524 case -EAGAIN:
2525 rtm->rtm_type = RTN_THROW;
2526 break;
ef2c7d7b
ND
2527 default:
2528 rtm->rtm_type = RTN_UNREACHABLE;
2529 break;
2530 }
2531 }
38308473 2532 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2533 rtm->rtm_type = RTN_LOCAL;
d1918542 2534 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2535 rtm->rtm_type = RTN_LOCAL;
2536 else
2537 rtm->rtm_type = RTN_UNICAST;
2538 rtm->rtm_flags = 0;
2539 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2540 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2541 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2542 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2543 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2544 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2545 rtm->rtm_protocol = RTPROT_RA;
2546 else
2547 rtm->rtm_protocol = RTPROT_KERNEL;
2548 }
1da177e4 2549
38308473 2550 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2551 rtm->rtm_flags |= RTM_F_CLONED;
2552
2553 if (dst) {
c78679e8
DM
2554 if (nla_put(skb, RTA_DST, 16, dst))
2555 goto nla_put_failure;
1ab1457c 2556 rtm->rtm_dst_len = 128;
1da177e4 2557 } else if (rtm->rtm_dst_len)
c78679e8
DM
2558 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2559 goto nla_put_failure;
1da177e4
LT
2560#ifdef CONFIG_IPV6_SUBTREES
2561 if (src) {
c78679e8
DM
2562 if (nla_put(skb, RTA_SRC, 16, src))
2563 goto nla_put_failure;
1ab1457c 2564 rtm->rtm_src_len = 128;
c78679e8
DM
2565 } else if (rtm->rtm_src_len &&
2566 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2567 goto nla_put_failure;
1da177e4 2568#endif
7bc570c8
YH
2569 if (iif) {
2570#ifdef CONFIG_IPV6_MROUTE
2571 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2572 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2573 if (err <= 0) {
2574 if (!nowait) {
2575 if (err == 0)
2576 return 0;
2577 goto nla_put_failure;
2578 } else {
2579 if (err == -EMSGSIZE)
2580 goto nla_put_failure;
2581 }
2582 }
2583 } else
2584#endif
c78679e8
DM
2585 if (nla_put_u32(skb, RTA_IIF, iif))
2586 goto nla_put_failure;
7bc570c8 2587 } else if (dst) {
1da177e4 2588 struct in6_addr saddr_buf;
c78679e8
DM
2589 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2590 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2591 goto nla_put_failure;
1da177e4 2592 }
2d7202bf 2593
c3968a85
DW
2594 if (rt->rt6i_prefsrc.plen) {
2595 struct in6_addr saddr_buf;
4e3fd7a0 2596 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2597 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2598 goto nla_put_failure;
c3968a85
DW
2599 }
2600
defb3519 2601 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2602 goto nla_put_failure;
2603
97cac082 2604 n = rt->n;
94f826b8 2605 if (n) {
fdd6681d 2606 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2607 goto nla_put_failure;
94f826b8 2608 }
2d7202bf 2609
c78679e8
DM
2610 if (rt->dst.dev &&
2611 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2612 goto nla_put_failure;
2613 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2614 goto nla_put_failure;
8253947e
LW
2615
2616 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2617
87a50699 2618 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2619 goto nla_put_failure;
2d7202bf
TG
2620
2621 return nlmsg_end(skb, nlh);
2622
2623nla_put_failure:
26932566
PM
2624 nlmsg_cancel(skb, nlh);
2625 return -EMSGSIZE;
1da177e4
LT
2626}
2627
1b43af54 2628int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2629{
2630 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2631 int prefix;
2632
2d7202bf
TG
2633 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2634 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2635 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2636 } else
2637 prefix = 0;
2638
191cd582
BH
2639 return rt6_fill_node(arg->net,
2640 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2641 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2642 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2643}
2644
c127ea2c 2645static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2646{
3b1e0a65 2647 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2648 struct nlattr *tb[RTA_MAX+1];
2649 struct rt6_info *rt;
1da177e4 2650 struct sk_buff *skb;
ab364a6f 2651 struct rtmsg *rtm;
4c9483b2 2652 struct flowi6 fl6;
72331bc0 2653 int err, iif = 0, oif = 0;
1da177e4 2654
ab364a6f
TG
2655 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2656 if (err < 0)
2657 goto errout;
1da177e4 2658
ab364a6f 2659 err = -EINVAL;
4c9483b2 2660 memset(&fl6, 0, sizeof(fl6));
1da177e4 2661
ab364a6f
TG
2662 if (tb[RTA_SRC]) {
2663 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2664 goto errout;
2665
4e3fd7a0 2666 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2667 }
2668
2669 if (tb[RTA_DST]) {
2670 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2671 goto errout;
2672
4e3fd7a0 2673 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2674 }
2675
2676 if (tb[RTA_IIF])
2677 iif = nla_get_u32(tb[RTA_IIF]);
2678
2679 if (tb[RTA_OIF])
72331bc0 2680 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2681
2682 if (iif) {
2683 struct net_device *dev;
72331bc0
SL
2684 int flags = 0;
2685
5578689a 2686 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2687 if (!dev) {
2688 err = -ENODEV;
ab364a6f 2689 goto errout;
1da177e4 2690 }
72331bc0
SL
2691
2692 fl6.flowi6_iif = iif;
2693
2694 if (!ipv6_addr_any(&fl6.saddr))
2695 flags |= RT6_LOOKUP_F_HAS_SADDR;
2696
2697 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2698 flags);
2699 } else {
2700 fl6.flowi6_oif = oif;
2701
2702 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2703 }
2704
ab364a6f 2705 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2706 if (!skb) {
94e187c0 2707 ip6_rt_put(rt);
ab364a6f
TG
2708 err = -ENOBUFS;
2709 goto errout;
2710 }
1da177e4 2711
ab364a6f
TG
2712 /* Reserve room for dummy headers, this skb can pass
2713 through good chunk of routing engine.
2714 */
459a98ed 2715 skb_reset_mac_header(skb);
ab364a6f 2716 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2717
d8d1f30b 2718 skb_dst_set(skb, &rt->dst);
1da177e4 2719
4c9483b2 2720 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2721 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2722 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2723 if (err < 0) {
ab364a6f
TG
2724 kfree_skb(skb);
2725 goto errout;
1da177e4
LT
2726 }
2727
15e47304 2728 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2729errout:
1da177e4 2730 return err;
1da177e4
LT
2731}
2732
86872cb5 2733void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2734{
2735 struct sk_buff *skb;
5578689a 2736 struct net *net = info->nl_net;
528c4ceb
DL
2737 u32 seq;
2738 int err;
2739
2740 err = -ENOBUFS;
38308473 2741 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2742
339bf98f 2743 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2744 if (!skb)
21713ebc
TG
2745 goto errout;
2746
191cd582 2747 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2748 event, info->portid, seq, 0, 0, 0);
26932566
PM
2749 if (err < 0) {
2750 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2751 WARN_ON(err == -EMSGSIZE);
2752 kfree_skb(skb);
2753 goto errout;
2754 }
15e47304 2755 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2756 info->nlh, gfp_any());
2757 return;
21713ebc
TG
2758errout:
2759 if (err < 0)
5578689a 2760 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2761}
2762
8ed67789
DL
2763static int ip6_route_dev_notify(struct notifier_block *this,
2764 unsigned long event, void *data)
2765{
2766 struct net_device *dev = (struct net_device *)data;
c346dca1 2767 struct net *net = dev_net(dev);
8ed67789
DL
2768
2769 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2770 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2771 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2772#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2773 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2774 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2775 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2776 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2777#endif
2778 }
2779
2780 return NOTIFY_OK;
2781}
2782
1da177e4
LT
2783/*
2784 * /proc
2785 */
2786
2787#ifdef CONFIG_PROC_FS
2788
1da177e4
LT
/* NOTE(review): no user of this struct is visible in this part of the file. */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2797
2798static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2799{
33120b30 2800 struct seq_file *m = p_arg;
69cce1d1 2801 struct neighbour *n;
1da177e4 2802
4b7a4274 2803 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2804
2805#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2806 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2807#else
33120b30 2808 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2809#endif
97cac082 2810 n = rt->n;
69cce1d1
DM
2811 if (n) {
2812 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2813 } else {
33120b30 2814 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2815 }
33120b30 2816 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2817 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2818 rt->dst.__use, rt->rt6i_flags,
d1918542 2819 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2820 return 0;
2821}
2822
33120b30 2823static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2824{
f3db4851 2825 struct net *net = (struct net *)m->private;
32b293a5 2826 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2827 return 0;
2828}
1da177e4 2829
33120b30
AD
2830static int ipv6_route_open(struct inode *inode, struct file *file)
2831{
de05c557 2832 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2833}
2834
33120b30
AD
2835static const struct file_operations ipv6_route_proc_fops = {
2836 .owner = THIS_MODULE,
2837 .open = ipv6_route_open,
2838 .read = seq_read,
2839 .llseek = seq_lseek,
b6fcbdb4 2840 .release = single_release_net,
33120b30
AD
2841};
2842
1da177e4
LT
2843static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2844{
69ddb805 2845 struct net *net = (struct net *)seq->private;
1da177e4 2846 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2847 net->ipv6.rt6_stats->fib_nodes,
2848 net->ipv6.rt6_stats->fib_route_nodes,
2849 net->ipv6.rt6_stats->fib_rt_alloc,
2850 net->ipv6.rt6_stats->fib_rt_entries,
2851 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2852 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2853 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2854
2855 return 0;
2856}
2857
2858static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2859{
de05c557 2860 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2861}
2862
9a32144e 2863static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2864 .owner = THIS_MODULE,
2865 .open = rt6_stats_seq_open,
2866 .read = seq_read,
2867 .llseek = seq_lseek,
b6fcbdb4 2868 .release = single_release_net,
1da177e4
LT
2869};
2870#endif /* CONFIG_PROC_FS */
2871
2872#ifdef CONFIG_SYSCTL
2873
1da177e4 2874static
8d65af78 2875int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2876 void __user *buffer, size_t *lenp, loff_t *ppos)
2877{
c486da34
LAG
2878 struct net *net;
2879 int delay;
2880 if (!write)
1da177e4 2881 return -EINVAL;
c486da34
LAG
2882
2883 net = (struct net *)ctl->extra1;
2884 delay = net->ipv6.sysctl.flush_delay;
2885 proc_dointvec(ctl, write, buffer, lenp, ppos);
2886 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2887 return 0;
1da177e4
LT
2888}
2889
760f2d01 2890ctl_table ipv6_route_table_template[] = {
1ab1457c 2891 {
1da177e4 2892 .procname = "flush",
4990509f 2893 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2894 .maxlen = sizeof(int),
89c8b3a1 2895 .mode = 0200,
6d9f239a 2896 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2897 },
2898 {
1da177e4 2899 .procname = "gc_thresh",
9a7ec3a9 2900 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2901 .maxlen = sizeof(int),
2902 .mode = 0644,
6d9f239a 2903 .proc_handler = proc_dointvec,
1da177e4
LT
2904 },
2905 {
1da177e4 2906 .procname = "max_size",
4990509f 2907 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2908 .maxlen = sizeof(int),
2909 .mode = 0644,
6d9f239a 2910 .proc_handler = proc_dointvec,
1da177e4
LT
2911 },
2912 {
1da177e4 2913 .procname = "gc_min_interval",
4990509f 2914 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2915 .maxlen = sizeof(int),
2916 .mode = 0644,
6d9f239a 2917 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2918 },
2919 {
1da177e4 2920 .procname = "gc_timeout",
4990509f 2921 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2922 .maxlen = sizeof(int),
2923 .mode = 0644,
6d9f239a 2924 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2925 },
2926 {
1da177e4 2927 .procname = "gc_interval",
4990509f 2928 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2929 .maxlen = sizeof(int),
2930 .mode = 0644,
6d9f239a 2931 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2932 },
2933 {
1da177e4 2934 .procname = "gc_elasticity",
4990509f 2935 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2936 .maxlen = sizeof(int),
2937 .mode = 0644,
f3d3f616 2938 .proc_handler = proc_dointvec,
1da177e4
LT
2939 },
2940 {
1da177e4 2941 .procname = "mtu_expires",
4990509f 2942 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2943 .maxlen = sizeof(int),
2944 .mode = 0644,
6d9f239a 2945 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2946 },
2947 {
1da177e4 2948 .procname = "min_adv_mss",
4990509f 2949 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2950 .maxlen = sizeof(int),
2951 .mode = 0644,
f3d3f616 2952 .proc_handler = proc_dointvec,
1da177e4
LT
2953 },
2954 {
1da177e4 2955 .procname = "gc_min_interval_ms",
4990509f 2956 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2957 .maxlen = sizeof(int),
2958 .mode = 0644,
6d9f239a 2959 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2960 },
f8572d8f 2961 { }
1da177e4
LT
2962};
2963
2c8c1e72 2964struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2965{
2966 struct ctl_table *table;
2967
2968 table = kmemdup(ipv6_route_table_template,
2969 sizeof(ipv6_route_table_template),
2970 GFP_KERNEL);
5ee09105
YH
2971
2972 if (table) {
2973 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2974 table[0].extra1 = net;
86393e52 2975 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2976 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2977 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2978 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2979 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2980 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2981 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2982 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2983 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
2984
2985 /* Don't export sysctls to unprivileged users */
2986 if (net->user_ns != &init_user_ns)
2987 table[0].procname = NULL;
5ee09105
YH
2988 }
2989
760f2d01
DL
2990 return table;
2991}
1da177e4
LT
2992#endif
2993
2c8c1e72 2994static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2995{
633d424b 2996 int ret = -ENOMEM;
8ed67789 2997
86393e52
AD
2998 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2999 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3000
fc66f95c
ED
3001 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3002 goto out_ip6_dst_ops;
3003
8ed67789
DL
3004 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3005 sizeof(*net->ipv6.ip6_null_entry),
3006 GFP_KERNEL);
3007 if (!net->ipv6.ip6_null_entry)
fc66f95c 3008 goto out_ip6_dst_entries;
d8d1f30b 3009 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3010 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3011 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3012 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3013 ip6_template_metrics, true);
8ed67789
DL
3014
3015#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3016 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3017 sizeof(*net->ipv6.ip6_prohibit_entry),
3018 GFP_KERNEL);
68fffc67
PZ
3019 if (!net->ipv6.ip6_prohibit_entry)
3020 goto out_ip6_null_entry;
d8d1f30b 3021 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3022 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3023 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3024 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3025 ip6_template_metrics, true);
8ed67789
DL
3026
3027 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3028 sizeof(*net->ipv6.ip6_blk_hole_entry),
3029 GFP_KERNEL);
68fffc67
PZ
3030 if (!net->ipv6.ip6_blk_hole_entry)
3031 goto out_ip6_prohibit_entry;
d8d1f30b 3032 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3033 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3034 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3035 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3036 ip6_template_metrics, true);
8ed67789
DL
3037#endif
3038
b339a47c
PZ
3039 net->ipv6.sysctl.flush_delay = 0;
3040 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3041 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3042 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3043 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3044 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3045 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3046 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3047
6891a346
BT
3048 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3049
8ed67789
DL
3050 ret = 0;
3051out:
3052 return ret;
f2fc6a54 3053
68fffc67
PZ
3054#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3055out_ip6_prohibit_entry:
3056 kfree(net->ipv6.ip6_prohibit_entry);
3057out_ip6_null_entry:
3058 kfree(net->ipv6.ip6_null_entry);
3059#endif
fc66f95c
ED
3060out_ip6_dst_entries:
3061 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3062out_ip6_dst_ops:
f2fc6a54 3063 goto out;
cdb18761
DL
3064}
3065
2c8c1e72 3066static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3067{
8ed67789
DL
3068 kfree(net->ipv6.ip6_null_entry);
3069#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3070 kfree(net->ipv6.ip6_prohibit_entry);
3071 kfree(net->ipv6.ip6_blk_hole_entry);
3072#endif
41bb78b4 3073 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3074}
3075
d189634e
TG
3076static int __net_init ip6_route_net_init_late(struct net *net)
3077{
3078#ifdef CONFIG_PROC_FS
3079 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3080 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3081#endif
3082 return 0;
3083}
3084
3085static void __net_exit ip6_route_net_exit_late(struct net *net)
3086{
3087#ifdef CONFIG_PROC_FS
3088 proc_net_remove(net, "ipv6_route");
3089 proc_net_remove(net, "rt6_stats");
3090#endif
3091}
3092
cdb18761
DL
3093static struct pernet_operations ip6_route_net_ops = {
3094 .init = ip6_route_net_init,
3095 .exit = ip6_route_net_exit,
3096};
3097
c3426b47
DM
3098static int __net_init ipv6_inetpeer_init(struct net *net)
3099{
3100 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3101
3102 if (!bp)
3103 return -ENOMEM;
3104 inet_peer_base_init(bp);
3105 net->ipv6.peers = bp;
3106 return 0;
3107}
3108
3109static void __net_exit ipv6_inetpeer_exit(struct net *net)
3110{
3111 struct inet_peer_base *bp = net->ipv6.peers;
3112
3113 net->ipv6.peers = NULL;
56a6b248 3114 inetpeer_invalidate_tree(bp);
c3426b47
DM
3115 kfree(bp);
3116}
3117
2b823f72 3118static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3119 .init = ipv6_inetpeer_init,
3120 .exit = ipv6_inetpeer_exit,
3121};
3122
d189634e
TG
3123static struct pernet_operations ip6_route_net_late_ops = {
3124 .init = ip6_route_net_init_late,
3125 .exit = ip6_route_net_exit_late,
3126};
3127
8ed67789
DL
3128static struct notifier_block ip6_route_dev_notifier = {
3129 .notifier_call = ip6_route_dev_notify,
3130 .priority = 0,
3131};
3132
433d49c3 3133int __init ip6_route_init(void)
1da177e4 3134{
433d49c3
DL
3135 int ret;
3136
9a7ec3a9
DL
3137 ret = -ENOMEM;
3138 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3139 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3140 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3141 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3142 goto out;
14e50e57 3143
fc66f95c 3144 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3145 if (ret)
bdb3289f 3146 goto out_kmem_cache;
bdb3289f 3147
c3426b47
DM
3148 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3149 if (ret)
e8803b6c 3150 goto out_dst_entries;
2a0c451a 3151
7e52b33b
DM
3152 ret = register_pernet_subsys(&ip6_route_net_ops);
3153 if (ret)
3154 goto out_register_inetpeer;
c3426b47 3155
5dc121e9
AE
3156 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3157
8ed67789
DL
3158 /* Registering of the loopback is done before this portion of code,
3159 * the loopback reference in rt6_info will not be taken, do it
3160 * manually for init_net */
d8d1f30b 3161 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3162 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3163 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3164 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3165 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3166 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3167 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3168 #endif
e8803b6c 3169 ret = fib6_init();
433d49c3 3170 if (ret)
8ed67789 3171 goto out_register_subsys;
433d49c3 3172
433d49c3
DL
3173 ret = xfrm6_init();
3174 if (ret)
e8803b6c 3175 goto out_fib6_init;
c35b7e72 3176
433d49c3
DL
3177 ret = fib6_rules_init();
3178 if (ret)
3179 goto xfrm6_init;
7e5449c2 3180
d189634e
TG
3181 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3182 if (ret)
3183 goto fib6_rules_init;
3184
433d49c3 3185 ret = -ENOBUFS;
c7ac8679
GR
3186 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3187 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3188 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3189 goto out_register_late_subsys;
c127ea2c 3190
8ed67789 3191 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3192 if (ret)
d189634e 3193 goto out_register_late_subsys;
8ed67789 3194
433d49c3
DL
3195out:
3196 return ret;
3197
d189634e
TG
3198out_register_late_subsys:
3199 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3200fib6_rules_init:
433d49c3
DL
3201 fib6_rules_cleanup();
3202xfrm6_init:
433d49c3 3203 xfrm6_fini();
2a0c451a
TG
3204out_fib6_init:
3205 fib6_gc_cleanup();
8ed67789
DL
3206out_register_subsys:
3207 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3208out_register_inetpeer:
3209 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3210out_dst_entries:
3211 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3212out_kmem_cache:
f2fc6a54 3213 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3214 goto out;
1da177e4
LT
3215}
3216
3217void ip6_route_cleanup(void)
3218{
8ed67789 3219 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3220 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3221 fib6_rules_cleanup();
1da177e4 3222 xfrm6_fini();
1da177e4 3223 fib6_gc_cleanup();
c3426b47 3224 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3225 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3226 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3227 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3228}