ipv6: delete expired route in ip6_pmtu_deliver
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 111 .local_out = __ip6_local_out,
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
09640e63 120 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
124};
125
bdb3289f 126static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
1da177e4
LT
135 },
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 137 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
6723ab54
DM
144static int ip6_pkt_prohibit(struct sk_buff *skb);
145static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 146
280a34c8 147static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
148 .dst = {
149 .__refcnt = ATOMIC_INIT(1),
150 .__use = 1,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
101367c2
TG
156 },
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 158 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
161};
162
bdb3289f 163static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
170 .input = dst_discard,
171 .output = dst_discard,
101367c2
TG
172 },
173 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 174 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
175 .rt6i_metric = ~(u32) 0,
176 .rt6i_ref = ATOMIC_INIT(1),
177};
178
179#endif
180
1da177e4 181/* allocate dst with ip6_dst_ops */
/*
 * Thin typed wrapper around dst_alloc(): returns whatever dst_alloc()
 * returned for @ops, viewed as a struct rt6_info pointer.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
186
187static void ip6_dst_destroy(struct dst_entry *dst)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
1ab1457c 195 }
1da177e4
LT
196}
197
198static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
199 int how)
200{
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 203 struct net_device *loopback_dev =
c346dca1 204 dev_net(dev)->loopback_dev;
1da177e4 205
5a3e55d6
DL
206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
1da177e4
LT
209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
211 in6_dev_put(idev);
212 }
213 }
214}
215
216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
217{
a02cec21
ED
218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
220}
221
c71099ac
TG
222static inline int rt6_need_strict(struct in6_addr *daddr)
223{
a02cec21
ED
224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
226}
227
1da177e4 228/*
c71099ac 229 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
230 */
231
8ed67789
DL
232static inline struct rt6_info *rt6_device_match(struct net *net,
233 struct rt6_info *rt,
dd3abc4e 234 struct in6_addr *saddr,
1da177e4 235 int oif,
d420895e 236 int flags)
1da177e4
LT
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
dd3abc4e
YH
241 if (!oif && ipv6_addr_any(saddr))
242 goto out;
243
d8d1f30b 244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
245 struct net_device *dev = sprt->rt6i_dev;
246
247 if (oif) {
1da177e4
LT
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 253 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 254 continue;
1ab1457c 255 if (local && (!oif ||
1da177e4
LT
256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
dd3abc4e
YH
261 } else {
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
264 return sprt;
1da177e4 265 }
dd3abc4e 266 }
1da177e4 267
dd3abc4e 268 if (oif) {
1da177e4
LT
269 if (local)
270 return local;
271
d420895e 272 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 273 return net->ipv6.ip6_null_entry;
1da177e4 274 }
dd3abc4e 275out:
1da177e4
LT
276 return rt;
277}
278
27097255
YH
279#ifdef CONFIG_IPV6_ROUTER_PREF
280static void rt6_probe(struct rt6_info *rt)
281{
282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
283 /*
284 * Okay, this does not seem to be appropriate
285 * for now, however, we need to check if it
286 * is really so; aka Router Reachability Probing.
287 *
288 * Router Reachability Probe MUST be rate-limited
289 * to no more than one per minute.
290 */
291 if (!neigh || (neigh->nud_state & NUD_VALID))
292 return;
293 read_lock_bh(&neigh->lock);
294 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
296 struct in6_addr mcaddr;
297 struct in6_addr *target;
298
299 neigh->updated = jiffies;
300 read_unlock_bh(&neigh->lock);
301
302 target = (struct in6_addr *)&neigh->primary_key;
303 addrconf_addr_solict_mult(target, &mcaddr);
304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
305 } else
306 read_unlock_bh(&neigh->lock);
307}
308#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: probing does nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
312#endif
313
1da177e4 314/*
554cfb7e 315 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 316 */
b6f99a21 317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
318{
319 struct net_device *dev = rt->rt6i_dev;
161980f4 320 if (!oif || dev->ifindex == oif)
554cfb7e 321 return 2;
161980f4
DM
322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
554cfb7e 326}
1da177e4 327
b6f99a21 328static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 329{
554cfb7e 330 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 331 int m;
4d0c5911
YH
332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
554cfb7e
YH
336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
4d0c5911 338 m = 2;
398bcbeb
YH
339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
ea73ee23 344 m = 1;
554cfb7e 345 read_unlock_bh(&neigh->lock);
398bcbeb
YH
346 } else
347 m = 0;
554cfb7e 348 return m;
1da177e4
LT
349}
350
554cfb7e
YH
351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
1da177e4 353{
4d0c5911 354 int m, n;
1ab1457c 355
4d0c5911 356 m = rt6_check_dev(rt, oif);
77d16f45 357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 358 return -1;
ebacaaa0
YH
359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
4d0c5911 362 n = rt6_check_neigh(rt);
557e92ef 363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
364 return -1;
365 return m;
366}
367
f11e6659
DM
368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
554cfb7e 370{
f11e6659
DM
371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
554cfb7e 398 int mpri = -1;
1da177e4 399
f11e6659
DM
400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 402 rt = rt->dst.rt6_next)
f11e6659
DM
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 405 rt = rt->dst.rt6_next)
f11e6659 406 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 407
f11e6659
DM
408 return match;
409}
1da177e4 410
f11e6659
DM
411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
8ed67789 414 struct net *net;
1da177e4 415
f11e6659 416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 417 __func__, fn->leaf, oif);
554cfb7e 418
f11e6659
DM
419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 422
f11e6659 423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 424
554cfb7e 425 if (!match &&
f11e6659 426 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 427 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 428
554cfb7e 429 /* no entries matched; do round-robin */
f11e6659
DM
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
1da177e4 435 }
1da177e4 436
f11e6659 437 RT6_TRACE("%s() => %p\n",
0dc47877 438 __func__, match);
1da177e4 439
c346dca1 440 net = dev_net(rt0->rt6i_dev);
a02cec21 441 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
442}
443
70ceb4f5
YH
444#ifdef CONFIG_IPV6_ROUTE_INFO
445int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
446 struct in6_addr *gwaddr)
447{
c346dca1 448 struct net *net = dev_net(dev);
70ceb4f5
YH
449 struct route_info *rinfo = (struct route_info *) opt;
450 struct in6_addr prefix_buf, *prefix;
451 unsigned int pref;
4bed72e4 452 unsigned long lifetime;
70ceb4f5
YH
453 struct rt6_info *rt;
454
455 if (len < sizeof(struct route_info)) {
456 return -EINVAL;
457 }
458
459 /* Sanity check for prefix_len and length */
460 if (rinfo->length > 3) {
461 return -EINVAL;
462 } else if (rinfo->prefix_len > 128) {
463 return -EINVAL;
464 } else if (rinfo->prefix_len > 64) {
465 if (rinfo->length < 2) {
466 return -EINVAL;
467 }
468 } else if (rinfo->prefix_len > 0) {
469 if (rinfo->length < 1) {
470 return -EINVAL;
471 }
472 }
473
474 pref = rinfo->route_pref;
475 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 476 return -EINVAL;
70ceb4f5 477
4bed72e4 478 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
479
480 if (rinfo->length == 3)
481 prefix = (struct in6_addr *)rinfo->prefix;
482 else {
483 /* this function is safe */
484 ipv6_addr_prefix(&prefix_buf,
485 (struct in6_addr *)rinfo->prefix,
486 rinfo->prefix_len);
487 prefix = &prefix_buf;
488 }
489
efa2cea0
DL
490 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
491 dev->ifindex);
70ceb4f5
YH
492
493 if (rt && !lifetime) {
e0a1ad73 494 ip6_del_rt(rt);
70ceb4f5
YH
495 rt = NULL;
496 }
497
498 if (!rt && lifetime)
efa2cea0 499 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
500 pref);
501 else if (rt)
502 rt->rt6i_flags = RTF_ROUTEINFO |
503 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
504
505 if (rt) {
4bed72e4 506 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
507 rt->rt6i_flags &= ~RTF_EXPIRES;
508 } else {
509 rt->rt6i_expires = jiffies + HZ * lifetime;
510 rt->rt6i_flags |= RTF_EXPIRES;
511 }
d8d1f30b 512 dst_release(&rt->dst);
70ceb4f5
YH
513 }
514 return 0;
515}
516#endif
517
/*
 * BACKTRACK(): helper for the lookup functions below.  It relies on the
 * caller's local variables `rt` and `fn` and on the labels `out` and
 * `restart` being in scope.
 *
 * If the current result is the namespace's ip6_null_entry, climb from `fn`
 * towards the root: jump to `out` on reaching the top-level root, descend
 * into a parent's source subtree (keyed by saddr) when there is one that is
 * not the node we came from, and jump to `restart` as soon as a node
 * carrying route info (RTN_RTINFO) is found.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 535
8ed67789
DL
536static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
c71099ac 538 struct flowi *fl, int flags)
1da177e4
LT
539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
c71099ac
TG
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
dd3abc4e 547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 548 BACKTRACK(net, &fl->fl6_src);
c71099ac 549out:
d8d1f30b 550 dst_use(&rt->dst, jiffies);
c71099ac 551 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
552 return rt;
553
554}
555
9acd9f3a
YH
556struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
558{
559 struct flowi fl = {
560 .oif = oif,
561 .nl_u = {
562 .ip6_u = {
563 .daddr = *daddr,
c71099ac
TG
564 },
565 },
566 };
567 struct dst_entry *dst;
77d16f45 568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 569
adaa70bb
TG
570 if (saddr) {
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
573 }
574
606a2b48 575 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
576 if (dst->error == 0)
577 return (struct rt6_info *) dst;
578
579 dst_release(dst);
580
1da177e4
LT
581 return NULL;
582}
583
7159039a
YH
584EXPORT_SYMBOL(rt6_lookup);
585
/* ip6_ins_rt must be called WITHOUT table->tb6_lock held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
591
86872cb5 592static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
593{
594 int err;
c71099ac 595 struct fib6_table *table;
1da177e4 596
c71099ac
TG
597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
86872cb5 599 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 600 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
601
602 return err;
603}
604
40e22e8f
TG
605int ip6_ins_rt(struct rt6_info *rt)
606{
4d1169c1 607 struct nl_info info = {
c346dca1 608 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 609 };
528c4ceb 610 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
611}
612
95a9a5ba
YH
613static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
1da177e4 615{
1da177e4
LT
616 struct rt6_info *rt;
617
618 /*
619 * Clone the route.
620 */
621
622 rt = ip6_rt_copy(ort);
623
624 if (rt) {
14deae41
DM
625 struct neighbour *neigh;
626 int attempts = !in_softirq();
627
58c4fb86
YH
628 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
629 if (rt->rt6i_dst.plen != 128 &&
630 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
631 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 632 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 633 }
1da177e4 634
58c4fb86 635 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
636 rt->rt6i_dst.plen = 128;
637 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 638 rt->dst.flags |= DST_HOST;
1da177e4
LT
639
640#ifdef CONFIG_IPV6_SUBTREES
641 if (rt->rt6i_src.plen && saddr) {
642 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
643 rt->rt6i_src.plen = 128;
644 }
645#endif
646
14deae41
DM
647 retry:
648 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649 if (IS_ERR(neigh)) {
650 struct net *net = dev_net(rt->rt6i_dev);
651 int saved_rt_min_interval =
652 net->ipv6.sysctl.ip6_rt_gc_min_interval;
653 int saved_rt_elasticity =
654 net->ipv6.sysctl.ip6_rt_gc_elasticity;
655
656 if (attempts-- > 0) {
657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
658 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
659
86393e52 660 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
661
662 net->ipv6.sysctl.ip6_rt_gc_elasticity =
663 saved_rt_elasticity;
664 net->ipv6.sysctl.ip6_rt_gc_min_interval =
665 saved_rt_min_interval;
666 goto retry;
667 }
668
669 if (net_ratelimit())
670 printk(KERN_WARNING
7e1b33e5 671 "ipv6: Neighbour table overflow.\n");
d8d1f30b 672 dst_free(&rt->dst);
14deae41
DM
673 return NULL;
674 }
675 rt->rt6i_nexthop = neigh;
1da177e4 676
95a9a5ba 677 }
1da177e4 678
95a9a5ba
YH
679 return rt;
680}
1da177e4 681
299d9939
YH
682static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
683{
684 struct rt6_info *rt = ip6_rt_copy(ort);
685 if (rt) {
686 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
687 rt->rt6i_dst.plen = 128;
688 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 689 rt->dst.flags |= DST_HOST;
299d9939
YH
690 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
691 }
692 return rt;
693}
694
8ed67789
DL
695static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
696 struct flowi *fl, int flags)
1da177e4
LT
697{
698 struct fib6_node *fn;
519fbd87 699 struct rt6_info *rt, *nrt;
c71099ac 700 int strict = 0;
1da177e4 701 int attempts = 3;
519fbd87 702 int err;
53b7997f 703 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 704
77d16f45 705 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
706
707relookup:
c71099ac 708 read_lock_bh(&table->tb6_lock);
1da177e4 709
8238dd06 710restart_2:
c71099ac 711 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
712
713restart:
4acad72d 714 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
715
716 BACKTRACK(net, &fl->fl6_src);
717 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 718 rt->rt6i_flags & RTF_CACHE)
1ddef044 719 goto out;
1da177e4 720
d8d1f30b 721 dst_hold(&rt->dst);
c71099ac 722 read_unlock_bh(&table->tb6_lock);
fb9de91e 723
519fbd87 724 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 725 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
726 else {
727#if CLONE_OFFLINK_ROUTE
c71099ac 728 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
729#else
730 goto out2;
731#endif
732 }
e40cf353 733
d8d1f30b 734 dst_release(&rt->dst);
8ed67789 735 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 736
d8d1f30b 737 dst_hold(&rt->dst);
519fbd87 738 if (nrt) {
40e22e8f 739 err = ip6_ins_rt(nrt);
519fbd87 740 if (!err)
1da177e4 741 goto out2;
1da177e4 742 }
1da177e4 743
519fbd87
YH
744 if (--attempts <= 0)
745 goto out2;
746
747 /*
c71099ac 748 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
749 * released someone could insert this route. Relookup.
750 */
d8d1f30b 751 dst_release(&rt->dst);
519fbd87
YH
752 goto relookup;
753
754out:
8238dd06
YH
755 if (reachable) {
756 reachable = 0;
757 goto restart_2;
758 }
d8d1f30b 759 dst_hold(&rt->dst);
c71099ac 760 read_unlock_bh(&table->tb6_lock);
1da177e4 761out2:
d8d1f30b
CG
762 rt->dst.lastuse = jiffies;
763 rt->dst.__use++;
c71099ac
TG
764
765 return rt;
1da177e4
LT
766}
767
8ed67789 768static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
769 struct flowi *fl, int flags)
770{
8ed67789 771 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
772}
773
c71099ac
TG
774void ip6_route_input(struct sk_buff *skb)
775{
0660e03f 776 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 777 struct net *net = dev_net(skb->dev);
adaa70bb 778 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
779 struct flowi fl = {
780 .iif = skb->dev->ifindex,
781 .nl_u = {
782 .ip6_u = {
783 .daddr = iph->daddr,
784 .saddr = iph->saddr,
90bcaf7b 785 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
786 },
787 },
1ab1457c 788 .mark = skb->mark,
c71099ac
TG
789 .proto = iph->nexthdr,
790 };
adaa70bb 791
1d6e55f1 792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 793 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 794
adf30907 795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
796}
797
8ed67789 798static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 799 struct flowi *fl, int flags)
1da177e4 800{
8ed67789 801 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
802}
803
4591db4f
DL
804struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
805 struct flowi *fl)
c71099ac
TG
806{
807 int flags = 0;
808
6057fd78 809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 810 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 811
adaa70bb
TG
812 if (!ipv6_addr_any(&fl->fl6_src))
813 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
814 else if (sk)
815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 816
4591db4f 817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
818}
819
7159039a 820EXPORT_SYMBOL(ip6_route_output);
1da177e4 821
14e50e57
DM
822int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
823{
824 struct rt6_info *ort = (struct rt6_info *) *dstp;
825 struct rt6_info *rt = (struct rt6_info *)
826 dst_alloc(&ip6_dst_blackhole_ops);
827 struct dst_entry *new = NULL;
828
829 if (rt) {
d8d1f30b 830 new = &rt->dst;
14e50e57
DM
831
832 atomic_set(&new->__refcnt, 1);
833 new->__use = 1;
352e512c
HX
834 new->input = dst_discard;
835 new->output = dst_discard;
14e50e57 836
d8d1f30b
CG
837 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
838 new->dev = ort->dst.dev;
14e50e57
DM
839 if (new->dev)
840 dev_hold(new->dev);
841 rt->rt6i_idev = ort->rt6i_idev;
842 if (rt->rt6i_idev)
843 in6_dev_hold(rt->rt6i_idev);
844 rt->rt6i_expires = 0;
845
846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
848 rt->rt6i_metric = 0;
849
850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
851#ifdef CONFIG_IPV6_SUBTREES
852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
853#endif
854
855 dst_free(new);
856 }
857
858 dst_release(*dstp);
859 *dstp = new;
a02cec21 860 return new ? 0 : -ENOMEM;
14e50e57
DM
861}
862EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
863
1da177e4
LT
864/*
865 * Destination cache support functions
866 */
867
868static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
869{
870 struct rt6_info *rt;
871
872 rt = (struct rt6_info *) dst;
873
10414444 874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
875 return dst;
876
877 return NULL;
878}
879
880static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
881{
882 struct rt6_info *rt = (struct rt6_info *) dst;
883
884 if (rt) {
54c1a859
YH
885 if (rt->rt6i_flags & RTF_CACHE) {
886 if (rt6_check_expired(rt)) {
887 ip6_del_rt(rt);
888 dst = NULL;
889 }
890 } else {
1da177e4 891 dst_release(dst);
54c1a859
YH
892 dst = NULL;
893 }
1da177e4 894 }
54c1a859 895 return dst;
1da177e4
LT
896}
897
898static void ip6_link_failure(struct sk_buff *skb)
899{
900 struct rt6_info *rt;
901
3ffe533c 902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 903
adf30907 904 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
905 if (rt) {
906 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 907 dst_set_expires(&rt->dst, 0);
1da177e4
LT
908 rt->rt6i_flags |= RTF_EXPIRES;
909 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
910 rt->rt6i_node->fn_sernum = -1;
911 }
912}
913
914static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
915{
916 struct rt6_info *rt6 = (struct rt6_info*)dst;
917
918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
919 rt6->rt6i_flags |= RTF_MODIFIED;
920 if (mtu < IPV6_MIN_MTU) {
921 mtu = IPV6_MIN_MTU;
922 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
923 }
924 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 925 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
926 }
927}
928
1da177e4
LT
929static int ipv6_get_mtu(struct net_device *dev);
930
5578689a 931static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
932{
933 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
934
5578689a
DL
935 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
936 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
937
938 /*
1ab1457c
YH
939 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
940 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
941 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
942 * rely only on pmtu discovery"
943 */
944 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
945 mtu = IPV6_MAXPLEN;
946 return mtu;
947}
948
3b00944c
YH
949static struct dst_entry *icmp6_dst_gc_list;
950static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 951
3b00944c 952struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 953 struct neighbour *neigh,
9acd9f3a 954 const struct in6_addr *addr)
1da177e4
LT
955{
956 struct rt6_info *rt;
957 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 958 struct net *net = dev_net(dev);
1da177e4
LT
959
960 if (unlikely(idev == NULL))
961 return NULL;
962
86393e52 963 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
964 if (unlikely(rt == NULL)) {
965 in6_dev_put(idev);
966 goto out;
967 }
968
969 dev_hold(dev);
970 if (neigh)
971 neigh_hold(neigh);
14deae41 972 else {
1da177e4 973 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
974 if (IS_ERR(neigh))
975 neigh = NULL;
976 }
1da177e4
LT
977
978 rt->rt6i_dev = dev;
979 rt->rt6i_idev = idev;
980 rt->rt6i_nexthop = neigh;
d8d1f30b
CG
981 atomic_set(&rt->dst.__refcnt, 1);
982 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
983 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
984 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
985 rt->dst.output = ip6_output;
1da177e4
LT
986
987#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 988 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 989 ? DST_HOST
1da177e4
LT
990 : 0;
991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
992 rt->rt6i_dst.plen = 128;
993#endif
994
3b00944c 995 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
996 rt->dst.next = icmp6_dst_gc_list;
997 icmp6_dst_gc_list = &rt->dst;
3b00944c 998 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 999
5578689a 1000 fib6_force_start_gc(net);
1da177e4
LT
1001
1002out:
d8d1f30b 1003 return &rt->dst;
1da177e4
LT
1004}
1005
3d0f24a7 1006int icmp6_dst_gc(void)
1da177e4
LT
1007{
1008 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1009 int more = 0;
1da177e4
LT
1010
1011 next = NULL;
5d0bbeeb 1012
3b00944c
YH
1013 spin_lock_bh(&icmp6_dst_lock);
1014 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1015
1da177e4
LT
1016 while ((dst = *pprev) != NULL) {
1017 if (!atomic_read(&dst->__refcnt)) {
1018 *pprev = dst->next;
1019 dst_free(dst);
1da177e4
LT
1020 } else {
1021 pprev = &dst->next;
3d0f24a7 1022 ++more;
1da177e4
LT
1023 }
1024 }
1025
3b00944c 1026 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1027
3d0f24a7 1028 return more;
1da177e4
LT
1029}
1030
1e493d19
DM
1031static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1032 void *arg)
1033{
1034 struct dst_entry *dst, **pprev;
1035
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
1038 while ((dst = *pprev) != NULL) {
1039 struct rt6_info *rt = (struct rt6_info *) dst;
1040 if (func(rt, arg)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
1043 } else {
1044 pprev = &dst->next;
1045 }
1046 }
1047 spin_unlock_bh(&icmp6_dst_lock);
1048}
1049
569d3645 1050static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1051{
1da177e4 1052 unsigned long now = jiffies;
86393e52 1053 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1054 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1055 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1059 int entries;
7019b78e 1060
fc66f95c 1061 entries = dst_entries_get_fast(ops);
7019b78e 1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1063 entries <= rt_max_size)
1da177e4
LT
1064 goto out;
1065
6891a346
BT
1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
7019b78e 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1072out:
7019b78e 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1074 return entries > rt_max_size;
1da177e4
LT
1075}
1076
1077/* Clean host part of a prefix. Not necessary in radix tree,
1078 but results in cleaner routing tables.
1079
1080 Remove it only when all the things will work!
1081 */
1082
1083static int ipv6_get_mtu(struct net_device *dev)
1084{
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1087
c68f24cc
ED
1088 rcu_read_lock();
1089 idev = __in6_dev_get(dev);
1090 if (idev)
1da177e4 1091 mtu = idev->cnf.mtu6;
c68f24cc 1092 rcu_read_unlock();
1da177e4
LT
1093 return mtu;
1094}
1095
6b75d090 1096int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1097{
6b75d090
YH
1098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1099 if (hoplimit < 0) {
1100 struct net_device *dev = dst->dev;
c68f24cc
ED
1101 struct inet6_dev *idev;
1102
1103 rcu_read_lock();
1104 idev = __in6_dev_get(dev);
1105 if (idev)
6b75d090 1106 hoplimit = idev->cnf.hop_limit;
c68f24cc 1107 else
53b7997f 1108 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1109 rcu_read_unlock();
1da177e4
LT
1110 }
1111 return hoplimit;
1112}
1113
1114/*
1115 *
1116 */
1117
86872cb5 1118int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1119{
1120 int err;
5578689a 1121 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1122 struct rt6_info *rt = NULL;
1123 struct net_device *dev = NULL;
1124 struct inet6_dev *idev = NULL;
c71099ac 1125 struct fib6_table *table;
1da177e4
LT
1126 int addr_type;
1127
86872cb5 1128 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1129 return -EINVAL;
1130#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1131 if (cfg->fc_src_len)
1da177e4
LT
1132 return -EINVAL;
1133#endif
86872cb5 1134 if (cfg->fc_ifindex) {
1da177e4 1135 err = -ENODEV;
5578689a 1136 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1137 if (!dev)
1138 goto out;
1139 idev = in6_dev_get(dev);
1140 if (!idev)
1141 goto out;
1142 }
1143
86872cb5
TG
1144 if (cfg->fc_metric == 0)
1145 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1146
5578689a 1147 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1148 if (table == NULL) {
1149 err = -ENOBUFS;
1150 goto out;
1151 }
1152
86393e52 1153 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1154
1155 if (rt == NULL) {
1156 err = -ENOMEM;
1157 goto out;
1158 }
1159
d8d1f30b 1160 rt->dst.obsolete = -1;
6f704992
YH
1161 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1162 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1163 0;
1da177e4 1164
86872cb5
TG
1165 if (cfg->fc_protocol == RTPROT_UNSPEC)
1166 cfg->fc_protocol = RTPROT_BOOT;
1167 rt->rt6i_protocol = cfg->fc_protocol;
1168
1169 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1170
1171 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1172 rt->dst.input = ip6_mc_input;
ab79ad14
1173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
1da177e4 1175 else
d8d1f30b 1176 rt->dst.input = ip6_forward;
1da177e4 1177
d8d1f30b 1178 rt->dst.output = ip6_output;
1da177e4 1179
86872cb5
TG
1180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1182 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1183 rt->dst.flags = DST_HOST;
1da177e4
LT
1184
1185#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1187 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1188#endif
1189
86872cb5 1190 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1191
1192 /* We cannot add true routes via loopback here,
1193 they would result in kernel looping; promote them to reject routes
1194 */
86872cb5 1195 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1198 /* hold loopback dev/idev if we haven't done so. */
5578689a 1199 if (dev != net->loopback_dev) {
1da177e4
LT
1200 if (dev) {
1201 dev_put(dev);
1202 in6_dev_put(idev);
1203 }
5578689a 1204 dev = net->loopback_dev;
1da177e4
LT
1205 dev_hold(dev);
1206 idev = in6_dev_get(dev);
1207 if (!idev) {
1208 err = -ENODEV;
1209 goto out;
1210 }
1211 }
d8d1f30b
CG
1212 rt->dst.output = ip6_pkt_discard_out;
1213 rt->dst.input = ip6_pkt_discard;
1214 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1216 goto install_route;
1217 }
1218
86872cb5 1219 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1220 struct in6_addr *gw_addr;
1221 int gwa_type;
1222
86872cb5
TG
1223 gw_addr = &cfg->fc_gateway;
1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1225 gwa_type = ipv6_addr_type(gw_addr);
1226
1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228 struct rt6_info *grt;
1229
1230 /* IPv6 strictly inhibits using not link-local
1231 addresses as nexthop address.
1232 Otherwise, router will not able to send redirects.
1233 It is very good, but in some (rare!) circumstances
1234 (SIT, PtP, NBMA NOARP links) it is handy to allow
1235 some exceptions. --ANK
1236 */
1237 err = -EINVAL;
1238 if (!(gwa_type&IPV6_ADDR_UNICAST))
1239 goto out;
1240
5578689a 1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1242
1243 err = -EHOSTUNREACH;
1244 if (grt == NULL)
1245 goto out;
1246 if (dev) {
1247 if (dev != grt->rt6i_dev) {
d8d1f30b 1248 dst_release(&grt->dst);
1da177e4
LT
1249 goto out;
1250 }
1251 } else {
1252 dev = grt->rt6i_dev;
1253 idev = grt->rt6i_idev;
1254 dev_hold(dev);
1255 in6_dev_hold(grt->rt6i_idev);
1256 }
1257 if (!(grt->rt6i_flags&RTF_GATEWAY))
1258 err = 0;
d8d1f30b 1259 dst_release(&grt->dst);
1da177e4
LT
1260
1261 if (err)
1262 goto out;
1263 }
1264 err = -EINVAL;
1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1266 goto out;
1267 }
1268
1269 err = -ENODEV;
1270 if (dev == NULL)
1271 goto out;
1272
86872cb5 1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275 if (IS_ERR(rt->rt6i_nexthop)) {
1276 err = PTR_ERR(rt->rt6i_nexthop);
1277 rt->rt6i_nexthop = NULL;
1278 goto out;
1279 }
1280 }
1281
86872cb5 1282 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1283
1284install_route:
86872cb5
TG
1285 if (cfg->fc_mx) {
1286 struct nlattr *nla;
1287 int remaining;
1288
1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1290 int type = nla_type(nla);
86872cb5
TG
1291
1292 if (type) {
1293 if (type > RTAX_MAX) {
1da177e4
LT
1294 err = -EINVAL;
1295 goto out;
1296 }
86872cb5 1297
d8d1f30b 1298 rt->dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1299 }
1da177e4
LT
1300 }
1301 }
1302
d8d1f30b
CG
1303 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1304 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1305 if (!dst_mtu(&rt->dst))
1306 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1307 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1308 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1309 rt->dst.dev = dev;
1da177e4 1310 rt->rt6i_idev = idev;
c71099ac 1311 rt->rt6i_table = table;
63152fc0 1312
c346dca1 1313 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1314
86872cb5 1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1316
1317out:
1318 if (dev)
1319 dev_put(dev);
1320 if (idev)
1321 in6_dev_put(idev);
1322 if (rt)
d8d1f30b 1323 dst_free(&rt->dst);
1da177e4
LT
1324 return err;
1325}
1326
86872cb5 1327static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1328{
1329 int err;
c71099ac 1330 struct fib6_table *table;
c346dca1 1331 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1332
8ed67789 1333 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1334 return -ENOENT;
1335
c71099ac
TG
1336 table = rt->rt6i_table;
1337 write_lock_bh(&table->tb6_lock);
1da177e4 1338
86872cb5 1339 err = fib6_del(rt, info);
d8d1f30b 1340 dst_release(&rt->dst);
1da177e4 1341
c71099ac 1342 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1343
1344 return err;
1345}
1346
e0a1ad73
TG
1347int ip6_del_rt(struct rt6_info *rt)
1348{
4d1169c1 1349 struct nl_info info = {
c346dca1 1350 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1351 };
528c4ceb 1352 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1353}
1354
86872cb5 1355static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1356{
c71099ac 1357 struct fib6_table *table;
1da177e4
LT
1358 struct fib6_node *fn;
1359 struct rt6_info *rt;
1360 int err = -ESRCH;
1361
5578689a 1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1363 if (table == NULL)
1364 return err;
1365
1366 read_lock_bh(&table->tb6_lock);
1da177e4 1367
c71099ac 1368 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1369 &cfg->fc_dst, cfg->fc_dst_len,
1370 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1371
1da177e4 1372 if (fn) {
d8d1f30b 1373 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1374 if (cfg->fc_ifindex &&
1da177e4 1375 (rt->rt6i_dev == NULL ||
86872cb5 1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1377 continue;
86872cb5
TG
1378 if (cfg->fc_flags & RTF_GATEWAY &&
1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1380 continue;
86872cb5 1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1382 continue;
d8d1f30b 1383 dst_hold(&rt->dst);
c71099ac 1384 read_unlock_bh(&table->tb6_lock);
1da177e4 1385
86872cb5 1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1387 }
1388 }
c71099ac 1389 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1390
1391 return err;
1392}
1393
1394/*
1395 * Handle redirects
1396 */
a6279458
YH
1397struct ip6rd_flowi {
1398 struct flowi fl;
1399 struct in6_addr gateway;
1400};
1401
8ed67789
DL
1402static struct rt6_info *__ip6_route_redirect(struct net *net,
1403 struct fib6_table *table,
a6279458
YH
1404 struct flowi *fl,
1405 int flags)
1da177e4 1406{
a6279458
YH
1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408 struct rt6_info *rt;
e843b9e1 1409 struct fib6_node *fn;
c71099ac 1410
1da177e4 1411 /*
e843b9e1
YH
1412 * Get the "current" route for this destination and
1413 * check if the redirect has come from approriate router.
1414 *
1415 * RFC 2461 specifies that redirects should only be
1416 * accepted if they come from the nexthop to the target.
1417 * Due to the way the routes are chosen, this notion
1418 * is a bit fuzzy and one might need to check all possible
1419 * routes.
1da177e4 1420 */
1da177e4 1421
c71099ac 1422 read_lock_bh(&table->tb6_lock);
a6279458 1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1424restart:
d8d1f30b 1425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1426 /*
1427 * Current route is on-link; redirect is always invalid.
1428 *
1429 * Seems, previous statement is not true. It could
1430 * be node, which looks for us as on-link (f.e. proxy ndisc)
1431 * But then router serving it might decide, that we should
1432 * know truth 8)8) --ANK (980726).
1433 */
1434 if (rt6_check_expired(rt))
1435 continue;
1436 if (!(rt->rt6i_flags & RTF_GATEWAY))
1437 continue;
a6279458 1438 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1439 continue;
a6279458 1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1441 continue;
1442 break;
1443 }
a6279458 1444
cb15d9c2 1445 if (!rt)
8ed67789
DL
1446 rt = net->ipv6.ip6_null_entry;
1447 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1448out:
d8d1f30b 1449 dst_hold(&rt->dst);
a6279458 1450
c71099ac 1451 read_unlock_bh(&table->tb6_lock);
e843b9e1 1452
a6279458
YH
1453 return rt;
1454};
1455
1456static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457 struct in6_addr *src,
1458 struct in6_addr *gateway,
1459 struct net_device *dev)
1460{
adaa70bb 1461 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1462 struct net *net = dev_net(dev);
a6279458
YH
1463 struct ip6rd_flowi rdfl = {
1464 .fl = {
1465 .oif = dev->ifindex,
1466 .nl_u = {
1467 .ip6_u = {
1468 .daddr = *dest,
1469 .saddr = *src,
1470 },
1471 },
1472 },
a6279458 1473 };
adaa70bb 1474
86c36ce4
BH
1475 ipv6_addr_copy(&rdfl.gateway, gateway);
1476
adaa70bb
TG
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1479
5578689a 1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1481 flags, __ip6_route_redirect);
a6279458
YH
1482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
c346dca1 1490 struct net *net = dev_net(neigh->dev);
a6279458
YH
1491
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
8ed67789 1494 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
a6279458 1498 goto out;
1da177e4
LT
1499 }
1500
1da177e4
LT
1501 /*
1502 * We have finally decided to accept it.
1503 */
1504
1ab1457c 1505 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1510 );
1511
1512 /*
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1516 */
d8d1f30b 1517 dst_confirm(&rt->dst);
1da177e4
LT
1518
1519 /* Duplicate redirect: silently ignore. */
d8d1f30b 1520 if (neigh == rt->dst.neighbour)
1da177e4
LT
1521 goto out;
1522
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1526
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
d8d1f30b 1533 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1534
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
d8d1f30b
CG
1538 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540 dst_mtu(&nrt->dst));
1da177e4 1541
40e22e8f 1542 if (ip6_ins_rt(nrt))
1da177e4
LT
1543 goto out;
1544
d8d1f30b
CG
1545 netevent.old = &rt->dst;
1546 netevent.new = &nrt->dst;
8d71740c
TT
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1da177e4 1549 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1550 ip6_del_rt(rt);
1da177e4
LT
1551 return;
1552 }
1553
1554out:
d8d1f30b 1555 dst_release(&rt->dst);
1da177e4
LT
1556}
1557
1558/*
1559 * Handle ICMP "packet too big" messages
1560 * i.e. Path MTU discovery
1561 */
1562
ae878ae2
1563static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1564 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1565{
1566 struct rt6_info *rt, *nrt;
1567 int allfrag = 0;
d3052b55 1568again:
ae878ae2 1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1570 if (rt == NULL)
1571 return;
1572
d3052b55
AV
1573 if (rt6_check_expired(rt)) {
1574 ip6_del_rt(rt);
1575 goto again;
1576 }
1577
d8d1f30b 1578 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1579 goto out;
1580
1581 if (pmtu < IPV6_MIN_MTU) {
1582 /*
1ab1457c 1583 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1584 * MTU (1280) and a fragment header should always be included
1585 * after a node receiving Too Big message reporting PMTU is
1586 * less than the IPv6 Minimum Link MTU.
1587 */
1588 pmtu = IPV6_MIN_MTU;
1589 allfrag = 1;
1590 }
1591
1592 /* New mtu received -> path was valid.
1593 They are sent only in response to data packets,
1594 so that this nexthop apparently is reachable. --ANK
1595 */
d8d1f30b 1596 dst_confirm(&rt->dst);
1da177e4
LT
1597
1598 /* Host route. If it is static, it would be better
1599 not to override it, but add new one, so that
1600 when cache entry will expire old pmtu
1601 would return automatically.
1602 */
1603 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1604 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1da177e4 1605 if (allfrag)
d8d1f30b
CG
1606 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1607 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1608 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1609 goto out;
1610 }
1611
1612 /* Network route.
1613 Two cases are possible:
1614 1. It is connected route. Action: COW
1615 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1616 */
d5315b50 1617 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1618 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1619 else
1620 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1621
d5315b50 1622 if (nrt) {
d8d1f30b 1623 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
a1e78363 1624 if (allfrag)
d8d1f30b 1625 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
a1e78363
YH
1626
1627 /* According to RFC 1981, detecting PMTU increase shouldn't be
1628 * happened within 5 mins, the recommended timer is 10 mins.
1629 * Here this route expiration time is set to ip6_rt_mtu_expires
1630 * which is 10 mins. After 10 mins the decreased pmtu is expired
1631 * and detecting PMTU increase will be automatically happened.
1632 */
d8d1f30b 1633 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1634 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1635
40e22e8f 1636 ip6_ins_rt(nrt);
1da177e4 1637 }
1da177e4 1638out:
d8d1f30b 1639 dst_release(&rt->dst);
1da177e4
LT
1640}
1641
ae878ae2
1642void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1643 struct net_device *dev, u32 pmtu)
1644{
1645 struct net *net = dev_net(dev);
1646
1647 /*
1648 * RFC 1981 states that a node "MUST reduce the size of the packets it
1649 * is sending along the path" that caused the Packet Too Big message.
1650 * Since it's not possible in the general case to determine which
1651 * interface was used to send the original packet, we update the MTU
1652 * on the interface that will be used to send future packets. We also
1653 * update the MTU on the interface that received the Packet Too Big in
1654 * case the original packet was forced out that interface with
1655 * SO_BINDTODEVICE or similar. This is the next best thing to the
1656 * correct behaviour, which would be to update the MTU on all
1657 * interfaces.
1658 */
1659 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1661}
1662
1da177e4
LT
1663/*
1664 * Misc support functions
1665 */
1666
1667static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1668{
c346dca1 1669 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1670 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1671
1672 if (rt) {
d8d1f30b
CG
1673 rt->dst.input = ort->dst.input;
1674 rt->dst.output = ort->dst.output;
1675
1676 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1677 rt->dst.error = ort->dst.error;
1678 rt->dst.dev = ort->dst.dev;
1679 if (rt->dst.dev)
1680 dev_hold(rt->dst.dev);
1da177e4
LT
1681 rt->rt6i_idev = ort->rt6i_idev;
1682 if (rt->rt6i_idev)
1683 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1684 rt->dst.lastuse = jiffies;
1da177e4
LT
1685 rt->rt6i_expires = 0;
1686
1687 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1688 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1689 rt->rt6i_metric = 0;
1690
1691 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1692#ifdef CONFIG_IPV6_SUBTREES
1693 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1694#endif
c71099ac 1695 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1696 }
1697 return rt;
1698}
1699
70ceb4f5 1700#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1701static struct rt6_info *rt6_get_route_info(struct net *net,
1702 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1703 struct in6_addr *gwaddr, int ifindex)
1704{
1705 struct fib6_node *fn;
1706 struct rt6_info *rt = NULL;
c71099ac
TG
1707 struct fib6_table *table;
1708
efa2cea0 1709 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1710 if (table == NULL)
1711 return NULL;
70ceb4f5 1712
c71099ac
TG
1713 write_lock_bh(&table->tb6_lock);
1714 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1715 if (!fn)
1716 goto out;
1717
d8d1f30b 1718 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1719 if (rt->rt6i_dev->ifindex != ifindex)
1720 continue;
1721 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1722 continue;
1723 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1724 continue;
d8d1f30b 1725 dst_hold(&rt->dst);
70ceb4f5
YH
1726 break;
1727 }
1728out:
c71099ac 1729 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1730 return rt;
1731}
1732
efa2cea0
DL
1733static struct rt6_info *rt6_add_route_info(struct net *net,
1734 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1735 struct in6_addr *gwaddr, int ifindex,
1736 unsigned pref)
1737{
86872cb5
TG
1738 struct fib6_config cfg = {
1739 .fc_table = RT6_TABLE_INFO,
238fc7ea 1740 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1741 .fc_ifindex = ifindex,
1742 .fc_dst_len = prefixlen,
1743 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1744 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1745 .fc_nlinfo.pid = 0,
1746 .fc_nlinfo.nlh = NULL,
1747 .fc_nlinfo.nl_net = net,
86872cb5
TG
1748 };
1749
1750 ipv6_addr_copy(&cfg.fc_dst, prefix);
1751 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1752
e317da96
YH
1753 /* We should treat it as a default route if prefix length is 0. */
1754 if (!prefixlen)
86872cb5 1755 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1756
86872cb5 1757 ip6_route_add(&cfg);
70ceb4f5 1758
efa2cea0 1759 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1760}
1761#endif
1762
1da177e4 1763struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1764{
1da177e4 1765 struct rt6_info *rt;
c71099ac 1766 struct fib6_table *table;
1da177e4 1767
c346dca1 1768 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1769 if (table == NULL)
1770 return NULL;
1da177e4 1771
c71099ac 1772 write_lock_bh(&table->tb6_lock);
d8d1f30b 1773 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1774 if (dev == rt->rt6i_dev &&
045927ff 1775 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1776 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1777 break;
1778 }
1779 if (rt)
d8d1f30b 1780 dst_hold(&rt->dst);
c71099ac 1781 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1782 return rt;
1783}
1784
1785struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1786 struct net_device *dev,
1787 unsigned int pref)
1da177e4 1788{
86872cb5
TG
1789 struct fib6_config cfg = {
1790 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1791 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1792 .fc_ifindex = dev->ifindex,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1794 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
c346dca1 1797 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1798 };
1da177e4 1799
86872cb5 1800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1801
86872cb5 1802 ip6_route_add(&cfg);
1da177e4 1803
1da177e4
LT
1804 return rt6_get_dflt_router(gwaddr, dev);
1805}
1806
7b4da532 1807void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1808{
1809 struct rt6_info *rt;
c71099ac
TG
1810 struct fib6_table *table;
1811
1812 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1813 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1814 if (table == NULL)
1815 return;
1da177e4
LT
1816
1817restart:
c71099ac 1818 read_lock_bh(&table->tb6_lock);
d8d1f30b 1819 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1820 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1821 dst_hold(&rt->dst);
c71099ac 1822 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1823 ip6_del_rt(rt);
1da177e4
LT
1824 goto restart;
1825 }
1826 }
c71099ac 1827 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1828}
1829
5578689a
DL
1830static void rtmsg_to_fib6_config(struct net *net,
1831 struct in6_rtmsg *rtmsg,
86872cb5
TG
1832 struct fib6_config *cfg)
1833{
1834 memset(cfg, 0, sizeof(*cfg));
1835
1836 cfg->fc_table = RT6_TABLE_MAIN;
1837 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1838 cfg->fc_metric = rtmsg->rtmsg_metric;
1839 cfg->fc_expires = rtmsg->rtmsg_info;
1840 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1841 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1842 cfg->fc_flags = rtmsg->rtmsg_flags;
1843
5578689a 1844 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1845
86872cb5
TG
1846 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1847 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1848 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1849}
1850
5578689a 1851int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1852{
86872cb5 1853 struct fib6_config cfg;
1da177e4
LT
1854 struct in6_rtmsg rtmsg;
1855 int err;
1856
1857 switch(cmd) {
1858 case SIOCADDRT: /* Add a route */
1859 case SIOCDELRT: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN))
1861 return -EPERM;
1862 err = copy_from_user(&rtmsg, arg,
1863 sizeof(struct in6_rtmsg));
1864 if (err)
1865 return -EFAULT;
86872cb5 1866
5578689a 1867 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1868
1da177e4
LT
1869 rtnl_lock();
1870 switch (cmd) {
1871 case SIOCADDRT:
86872cb5 1872 err = ip6_route_add(&cfg);
1da177e4
LT
1873 break;
1874 case SIOCDELRT:
86872cb5 1875 err = ip6_route_del(&cfg);
1da177e4
LT
1876 break;
1877 default:
1878 err = -EINVAL;
1879 }
1880 rtnl_unlock();
1881
1882 return err;
3ff50b79 1883 }
1da177e4
LT
1884
1885 return -EINVAL;
1886}
1887
1888/*
1889 * Drop the packet on the floor
1890 */
1891
d5fdd6ba 1892static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1893{
612f09e8 1894 int type;
adf30907 1895 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1896 switch (ipstats_mib_noroutes) {
1897 case IPSTATS_MIB_INNOROUTES:
0660e03f 1898 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1899 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1902 break;
1903 }
1904 /* FALLTHROUGH */
1905 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 ipstats_mib_noroutes);
612f09e8
YH
1908 break;
1909 }
3ffe533c 1910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1911 kfree_skb(skb);
1912 return 0;
1913}
1914
9ce8ade0
TG
1915static int ip6_pkt_discard(struct sk_buff *skb)
1916{
612f09e8 1917 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1918}
1919
20380731 1920static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1921{
adf30907 1922 skb->dev = skb_dst(skb)->dev;
612f09e8 1923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1924}
1925
6723ab54
DM
1926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1927
9ce8ade0
TG
1928static int ip6_pkt_prohibit(struct sk_buff *skb)
1929{
612f09e8 1930 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1931}
1932
1933static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1934{
adf30907 1935 skb->dev = skb_dst(skb)->dev;
612f09e8 1936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1937}
1938
6723ab54
DM
1939#endif
1940
1da177e4
LT
1941/*
1942 * Allocate a dst for local (unicast / anycast) address.
1943 */
1944
1945struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1946 const struct in6_addr *addr,
1947 int anycast)
1948{
c346dca1 1949 struct net *net = dev_net(idev->dev);
86393e52 1950 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1951 struct neighbour *neigh;
1da177e4 1952
40385653
BG
1953 if (rt == NULL) {
1954 if (net_ratelimit())
1955 pr_warning("IPv6: Maximum number of routes reached,"
1956 " consider increasing route/max_size.\n");
1da177e4 1957 return ERR_PTR(-ENOMEM);
40385653 1958 }
1da177e4 1959
5578689a 1960 dev_hold(net->loopback_dev);
1da177e4
LT
1961 in6_dev_hold(idev);
1962
d8d1f30b
CG
1963 rt->dst.flags = DST_HOST;
1964 rt->dst.input = ip6_input;
1965 rt->dst.output = ip6_output;
5578689a 1966 rt->rt6i_dev = net->loopback_dev;
1da177e4 1967 rt->rt6i_idev = idev;
d8d1f30b
CG
1968 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1969 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1970 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1971 rt->dst.obsolete = -1;
1da177e4
LT
1972
1973 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1974 if (anycast)
1975 rt->rt6i_flags |= RTF_ANYCAST;
1976 else
1da177e4 1977 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1978 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1979 if (IS_ERR(neigh)) {
d8d1f30b 1980 dst_free(&rt->dst);
14deae41
DM
1981
1982 /* We are casting this because that is the return
1983 * value type. But an errno encoded pointer is the
1984 * same regardless of the underlying pointer type,
1985 * and that's what we are returning. So this is OK.
1986 */
1987 return (struct rt6_info *) neigh;
1da177e4 1988 }
14deae41 1989 rt->rt6i_nexthop = neigh;
1da177e4
LT
1990
1991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1992 rt->rt6i_dst.plen = 128;
5578689a 1993 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1994
d8d1f30b 1995 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1996
1997 return rt;
1998}
1999
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through the tree walkers. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is exempt */
};
2004
1da177e4
LT
2005static int fib6_ifdown(struct rt6_info *rt, void *arg)
2006{
8ed67789
DL
2007 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2008 struct net *net = ((struct arg_dev_net *)arg)->net;
2009
2010 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2011 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
2012 RT6_TRACE("deleted by ifdown %p\n", rt);
2013 return -1;
2014 }
2015 return 0;
2016}
2017
f3db4851 2018void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2019{
8ed67789
DL
2020 struct arg_dev_net adn = {
2021 .dev = dev,
2022 .net = net,
2023 };
2024
2025 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2026 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2027}
2028
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2035static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2036{
2037 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2038 struct inet6_dev *idev;
c346dca1 2039 struct net *net = dev_net(arg->dev);
1da177e4
LT
2040
2041 /* In IPv6 pmtu discovery is not optional,
2042 so that RTAX_MTU lock cannot disable it.
2043 We still use this lock to block changes
2044 caused by addrconf/ndisc.
2045 */
2046
2047 idev = __in6_dev_get(arg->dev);
2048 if (idev == NULL)
2049 return 0;
2050
2051 /* For administrative MTU increase, there is no way to discover
2052 IPv6 PMTU increase, so PMTU increase should be updated here.
2053 Since RFC 1981 doesn't include administrative MTU increase
2054 update PMTU increase is a MUST. (i.e. jumbo frame)
2055 */
2056 /*
2057 If new MTU is less than route PMTU, this new MTU will be the
2058 lowest MTU in the path, update the route PMTU to reflect PMTU
2059 decreases; if new MTU is greater than route PMTU, and the
2060 old MTU is the lowest MTU in the path, update the route PMTU
2061 to reflect the increase. In this case if the other nodes' MTU
2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2063 PMTU discouvery.
2064 */
2065 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2066 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2067 (dst_mtu(&rt->dst) >= arg->mtu ||
2068 (dst_mtu(&rt->dst) < arg->mtu &&
2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2070 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2071 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2072 }
1da177e4
LT
2073 return 0;
2074}
2075
2076void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2077{
c71099ac
TG
2078 struct rt6_mtu_change_arg arg = {
2079 .dev = dev,
2080 .mtu = mtu,
2081 };
1da177e4 2082
c346dca1 2083 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2084}
2085
ef7c79ed 2086static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2087 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2088 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2089 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2090 [RTA_PRIORITY] = { .type = NLA_U32 },
2091 [RTA_METRICS] = { .type = NLA_NESTED },
2092};
2093
2094static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2095 struct fib6_config *cfg)
1da177e4 2096{
86872cb5
TG
2097 struct rtmsg *rtm;
2098 struct nlattr *tb[RTA_MAX+1];
2099 int err;
1da177e4 2100
86872cb5
TG
2101 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2102 if (err < 0)
2103 goto errout;
1da177e4 2104
86872cb5
TG
2105 err = -EINVAL;
2106 rtm = nlmsg_data(nlh);
2107 memset(cfg, 0, sizeof(*cfg));
2108
2109 cfg->fc_table = rtm->rtm_table;
2110 cfg->fc_dst_len = rtm->rtm_dst_len;
2111 cfg->fc_src_len = rtm->rtm_src_len;
2112 cfg->fc_flags = RTF_UP;
2113 cfg->fc_protocol = rtm->rtm_protocol;
2114
2115 if (rtm->rtm_type == RTN_UNREACHABLE)
2116 cfg->fc_flags |= RTF_REJECT;
2117
ab79ad14
2118 if (rtm->rtm_type == RTN_LOCAL)
2119 cfg->fc_flags |= RTF_LOCAL;
2120
86872cb5
TG
2121 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2122 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2123 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2124
2125 if (tb[RTA_GATEWAY]) {
2126 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2127 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2128 }
86872cb5
TG
2129
2130 if (tb[RTA_DST]) {
2131 int plen = (rtm->rtm_dst_len + 7) >> 3;
2132
2133 if (nla_len(tb[RTA_DST]) < plen)
2134 goto errout;
2135
2136 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2137 }
86872cb5
TG
2138
2139 if (tb[RTA_SRC]) {
2140 int plen = (rtm->rtm_src_len + 7) >> 3;
2141
2142 if (nla_len(tb[RTA_SRC]) < plen)
2143 goto errout;
2144
2145 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2146 }
86872cb5
TG
2147
2148 if (tb[RTA_OIF])
2149 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2150
2151 if (tb[RTA_PRIORITY])
2152 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2153
2154 if (tb[RTA_METRICS]) {
2155 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2156 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2157 }
86872cb5
TG
2158
2159 if (tb[RTA_TABLE])
2160 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2161
2162 err = 0;
2163errout:
2164 return err;
1da177e4
LT
2165}
2166
c127ea2c 2167static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2168{
86872cb5
TG
2169 struct fib6_config cfg;
2170 int err;
1da177e4 2171
86872cb5
TG
2172 err = rtm_to_fib6_config(skb, nlh, &cfg);
2173 if (err < 0)
2174 return err;
2175
2176 return ip6_route_del(&cfg);
1da177e4
LT
2177}
2178
c127ea2c 2179static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2180{
86872cb5
TG
2181 struct fib6_config cfg;
2182 int err;
1da177e4 2183
86872cb5
TG
2184 err = rtm_to_fib6_config(skb, nlh, &cfg);
2185 if (err < 0)
2186 return err;
2187
2188 return ip6_route_add(&cfg);
1da177e4
LT
2189}
2190
339bf98f
TG
2191static inline size_t rt6_nlmsg_size(void)
2192{
2193 return NLMSG_ALIGN(sizeof(struct rtmsg))
2194 + nla_total_size(16) /* RTA_SRC */
2195 + nla_total_size(16) /* RTA_DST */
2196 + nla_total_size(16) /* RTA_GATEWAY */
2197 + nla_total_size(16) /* RTA_PREFSRC */
2198 + nla_total_size(4) /* RTA_TABLE */
2199 + nla_total_size(4) /* RTA_IIF */
2200 + nla_total_size(4) /* RTA_OIF */
2201 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2202 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2203 + nla_total_size(sizeof(struct rta_cacheinfo));
2204}
2205
191cd582
BH
2206static int rt6_fill_node(struct net *net,
2207 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2208 struct in6_addr *dst, struct in6_addr *src,
2209 int iif, int type, u32 pid, u32 seq,
7bc570c8 2210 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2211{
2212 struct rtmsg *rtm;
2d7202bf 2213 struct nlmsghdr *nlh;
e3703b3d 2214 long expires;
9e762a4a 2215 u32 table;
1da177e4
LT
2216
2217 if (prefix) { /* user wants prefix routes only */
2218 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2219 /* success since this is not a prefix route */
2220 return 1;
2221 }
2222 }
2223
2d7202bf
TG
2224 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2225 if (nlh == NULL)
26932566 2226 return -EMSGSIZE;
2d7202bf
TG
2227
2228 rtm = nlmsg_data(nlh);
1da177e4
LT
2229 rtm->rtm_family = AF_INET6;
2230 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2231 rtm->rtm_src_len = rt->rt6i_src.plen;
2232 rtm->rtm_tos = 0;
c71099ac 2233 if (rt->rt6i_table)
9e762a4a 2234 table = rt->rt6i_table->tb6_id;
c71099ac 2235 else
9e762a4a
PM
2236 table = RT6_TABLE_UNSPEC;
2237 rtm->rtm_table = table;
2d7202bf 2238 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2239 if (rt->rt6i_flags&RTF_REJECT)
2240 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2241 else if (rt->rt6i_flags&RTF_LOCAL)
2242 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2243 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2244 rtm->rtm_type = RTN_LOCAL;
2245 else
2246 rtm->rtm_type = RTN_UNICAST;
2247 rtm->rtm_flags = 0;
2248 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2249 rtm->rtm_protocol = rt->rt6i_protocol;
2250 if (rt->rt6i_flags&RTF_DYNAMIC)
2251 rtm->rtm_protocol = RTPROT_REDIRECT;
2252 else if (rt->rt6i_flags & RTF_ADDRCONF)
2253 rtm->rtm_protocol = RTPROT_KERNEL;
2254 else if (rt->rt6i_flags&RTF_DEFAULT)
2255 rtm->rtm_protocol = RTPROT_RA;
2256
2257 if (rt->rt6i_flags&RTF_CACHE)
2258 rtm->rtm_flags |= RTM_F_CLONED;
2259
2260 if (dst) {
2d7202bf 2261 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2262 rtm->rtm_dst_len = 128;
1da177e4 2263 } else if (rtm->rtm_dst_len)
2d7202bf 2264 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2265#ifdef CONFIG_IPV6_SUBTREES
2266 if (src) {
2d7202bf 2267 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2268 rtm->rtm_src_len = 128;
1da177e4 2269 } else if (rtm->rtm_src_len)
2d7202bf 2270 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2271#endif
7bc570c8
YH
2272 if (iif) {
2273#ifdef CONFIG_IPV6_MROUTE
2274 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2275 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2276 if (err <= 0) {
2277 if (!nowait) {
2278 if (err == 0)
2279 return 0;
2280 goto nla_put_failure;
2281 } else {
2282 if (err == -EMSGSIZE)
2283 goto nla_put_failure;
2284 }
2285 }
2286 } else
2287#endif
2288 NLA_PUT_U32(skb, RTA_IIF, iif);
2289 } else if (dst) {
d8d1f30b 2290 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2291 struct in6_addr saddr_buf;
191cd582 2292 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2293 dst, 0, &saddr_buf) == 0)
2d7202bf 2294 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2295 }
2d7202bf 2296
d8d1f30b 2297 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2d7202bf
TG
2298 goto nla_put_failure;
2299
d8d1f30b
CG
2300 if (rt->dst.neighbour)
2301 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2302
d8d1f30b 2303 if (rt->dst.dev)
2d7202bf
TG
2304 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2305
2306 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2307
36e3deae
YH
2308 if (!(rt->rt6i_flags & RTF_EXPIRES))
2309 expires = 0;
2310 else if (rt->rt6i_expires - jiffies < INT_MAX)
2311 expires = rt->rt6i_expires - jiffies;
2312 else
2313 expires = INT_MAX;
69cdf8f9 2314
d8d1f30b
CG
2315 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2316 expires, rt->dst.error) < 0)
e3703b3d 2317 goto nla_put_failure;
2d7202bf
TG
2318
2319 return nlmsg_end(skb, nlh);
2320
2321nla_put_failure:
26932566
PM
2322 nlmsg_cancel(skb, nlh);
2323 return -EMSGSIZE;
1da177e4
LT
2324}
2325
1b43af54 2326int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2327{
2328 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2329 int prefix;
2330
2d7202bf
TG
2331 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2332 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2333 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2334 } else
2335 prefix = 0;
2336
191cd582
BH
2337 return rt6_fill_node(arg->net,
2338 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2339 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2340 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2341}
2342
c127ea2c 2343static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2344{
3b1e0a65 2345 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2346 struct nlattr *tb[RTA_MAX+1];
2347 struct rt6_info *rt;
1da177e4 2348 struct sk_buff *skb;
ab364a6f 2349 struct rtmsg *rtm;
1da177e4 2350 struct flowi fl;
ab364a6f 2351 int err, iif = 0;
1da177e4 2352
ab364a6f
TG
2353 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2354 if (err < 0)
2355 goto errout;
1da177e4 2356
ab364a6f 2357 err = -EINVAL;
1da177e4 2358 memset(&fl, 0, sizeof(fl));
1da177e4 2359
ab364a6f
TG
2360 if (tb[RTA_SRC]) {
2361 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2362 goto errout;
2363
2364 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2365 }
2366
2367 if (tb[RTA_DST]) {
2368 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2369 goto errout;
2370
2371 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2372 }
2373
2374 if (tb[RTA_IIF])
2375 iif = nla_get_u32(tb[RTA_IIF]);
2376
2377 if (tb[RTA_OIF])
2378 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2379
2380 if (iif) {
2381 struct net_device *dev;
5578689a 2382 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2383 if (!dev) {
2384 err = -ENODEV;
ab364a6f 2385 goto errout;
1da177e4
LT
2386 }
2387 }
2388
ab364a6f
TG
2389 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2390 if (skb == NULL) {
2391 err = -ENOBUFS;
2392 goto errout;
2393 }
1da177e4 2394
ab364a6f
TG
2395 /* Reserve room for dummy headers, this skb can pass
2396 through good chunk of routing engine.
2397 */
459a98ed 2398 skb_reset_mac_header(skb);
ab364a6f 2399 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2400
8a3edd80 2401 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2402 skb_dst_set(skb, &rt->dst);
1da177e4 2403
191cd582 2404 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2405 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2406 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2407 if (err < 0) {
ab364a6f
TG
2408 kfree_skb(skb);
2409 goto errout;
1da177e4
LT
2410 }
2411
5578689a 2412 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2413errout:
1da177e4 2414 return err;
1da177e4
LT
2415}
2416
86872cb5 2417void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2418{
2419 struct sk_buff *skb;
5578689a 2420 struct net *net = info->nl_net;
528c4ceb
DL
2421 u32 seq;
2422 int err;
2423
2424 err = -ENOBUFS;
2425 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2426
339bf98f 2427 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2428 if (skb == NULL)
2429 goto errout;
2430
191cd582 2431 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2432 event, info->pid, seq, 0, 0, 0);
26932566
PM
2433 if (err < 0) {
2434 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2435 WARN_ON(err == -EMSGSIZE);
2436 kfree_skb(skb);
2437 goto errout;
2438 }
1ce85fe4
PNA
2439 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2440 info->nlh, gfp_any());
2441 return;
21713ebc
TG
2442errout:
2443 if (err < 0)
5578689a 2444 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2445}
2446
8ed67789
DL
2447static int ip6_route_dev_notify(struct notifier_block *this,
2448 unsigned long event, void *data)
2449{
2450 struct net_device *dev = (struct net_device *)data;
c346dca1 2451 struct net *net = dev_net(dev);
8ed67789
DL
2452
2453 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2454 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2455 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2456#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2457 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2458 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2459 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2460 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2461#endif
2462 }
2463
2464 return NOTIFY_OK;
2465}
2466
1da177e4
LT
2467/*
2468 * /proc
2469 */
2470
2471#ifdef CONFIG_PROC_FS
2472
/* Sum of the field widths of one route line (see rt6_info_route()). */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): not referenced by the seq_file based code in this part
 * of the file -- possibly a leftover; confirm before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2483
2484static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2485{
33120b30 2486 struct seq_file *m = p_arg;
1da177e4 2487
4b7a4274 2488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2489
2490#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2491 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2492#else
33120b30 2493 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2494#endif
2495
2496 if (rt->rt6i_nexthop) {
4b7a4274 2497 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2498 } else {
33120b30 2499 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2500 }
33120b30 2501 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2502 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2503 rt->dst.__use, rt->rt6i_flags,
33120b30 2504 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2505 return 0;
2506}
2507
33120b30 2508static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2509{
f3db4851
DL
2510 struct net *net = (struct net *)m->private;
2511 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2512 return 0;
2513}
1da177e4 2514
33120b30
AD
2515static int ipv6_route_open(struct inode *inode, struct file *file)
2516{
de05c557 2517 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2518}
2519
33120b30
AD
2520static const struct file_operations ipv6_route_proc_fops = {
2521 .owner = THIS_MODULE,
2522 .open = ipv6_route_open,
2523 .read = seq_read,
2524 .llseek = seq_lseek,
b6fcbdb4 2525 .release = single_release_net,
33120b30
AD
2526};
2527
1da177e4
LT
2528static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2529{
69ddb805 2530 struct net *net = (struct net *)seq->private;
1da177e4 2531 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2532 net->ipv6.rt6_stats->fib_nodes,
2533 net->ipv6.rt6_stats->fib_route_nodes,
2534 net->ipv6.rt6_stats->fib_rt_alloc,
2535 net->ipv6.rt6_stats->fib_rt_entries,
2536 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2537 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2538 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2539
2540 return 0;
2541}
2542
2543static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2544{
de05c557 2545 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2546}
2547
9a32144e 2548static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2549 .owner = THIS_MODULE,
2550 .open = rt6_stats_seq_open,
2551 .read = seq_read,
2552 .llseek = seq_lseek,
b6fcbdb4 2553 .release = single_release_net,
1da177e4
LT
2554};
2555#endif /* CONFIG_PROC_FS */
2556
2557#ifdef CONFIG_SYSCTL
2558
1da177e4 2559static
8d65af78 2560int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2561 void __user *buffer, size_t *lenp, loff_t *ppos)
2562{
5b7c931d
DL
2563 struct net *net = current->nsproxy->net_ns;
2564 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2565 if (write) {
8d65af78 2566 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2567 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2568 return 0;
2569 } else
2570 return -EINVAL;
2571}
2572
760f2d01 2573ctl_table ipv6_route_table_template[] = {
1ab1457c 2574 {
1da177e4 2575 .procname = "flush",
4990509f 2576 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2577 .maxlen = sizeof(int),
89c8b3a1 2578 .mode = 0200,
6d9f239a 2579 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2580 },
2581 {
1da177e4 2582 .procname = "gc_thresh",
9a7ec3a9 2583 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
6d9f239a 2586 .proc_handler = proc_dointvec,
1da177e4
LT
2587 },
2588 {
1da177e4 2589 .procname = "max_size",
4990509f 2590 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
6d9f239a 2593 .proc_handler = proc_dointvec,
1da177e4
LT
2594 },
2595 {
1da177e4 2596 .procname = "gc_min_interval",
4990509f 2597 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
6d9f239a 2600 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2601 },
2602 {
1da177e4 2603 .procname = "gc_timeout",
4990509f 2604 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2605 .maxlen = sizeof(int),
2606 .mode = 0644,
6d9f239a 2607 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2608 },
2609 {
1da177e4 2610 .procname = "gc_interval",
4990509f 2611 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
6d9f239a 2614 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2615 },
2616 {
1da177e4 2617 .procname = "gc_elasticity",
4990509f 2618 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2619 .maxlen = sizeof(int),
2620 .mode = 0644,
f3d3f616 2621 .proc_handler = proc_dointvec,
1da177e4
LT
2622 },
2623 {
1da177e4 2624 .procname = "mtu_expires",
4990509f 2625 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2626 .maxlen = sizeof(int),
2627 .mode = 0644,
6d9f239a 2628 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2629 },
2630 {
1da177e4 2631 .procname = "min_adv_mss",
4990509f 2632 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
f3d3f616 2635 .proc_handler = proc_dointvec,
1da177e4
LT
2636 },
2637 {
1da177e4 2638 .procname = "gc_min_interval_ms",
4990509f 2639 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2640 .maxlen = sizeof(int),
2641 .mode = 0644,
6d9f239a 2642 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2643 },
f8572d8f 2644 { }
1da177e4
LT
2645};
2646
2c8c1e72 2647struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2648{
2649 struct ctl_table *table;
2650
2651 table = kmemdup(ipv6_route_table_template,
2652 sizeof(ipv6_route_table_template),
2653 GFP_KERNEL);
5ee09105
YH
2654
2655 if (table) {
2656 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2666 }
2667
760f2d01
DL
2668 return table;
2669}
1da177e4
LT
2670#endif
2671
2c8c1e72 2672static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2673{
633d424b 2674 int ret = -ENOMEM;
8ed67789 2675
86393e52
AD
2676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2678
fc66f95c
ED
2679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2681
8ed67789
DL
2682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
fc66f95c 2686 goto out_ip6_dst_entries;
d8d1f30b 2687 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2690
2691#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2692 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2693 sizeof(*net->ipv6.ip6_prohibit_entry),
2694 GFP_KERNEL);
68fffc67
PZ
2695 if (!net->ipv6.ip6_prohibit_entry)
2696 goto out_ip6_null_entry;
d8d1f30b 2697 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2698 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2699 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2700
2701 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2702 sizeof(*net->ipv6.ip6_blk_hole_entry),
2703 GFP_KERNEL);
68fffc67
PZ
2704 if (!net->ipv6.ip6_blk_hole_entry)
2705 goto out_ip6_prohibit_entry;
d8d1f30b 2706 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2707 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2708 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2709#endif
2710
b339a47c
PZ
2711 net->ipv6.sysctl.flush_delay = 0;
2712 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2713 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2714 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2715 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2716 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2717 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2718 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2719
cdb18761
DL
2720#ifdef CONFIG_PROC_FS
2721 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2722 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2723#endif
6891a346
BT
2724 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2725
8ed67789
DL
2726 ret = 0;
2727out:
2728 return ret;
f2fc6a54 2729
68fffc67
PZ
2730#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2731out_ip6_prohibit_entry:
2732 kfree(net->ipv6.ip6_prohibit_entry);
2733out_ip6_null_entry:
2734 kfree(net->ipv6.ip6_null_entry);
2735#endif
fc66f95c
ED
2736out_ip6_dst_entries:
2737 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2738out_ip6_dst_ops:
f2fc6a54 2739 goto out;
cdb18761
DL
2740}
2741
2c8c1e72 2742static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2743{
2744#ifdef CONFIG_PROC_FS
2745 proc_net_remove(net, "ipv6_route");
2746 proc_net_remove(net, "rt6_stats");
2747#endif
8ed67789
DL
2748 kfree(net->ipv6.ip6_null_entry);
2749#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2750 kfree(net->ipv6.ip6_prohibit_entry);
2751 kfree(net->ipv6.ip6_blk_hole_entry);
2752#endif
41bb78b4 2753 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2754}
2755
2756static struct pernet_operations ip6_route_net_ops = {
2757 .init = ip6_route_net_init,
2758 .exit = ip6_route_net_exit,
2759};
2760
8ed67789
DL
2761static struct notifier_block ip6_route_dev_notifier = {
2762 .notifier_call = ip6_route_dev_notify,
2763 .priority = 0,
2764};
2765
433d49c3 2766int __init ip6_route_init(void)
1da177e4 2767{
433d49c3
DL
2768 int ret;
2769
9a7ec3a9
DL
2770 ret = -ENOMEM;
2771 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2772 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2773 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2774 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2775 goto out;
14e50e57 2776
fc66f95c 2777 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2778 if (ret)
bdb3289f 2779 goto out_kmem_cache;
bdb3289f 2780
fc66f95c
ED
2781 ret = register_pernet_subsys(&ip6_route_net_ops);
2782 if (ret)
2783 goto out_dst_entries;
2784
5dc121e9
AE
2785 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2786
8ed67789
DL
2787 /* Registering of the loopback is done before this portion of code,
2788 * the loopback reference in rt6_info will not be taken, do it
2789 * manually for init_net */
d8d1f30b 2790 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2791 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2792 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2793 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2794 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2795 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2796 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2797 #endif
433d49c3
DL
2798 ret = fib6_init();
2799 if (ret)
8ed67789 2800 goto out_register_subsys;
433d49c3 2801
433d49c3
DL
2802 ret = xfrm6_init();
2803 if (ret)
cdb18761 2804 goto out_fib6_init;
c35b7e72 2805
433d49c3
DL
2806 ret = fib6_rules_init();
2807 if (ret)
2808 goto xfrm6_init;
7e5449c2 2809
433d49c3
DL
2810 ret = -ENOBUFS;
2811 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2812 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2813 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2814 goto fib6_rules_init;
c127ea2c 2815
8ed67789 2816 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2817 if (ret)
2818 goto fib6_rules_init;
8ed67789 2819
433d49c3
DL
2820out:
2821 return ret;
2822
2823fib6_rules_init:
433d49c3
DL
2824 fib6_rules_cleanup();
2825xfrm6_init:
433d49c3 2826 xfrm6_fini();
433d49c3 2827out_fib6_init:
433d49c3 2828 fib6_gc_cleanup();
8ed67789
DL
2829out_register_subsys:
2830 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2831out_dst_entries:
2832 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2833out_kmem_cache:
f2fc6a54 2834 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2835 goto out;
1da177e4
LT
2836}
2837
2838void ip6_route_cleanup(void)
2839{
8ed67789 2840 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2841 fib6_rules_cleanup();
1da177e4 2842 xfrm6_fini();
1da177e4 2843 fib6_gc_cleanup();
8ed67789 2844 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2845 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2846 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2847}