[IPV6]: Add ip6_local_out
[linux-2.6-block.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG >= 3 the RDBG() and
 * RT6_TRACE() helpers expand to printk() calls; below that they compile
 * away to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
73
519fbd87 74#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
70ceb4f5
YH
97#ifdef CONFIG_IPV6_ROUTE_INFO
98static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
99 struct in6_addr *gwaddr, int ifindex,
100 unsigned pref);
101static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex);
103#endif
104
1da177e4
LT
105static struct dst_ops ip6_dst_ops = {
106 .family = AF_INET6,
107 .protocol = __constant_htons(ETH_P_IPV6),
108 .gc = ip6_dst_gc,
109 .gc_thresh = 1024,
110 .check = ip6_dst_check,
111 .destroy = ip6_dst_destroy,
112 .ifdown = ip6_dst_ifdown,
113 .negative_advice = ip6_negative_advice,
114 .link_failure = ip6_link_failure,
115 .update_pmtu = ip6_rt_update_pmtu,
116 .entry_size = sizeof(struct rt6_info),
117};
118
14e50e57
DM
119static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
120{
121}
122
123static struct dst_ops ip6_dst_blackhole_ops = {
124 .family = AF_INET6,
125 .protocol = __constant_htons(ETH_P_IPV6),
126 .destroy = ip6_dst_destroy,
127 .check = ip6_dst_check,
128 .update_pmtu = ip6_rt_blackhole_update_pmtu,
129 .entry_size = sizeof(struct rt6_info),
130};
131
1da177e4
LT
132struct rt6_info ip6_null_entry = {
133 .u = {
134 .dst = {
135 .__refcnt = ATOMIC_INIT(1),
136 .__use = 1,
1da177e4
LT
137 .obsolete = -1,
138 .error = -ENETUNREACH,
139 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
140 .input = ip6_pkt_discard,
141 .output = ip6_pkt_discard_out,
142 .ops = &ip6_dst_ops,
143 .path = (struct dst_entry*)&ip6_null_entry,
144 }
145 },
146 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
147 .rt6i_metric = ~(u32) 0,
148 .rt6i_ref = ATOMIC_INIT(1),
149};
150
101367c2
TG
151#ifdef CONFIG_IPV6_MULTIPLE_TABLES
152
6723ab54
DM
153static int ip6_pkt_prohibit(struct sk_buff *skb);
154static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 155
101367c2
TG
156struct rt6_info ip6_prohibit_entry = {
157 .u = {
158 .dst = {
159 .__refcnt = ATOMIC_INIT(1),
160 .__use = 1,
101367c2
TG
161 .obsolete = -1,
162 .error = -EACCES,
163 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
164 .input = ip6_pkt_prohibit,
165 .output = ip6_pkt_prohibit_out,
101367c2
TG
166 .ops = &ip6_dst_ops,
167 .path = (struct dst_entry*)&ip6_prohibit_entry,
168 }
169 },
170 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
171 .rt6i_metric = ~(u32) 0,
172 .rt6i_ref = ATOMIC_INIT(1),
173};
174
175struct rt6_info ip6_blk_hole_entry = {
176 .u = {
177 .dst = {
178 .__refcnt = ATOMIC_INIT(1),
179 .__use = 1,
101367c2
TG
180 .obsolete = -1,
181 .error = -EINVAL,
182 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
183 .input = dst_discard,
184 .output = dst_discard,
101367c2
TG
185 .ops = &ip6_dst_ops,
186 .path = (struct dst_entry*)&ip6_blk_hole_entry,
187 }
188 },
189 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
190 .rt6i_metric = ~(u32) 0,
191 .rt6i_ref = ATOMIC_INIT(1),
192};
193
194#endif
195
1da177e4
LT
196/* allocate dst with ip6_dst_ops */
197static __inline__ struct rt6_info *ip6_dst_alloc(void)
198{
199 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
200}
201
202static void ip6_dst_destroy(struct dst_entry *dst)
203{
204 struct rt6_info *rt = (struct rt6_info *)dst;
205 struct inet6_dev *idev = rt->rt6i_idev;
206
207 if (idev != NULL) {
208 rt->rt6i_idev = NULL;
209 in6_dev_put(idev);
1ab1457c 210 }
1da177e4
LT
211}
212
213static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 int how)
215{
216 struct rt6_info *rt = (struct rt6_info *)dst;
217 struct inet6_dev *idev = rt->rt6i_idev;
218
2774c7ab
EB
219 if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
1da177e4
LT
221 if (loopback_idev != NULL) {
222 rt->rt6i_idev = loopback_idev;
223 in6_dev_put(idev);
224 }
225 }
226}
227
228static __inline__ int rt6_check_expired(const struct rt6_info *rt)
229{
230 return (rt->rt6i_flags & RTF_EXPIRES &&
231 time_after(jiffies, rt->rt6i_expires));
232}
233
c71099ac
TG
234static inline int rt6_need_strict(struct in6_addr *daddr)
235{
236 return (ipv6_addr_type(daddr) &
237 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
238}
239
1da177e4 240/*
c71099ac 241 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
242 */
243
244static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
245 int oif,
246 int strict)
247{
248 struct rt6_info *local = NULL;
249 struct rt6_info *sprt;
250
251 if (oif) {
7cc48263 252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
253 struct net_device *dev = sprt->rt6i_dev;
254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
259 if (strict && oif)
260 continue;
1ab1457c 261 if (local && (!oif ||
1da177e4
LT
262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
267 }
268
269 if (local)
270 return local;
271
272 if (strict)
273 return &ip6_null_entry;
274 }
275 return rt;
276}
277
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if @rt has a next-hop neighbour that is
 * not in a NUD_VALID state and the per-device rtr_probe_interval has
 * elapsed since neigh->updated, stamp the neighbour and send a
 * Neighbour Solicitation to its solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute (note carried over from the original comment).
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* re-check the state and the rate limit under the neighbour lock */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
313
1da177e4 314/*
554cfb7e 315 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 316 */
b6f99a21 317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
318{
319 struct net_device *dev = rt->rt6i_dev;
161980f4 320 if (!oif || dev->ifindex == oif)
554cfb7e 321 return 2;
161980f4
DM
322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
554cfb7e 326}
1da177e4 327
b6f99a21 328static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 329{
554cfb7e 330 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 331 int m;
4d0c5911
YH
332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
554cfb7e
YH
336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
4d0c5911 338 m = 2;
398bcbeb
YH
339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
ea73ee23 344 m = 1;
554cfb7e 345 read_unlock_bh(&neigh->lock);
398bcbeb
YH
346 } else
347 m = 0;
554cfb7e 348 return m;
1da177e4
LT
349}
350
554cfb7e
YH
351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
1da177e4 353{
4d0c5911 354 int m, n;
1ab1457c 355
4d0c5911 356 m = rt6_check_dev(rt, oif);
77d16f45 357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 358 return -1;
ebacaaa0
YH
359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
4d0c5911 362 n = rt6_check_neigh(rt);
557e92ef 363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
364 return -1;
365 return m;
366}
367
f11e6659
DM
368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
554cfb7e 370{
f11e6659
DM
371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
554cfb7e 398 int mpri = -1;
1da177e4 399
f11e6659
DM
400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 rt = rt->u.dst.rt6_next)
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 rt = rt->u.dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 407
f11e6659
DM
408 return match;
409}
1da177e4 410
f11e6659
DM
411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
1da177e4 414
f11e6659
DM
415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
554cfb7e 417
f11e6659
DM
418 rt0 = fn->rr_ptr;
419 if (!rt0)
420 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 421
f11e6659 422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 423
554cfb7e 424 if (!match &&
f11e6659
DM
425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
427
554cfb7e 428 /* no entries matched; do round-robin */
f11e6659
DM
429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 next = fn->leaf;
431
432 if (next != rt0)
433 fn->rr_ptr = next;
1da177e4 434 }
1da177e4 435
f11e6659
DM
436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
1da177e4 438
554cfb7e 439 return (match ? match : &ip6_null_entry);
1da177e4
LT
440}
441
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    raw option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to associate with the route
 *
 * Depending on the advertised lifetime, the matching route-info route is
 * deleted, created, or has its preference and expiry refreshed.
 * Returns 0 on success or -EINVAL when the option fails validation.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* 0xffffffff means infinity; otherwise clamp to avoid overflow */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/*
		 * Shorter option: build the prefix in a local buffer from
		 * the first prefix_len bits (original note: "this function
		 * is safe").
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt == NULL) {
		if (lifetime)
			rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr,
						dev->ifindex, pref);
	} else {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt == NULL)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
519
982f56f3
YH
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed selection.
 *
 * Expects the enclosing function to provide local variables `rt` and
 * `fn` and the labels `out` and `restart`.  When rt is &ip6_null_entry
 * it walks towards the root: at a top-level root it jumps to `out`;
 * otherwise it moves to the parent (or looks up @saddr in the parent's
 * subtree when that subtree is not where we came from) and jumps to
 * `restart` at the first node carrying RTN_RTINFO.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
537
538static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539 struct flowi *fl, int flags)
1da177e4
LT
540{
541 struct fib6_node *fn;
542 struct rt6_info *rt;
543
c71099ac
TG
544 read_lock_bh(&table->tb6_lock);
545 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546restart:
547 rt = fn->leaf;
77d16f45 548 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 549 BACKTRACK(&fl->fl6_src);
c71099ac 550out:
03f49f34 551 dst_use(&rt->u.dst, jiffies);
c71099ac 552 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
553 return rt;
554
555}
556
557struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
558 int oif, int strict)
559{
560 struct flowi fl = {
561 .oif = oif,
562 .nl_u = {
563 .ip6_u = {
564 .daddr = *daddr,
c71099ac
TG
565 },
566 },
567 };
568 struct dst_entry *dst;
77d16f45 569 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 570
adaa70bb
TG
571 if (saddr) {
572 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
573 flags |= RT6_LOOKUP_F_HAS_SADDR;
574 }
575
c71099ac
TG
576 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
577 if (dst->error == 0)
578 return (struct rt6_info *) dst;
579
580 dst_release(dst);
581
1da177e4
LT
582 return NULL;
583}
584
7159039a
YH
585EXPORT_SYMBOL(rt6_lookup);
586
c71099ac 587/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
588 It takes new route entry, the addition fails by any reason the
589 route is freed. In any case, if caller does not hold it, it may
590 be destroyed.
591 */
592
86872cb5 593static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
594{
595 int err;
c71099ac 596 struct fib6_table *table;
1da177e4 597
c71099ac
TG
598 table = rt->rt6i_table;
599 write_lock_bh(&table->tb6_lock);
86872cb5 600 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 601 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
602
603 return err;
604}
605
40e22e8f
TG
606int ip6_ins_rt(struct rt6_info *rt)
607{
86872cb5 608 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
609}
610
95a9a5ba
YH
611static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
612 struct in6_addr *saddr)
1da177e4 613{
1da177e4
LT
614 struct rt6_info *rt;
615
616 /*
617 * Clone the route.
618 */
619
620 rt = ip6_rt_copy(ort);
621
622 if (rt) {
58c4fb86
YH
623 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
624 if (rt->rt6i_dst.plen != 128 &&
625 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
626 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 627 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 628 }
1da177e4 629
58c4fb86 630 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
631 rt->rt6i_dst.plen = 128;
632 rt->rt6i_flags |= RTF_CACHE;
633 rt->u.dst.flags |= DST_HOST;
634
635#ifdef CONFIG_IPV6_SUBTREES
636 if (rt->rt6i_src.plen && saddr) {
637 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
638 rt->rt6i_src.plen = 128;
639 }
640#endif
641
642 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
643
95a9a5ba 644 }
1da177e4 645
95a9a5ba
YH
646 return rt;
647}
1da177e4 648
299d9939
YH
649static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
650{
651 struct rt6_info *rt = ip6_rt_copy(ort);
652 if (rt) {
653 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
654 rt->rt6i_dst.plen = 128;
655 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
656 rt->u.dst.flags |= DST_HOST;
657 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
658 }
659 return rt;
660}
661
4acad72d 662static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
8ce11e6a 663 struct flowi *fl, int flags)
1da177e4
LT
664{
665 struct fib6_node *fn;
519fbd87 666 struct rt6_info *rt, *nrt;
c71099ac 667 int strict = 0;
1da177e4 668 int attempts = 3;
519fbd87 669 int err;
ea659e07 670 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 671
77d16f45 672 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
673
674relookup:
c71099ac 675 read_lock_bh(&table->tb6_lock);
1da177e4 676
8238dd06 677restart_2:
c71099ac 678 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
679
680restart:
4acad72d 681 rt = rt6_select(fn, oif, strict | reachable);
982f56f3 682 BACKTRACK(&fl->fl6_src);
8238dd06
YH
683 if (rt == &ip6_null_entry ||
684 rt->rt6i_flags & RTF_CACHE)
1ddef044 685 goto out;
1da177e4 686
fb9de91e 687 dst_hold(&rt->u.dst);
c71099ac 688 read_unlock_bh(&table->tb6_lock);
fb9de91e 689
519fbd87 690 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 691 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
692 else {
693#if CLONE_OFFLINK_ROUTE
c71099ac 694 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
695#else
696 goto out2;
697#endif
698 }
e40cf353 699
519fbd87
YH
700 dst_release(&rt->u.dst);
701 rt = nrt ? : &ip6_null_entry;
1da177e4 702
519fbd87
YH
703 dst_hold(&rt->u.dst);
704 if (nrt) {
40e22e8f 705 err = ip6_ins_rt(nrt);
519fbd87 706 if (!err)
1da177e4 707 goto out2;
1da177e4 708 }
1da177e4 709
519fbd87
YH
710 if (--attempts <= 0)
711 goto out2;
712
713 /*
c71099ac 714 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
715 * released someone could insert this route. Relookup.
716 */
717 dst_release(&rt->u.dst);
718 goto relookup;
719
720out:
8238dd06
YH
721 if (reachable) {
722 reachable = 0;
723 goto restart_2;
724 }
519fbd87 725 dst_hold(&rt->u.dst);
c71099ac 726 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
727out2:
728 rt->u.dst.lastuse = jiffies;
729 rt->u.dst.__use++;
c71099ac
TG
730
731 return rt;
1da177e4
LT
732}
733
4acad72d
PE
734static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
735 struct flowi *fl, int flags)
736{
737 return ip6_pol_route(table, fl->iif, fl, flags);
738}
739
c71099ac
TG
740void ip6_route_input(struct sk_buff *skb)
741{
0660e03f 742 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 743 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
744 struct flowi fl = {
745 .iif = skb->dev->ifindex,
746 .nl_u = {
747 .ip6_u = {
748 .daddr = iph->daddr,
749 .saddr = iph->saddr,
90bcaf7b 750 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
751 },
752 },
1ab1457c 753 .mark = skb->mark,
c71099ac
TG
754 .proto = iph->nexthdr,
755 };
adaa70bb
TG
756
757 if (rt6_need_strict(&iph->daddr))
758 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
759
760 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761}
762
763static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764 struct flowi *fl, int flags)
1da177e4 765{
4acad72d 766 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
767}
768
769struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
770{
771 int flags = 0;
772
773 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 774 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 775
adaa70bb
TG
776 if (!ipv6_addr_any(&fl->fl6_src))
777 flags |= RT6_LOOKUP_F_HAS_SADDR;
778
c71099ac 779 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
780}
781
7159039a 782EXPORT_SYMBOL(ip6_route_output);
1da177e4 783
14e50e57
DM
784int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
785{
786 struct rt6_info *ort = (struct rt6_info *) *dstp;
787 struct rt6_info *rt = (struct rt6_info *)
788 dst_alloc(&ip6_dst_blackhole_ops);
789 struct dst_entry *new = NULL;
790
791 if (rt) {
792 new = &rt->u.dst;
793
794 atomic_set(&new->__refcnt, 1);
795 new->__use = 1;
352e512c
HX
796 new->input = dst_discard;
797 new->output = dst_discard;
14e50e57
DM
798
799 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
800 new->dev = ort->u.dst.dev;
801 if (new->dev)
802 dev_hold(new->dev);
803 rt->rt6i_idev = ort->rt6i_idev;
804 if (rt->rt6i_idev)
805 in6_dev_hold(rt->rt6i_idev);
806 rt->rt6i_expires = 0;
807
808 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
809 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
810 rt->rt6i_metric = 0;
811
812 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
813#ifdef CONFIG_IPV6_SUBTREES
814 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
815#endif
816
817 dst_free(new);
818 }
819
820 dst_release(*dstp);
821 *dstp = new;
822 return (new ? 0 : -ENOMEM);
823}
824EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
825
1da177e4
LT
826/*
827 * Destination cache support functions
828 */
829
830static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
831{
832 struct rt6_info *rt;
833
834 rt = (struct rt6_info *) dst;
835
836 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
837 return dst;
838
839 return NULL;
840}
841
842static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
843{
844 struct rt6_info *rt = (struct rt6_info *) dst;
845
846 if (rt) {
847 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 848 ip6_del_rt(rt);
1da177e4
LT
849 else
850 dst_release(dst);
851 }
852 return NULL;
853}
854
855static void ip6_link_failure(struct sk_buff *skb)
856{
857 struct rt6_info *rt;
858
859 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
860
861 rt = (struct rt6_info *) skb->dst;
862 if (rt) {
863 if (rt->rt6i_flags&RTF_CACHE) {
864 dst_set_expires(&rt->u.dst, 0);
865 rt->rt6i_flags |= RTF_EXPIRES;
866 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
867 rt->rt6i_node->fn_sernum = -1;
868 }
869}
870
871static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
872{
873 struct rt6_info *rt6 = (struct rt6_info*)dst;
874
875 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
876 rt6->rt6i_flags |= RTF_MODIFIED;
877 if (mtu < IPV6_MIN_MTU) {
878 mtu = IPV6_MIN_MTU;
879 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
880 }
881 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 882 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
883 }
884}
885
1da177e4
LT
886static int ipv6_get_mtu(struct net_device *dev);
887
888static inline unsigned int ipv6_advmss(unsigned int mtu)
889{
890 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
891
892 if (mtu < ip6_rt_min_advmss)
893 mtu = ip6_rt_min_advmss;
894
895 /*
1ab1457c
YH
896 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
897 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
898 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
899 * rely only on pmtu discovery"
900 */
901 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
902 mtu = IPV6_MAXPLEN;
903 return mtu;
904}
905
5d0bbeeb 906static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 907static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 908
1ab1457c 909struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
1da177e4
LT
910 struct neighbour *neigh,
911 struct in6_addr *addr,
912 int (*output)(struct sk_buff *))
913{
914 struct rt6_info *rt;
915 struct inet6_dev *idev = in6_dev_get(dev);
916
917 if (unlikely(idev == NULL))
918 return NULL;
919
920 rt = ip6_dst_alloc();
921 if (unlikely(rt == NULL)) {
922 in6_dev_put(idev);
923 goto out;
924 }
925
926 dev_hold(dev);
927 if (neigh)
928 neigh_hold(neigh);
929 else
930 neigh = ndisc_get_neigh(dev, addr);
931
932 rt->rt6i_dev = dev;
933 rt->rt6i_idev = idev;
934 rt->rt6i_nexthop = neigh;
935 atomic_set(&rt->u.dst.__refcnt, 1);
936 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
939 rt->u.dst.output = output;
940
941#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
942 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
943 ? DST_HOST
1da177e4
LT
944 : 0;
945 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
946 rt->rt6i_dst.plen = 128;
947#endif
948
5d0bbeeb 949 spin_lock_bh(&ndisc_lock);
1da177e4
LT
950 rt->u.dst.next = ndisc_dst_gc_list;
951 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 952 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
953
954 fib6_force_start_gc();
955
956out:
40aa7b90 957 return &rt->u.dst;
1da177e4
LT
958}
959
960int ndisc_dst_gc(int *more)
961{
962 struct dst_entry *dst, *next, **pprev;
963 int freed;
964
965 next = NULL;
1ab1457c 966 freed = 0;
5d0bbeeb
TG
967
968 spin_lock_bh(&ndisc_lock);
1da177e4 969 pprev = &ndisc_dst_gc_list;
5d0bbeeb 970
1da177e4
LT
971 while ((dst = *pprev) != NULL) {
972 if (!atomic_read(&dst->__refcnt)) {
973 *pprev = dst->next;
974 dst_free(dst);
975 freed++;
976 } else {
977 pprev = &dst->next;
978 (*more)++;
979 }
980 }
981
5d0bbeeb
TG
982 spin_unlock_bh(&ndisc_lock);
983
1da177e4
LT
984 return freed;
985}
986
987static int ip6_dst_gc(void)
988{
989 static unsigned expire = 30*HZ;
990 static unsigned long last_gc;
991 unsigned long now = jiffies;
992
993 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
994 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
995 goto out;
996
997 expire++;
998 fib6_run_gc(expire);
999 last_gc = now;
1000 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1001 expire = ip6_rt_gc_timeout>>1;
1002
1003out:
1004 expire -= expire>>ip6_rt_gc_elasticity;
1005 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1006}
1007
1008/* Clean host part of a prefix. Not necessary in radix tree,
1009 but results in cleaner routing tables.
1010
1011 Remove it only when all the things will work!
1012 */
1013
1014static int ipv6_get_mtu(struct net_device *dev)
1015{
1016 int mtu = IPV6_MIN_MTU;
1017 struct inet6_dev *idev;
1018
1019 idev = in6_dev_get(dev);
1020 if (idev) {
1021 mtu = idev->cnf.mtu6;
1022 in6_dev_put(idev);
1023 }
1024 return mtu;
1025}
1026
1027int ipv6_get_hoplimit(struct net_device *dev)
1028{
1029 int hoplimit = ipv6_devconf.hop_limit;
1030 struct inet6_dev *idev;
1031
1032 idev = in6_dev_get(dev);
1033 if (idev) {
1034 hoplimit = idev->cnf.hop_limit;
1035 in6_dev_put(idev);
1036 }
1037 return hoplimit;
1038}
1039
1040/*
1041 *
1042 */
1043
86872cb5 1044int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1045{
1046 int err;
1da177e4
LT
1047 struct rt6_info *rt = NULL;
1048 struct net_device *dev = NULL;
1049 struct inet6_dev *idev = NULL;
c71099ac 1050 struct fib6_table *table;
1da177e4
LT
1051 int addr_type;
1052
86872cb5 1053 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1054 return -EINVAL;
1055#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1056 if (cfg->fc_src_len)
1da177e4
LT
1057 return -EINVAL;
1058#endif
86872cb5 1059 if (cfg->fc_ifindex) {
1da177e4 1060 err = -ENODEV;
881d966b 1061 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1da177e4
LT
1062 if (!dev)
1063 goto out;
1064 idev = in6_dev_get(dev);
1065 if (!idev)
1066 goto out;
1067 }
1068
86872cb5
TG
1069 if (cfg->fc_metric == 0)
1070 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1071
86872cb5 1072 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1073 if (table == NULL) {
1074 err = -ENOBUFS;
1075 goto out;
1076 }
1077
1da177e4
LT
1078 rt = ip6_dst_alloc();
1079
1080 if (rt == NULL) {
1081 err = -ENOMEM;
1082 goto out;
1083 }
1084
1085 rt->u.dst.obsolete = -1;
86872cb5 1086 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1087
86872cb5
TG
1088 if (cfg->fc_protocol == RTPROT_UNSPEC)
1089 cfg->fc_protocol = RTPROT_BOOT;
1090 rt->rt6i_protocol = cfg->fc_protocol;
1091
1092 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1093
1094 if (addr_type & IPV6_ADDR_MULTICAST)
1095 rt->u.dst.input = ip6_mc_input;
1096 else
1097 rt->u.dst.input = ip6_forward;
1098
1099 rt->u.dst.output = ip6_output;
1100
86872cb5
TG
1101 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1102 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1103 if (rt->rt6i_dst.plen == 128)
1104 rt->u.dst.flags = DST_HOST;
1105
1106#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1107 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1108 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1109#endif
1110
86872cb5 1111 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1112
1113 /* We cannot add true routes via loopback here,
1114 they would result in kernel looping; promote them to reject routes
1115 */
86872cb5 1116 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1117 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1118 /* hold loopback dev/idev if we haven't done so. */
2774c7ab 1119 if (dev != init_net.loopback_dev) {
1da177e4
LT
1120 if (dev) {
1121 dev_put(dev);
1122 in6_dev_put(idev);
1123 }
2774c7ab 1124 dev = init_net.loopback_dev;
1da177e4
LT
1125 dev_hold(dev);
1126 idev = in6_dev_get(dev);
1127 if (!idev) {
1128 err = -ENODEV;
1129 goto out;
1130 }
1131 }
1132 rt->u.dst.output = ip6_pkt_discard_out;
1133 rt->u.dst.input = ip6_pkt_discard;
1134 rt->u.dst.error = -ENETUNREACH;
1135 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1136 goto install_route;
1137 }
1138
86872cb5 1139 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1140 struct in6_addr *gw_addr;
1141 int gwa_type;
1142
86872cb5
TG
1143 gw_addr = &cfg->fc_gateway;
1144 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1145 gwa_type = ipv6_addr_type(gw_addr);
1146
1147 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1148 struct rt6_info *grt;
1149
1150 /* IPv6 strictly inhibits using not link-local
1151 addresses as nexthop address.
1152 Otherwise, router will not able to send redirects.
1153 It is very good, but in some (rare!) circumstances
1154 (SIT, PtP, NBMA NOARP links) it is handy to allow
1155 some exceptions. --ANK
1156 */
1157 err = -EINVAL;
1158 if (!(gwa_type&IPV6_ADDR_UNICAST))
1159 goto out;
1160
86872cb5 1161 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1162
1163 err = -EHOSTUNREACH;
1164 if (grt == NULL)
1165 goto out;
1166 if (dev) {
1167 if (dev != grt->rt6i_dev) {
1168 dst_release(&grt->u.dst);
1169 goto out;
1170 }
1171 } else {
1172 dev = grt->rt6i_dev;
1173 idev = grt->rt6i_idev;
1174 dev_hold(dev);
1175 in6_dev_hold(grt->rt6i_idev);
1176 }
1177 if (!(grt->rt6i_flags&RTF_GATEWAY))
1178 err = 0;
1179 dst_release(&grt->u.dst);
1180
1181 if (err)
1182 goto out;
1183 }
1184 err = -EINVAL;
1185 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1186 goto out;
1187 }
1188
1189 err = -ENODEV;
1190 if (dev == NULL)
1191 goto out;
1192
86872cb5 1193 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1194 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1195 if (IS_ERR(rt->rt6i_nexthop)) {
1196 err = PTR_ERR(rt->rt6i_nexthop);
1197 rt->rt6i_nexthop = NULL;
1198 goto out;
1199 }
1200 }
1201
86872cb5 1202 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1203
1204install_route:
86872cb5
TG
1205 if (cfg->fc_mx) {
1206 struct nlattr *nla;
1207 int remaining;
1208
1209 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1210 int type = nla_type(nla);
86872cb5
TG
1211
1212 if (type) {
1213 if (type > RTAX_MAX) {
1da177e4
LT
1214 err = -EINVAL;
1215 goto out;
1216 }
86872cb5
TG
1217
1218 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1219 }
1da177e4
LT
1220 }
1221 }
1222
1223 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1224 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1225 if (!rt->u.dst.metrics[RTAX_MTU-1])
1226 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1227 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1228 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1229 rt->u.dst.dev = dev;
1230 rt->rt6i_idev = idev;
c71099ac 1231 rt->rt6i_table = table;
86872cb5 1232 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1233
1234out:
1235 if (dev)
1236 dev_put(dev);
1237 if (idev)
1238 in6_dev_put(idev);
1239 if (rt)
40aa7b90 1240 dst_free(&rt->u.dst);
1da177e4
LT
1241 return err;
1242}
1243
86872cb5 1244static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1245{
1246 int err;
c71099ac 1247 struct fib6_table *table;
1da177e4 1248
6c813a72
PM
1249 if (rt == &ip6_null_entry)
1250 return -ENOENT;
1251
c71099ac
TG
1252 table = rt->rt6i_table;
1253 write_lock_bh(&table->tb6_lock);
1da177e4 1254
86872cb5 1255 err = fib6_del(rt, info);
1da177e4
LT
1256 dst_release(&rt->u.dst);
1257
c71099ac 1258 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1259
1260 return err;
1261}
1262
e0a1ad73
TG
1263int ip6_del_rt(struct rt6_info *rt)
1264{
86872cb5 1265 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1266}
1267
86872cb5 1268static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1269{
c71099ac 1270 struct fib6_table *table;
1da177e4
LT
1271 struct fib6_node *fn;
1272 struct rt6_info *rt;
1273 int err = -ESRCH;
1274
86872cb5 1275 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1276 if (table == NULL)
1277 return err;
1278
1279 read_lock_bh(&table->tb6_lock);
1da177e4 1280
c71099ac 1281 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1282 &cfg->fc_dst, cfg->fc_dst_len,
1283 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1284
1da177e4 1285 if (fn) {
7cc48263 1286 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1287 if (cfg->fc_ifindex &&
1da177e4 1288 (rt->rt6i_dev == NULL ||
86872cb5 1289 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1290 continue;
86872cb5
TG
1291 if (cfg->fc_flags & RTF_GATEWAY &&
1292 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1293 continue;
86872cb5 1294 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1295 continue;
1296 dst_hold(&rt->u.dst);
c71099ac 1297 read_unlock_bh(&table->tb6_lock);
1da177e4 1298
86872cb5 1299 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1300 }
1301 }
c71099ac 1302 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1303
1304 return err;
1305}
1306
1307/*
1308 * Handle redirects
1309 */
a6279458
YH
1310struct ip6rd_flowi {
1311 struct flowi fl;
1312 struct in6_addr gateway;
1313};
1314
1315static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1316 struct flowi *fl,
1317 int flags)
1da177e4 1318{
a6279458
YH
1319 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1320 struct rt6_info *rt;
e843b9e1 1321 struct fib6_node *fn;
c71099ac 1322
1da177e4 1323 /*
e843b9e1
YH
1324 * Get the "current" route for this destination and
1325 * check if the redirect has come from approriate router.
1326 *
1327 * RFC 2461 specifies that redirects should only be
1328 * accepted if they come from the nexthop to the target.
1329 * Due to the way the routes are chosen, this notion
1330 * is a bit fuzzy and one might need to check all possible
1331 * routes.
1da177e4 1332 */
1da177e4 1333
c71099ac 1334 read_lock_bh(&table->tb6_lock);
a6279458 1335 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1336restart:
7cc48263 1337 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1338 /*
1339 * Current route is on-link; redirect is always invalid.
1340 *
1341 * Seems, previous statement is not true. It could
1342 * be node, which looks for us as on-link (f.e. proxy ndisc)
1343 * But then router serving it might decide, that we should
1344 * know truth 8)8) --ANK (980726).
1345 */
1346 if (rt6_check_expired(rt))
1347 continue;
1348 if (!(rt->rt6i_flags & RTF_GATEWAY))
1349 continue;
a6279458 1350 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1351 continue;
a6279458 1352 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1353 continue;
1354 break;
1355 }
a6279458 1356
cb15d9c2 1357 if (!rt)
a6279458 1358 rt = &ip6_null_entry;
cb15d9c2
YH
1359 BACKTRACK(&fl->fl6_src);
1360out:
a6279458
YH
1361 dst_hold(&rt->u.dst);
1362
c71099ac 1363 read_unlock_bh(&table->tb6_lock);
e843b9e1 1364
a6279458
YH
1365 return rt;
1366};
1367
1368static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1369 struct in6_addr *src,
1370 struct in6_addr *gateway,
1371 struct net_device *dev)
1372{
adaa70bb 1373 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1374 struct ip6rd_flowi rdfl = {
1375 .fl = {
1376 .oif = dev->ifindex,
1377 .nl_u = {
1378 .ip6_u = {
1379 .daddr = *dest,
1380 .saddr = *src,
1381 },
1382 },
1383 },
1384 .gateway = *gateway,
1385 };
adaa70bb
TG
1386
1387 if (rt6_need_strict(dest))
1388 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1389
1390 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1391}
1392
1393void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1394 struct in6_addr *saddr,
1395 struct neighbour *neigh, u8 *lladdr, int on_link)
1396{
1397 struct rt6_info *rt, *nrt = NULL;
1398 struct netevent_redirect netevent;
1399
1400 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1401
1402 if (rt == &ip6_null_entry) {
1da177e4
LT
1403 if (net_ratelimit())
1404 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1405 "for redirect target\n");
a6279458 1406 goto out;
1da177e4
LT
1407 }
1408
1da177e4
LT
1409 /*
1410 * We have finally decided to accept it.
1411 */
1412
1ab1457c 1413 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1414 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1415 NEIGH_UPDATE_F_OVERRIDE|
1416 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1417 NEIGH_UPDATE_F_ISROUTER))
1418 );
1419
1420 /*
1421 * Redirect received -> path was valid.
1422 * Look, redirects are sent only in response to data packets,
1423 * so that this nexthop apparently is reachable. --ANK
1424 */
1425 dst_confirm(&rt->u.dst);
1426
1427 /* Duplicate redirect: silently ignore. */
1428 if (neigh == rt->u.dst.neighbour)
1429 goto out;
1430
1431 nrt = ip6_rt_copy(rt);
1432 if (nrt == NULL)
1433 goto out;
1434
1435 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1436 if (on_link)
1437 nrt->rt6i_flags &= ~RTF_GATEWAY;
1438
1439 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1440 nrt->rt6i_dst.plen = 128;
1441 nrt->u.dst.flags |= DST_HOST;
1442
1443 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1444 nrt->rt6i_nexthop = neigh_clone(neigh);
1445 /* Reset pmtu, it may be better */
1446 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1447 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1448
40e22e8f 1449 if (ip6_ins_rt(nrt))
1da177e4
LT
1450 goto out;
1451
8d71740c
TT
1452 netevent.old = &rt->u.dst;
1453 netevent.new = &nrt->u.dst;
1454 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1455
1da177e4 1456 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1457 ip6_del_rt(rt);
1da177e4
LT
1458 return;
1459 }
1460
1461out:
1ab1457c 1462 dst_release(&rt->u.dst);
1da177e4
LT
1463 return;
1464}
1465
1466/*
1467 * Handle ICMP "packet too big" messages
1468 * i.e. Path MTU discovery
1469 */
1470
1471void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1472 struct net_device *dev, u32 pmtu)
1473{
1474 struct rt6_info *rt, *nrt;
1475 int allfrag = 0;
1476
1477 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1478 if (rt == NULL)
1479 return;
1480
1481 if (pmtu >= dst_mtu(&rt->u.dst))
1482 goto out;
1483
1484 if (pmtu < IPV6_MIN_MTU) {
1485 /*
1ab1457c 1486 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1487 * MTU (1280) and a fragment header should always be included
1488 * after a node receiving Too Big message reporting PMTU is
1489 * less than the IPv6 Minimum Link MTU.
1490 */
1491 pmtu = IPV6_MIN_MTU;
1492 allfrag = 1;
1493 }
1494
1495 /* New mtu received -> path was valid.
1496 They are sent only in response to data packets,
1497 so that this nexthop apparently is reachable. --ANK
1498 */
1499 dst_confirm(&rt->u.dst);
1500
1501 /* Host route. If it is static, it would be better
1502 not to override it, but add new one, so that
1503 when cache entry will expire old pmtu
1504 would return automatically.
1505 */
1506 if (rt->rt6i_flags & RTF_CACHE) {
1507 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1508 if (allfrag)
1509 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1510 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1511 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1512 goto out;
1513 }
1514
1515 /* Network route.
1516 Two cases are possible:
1517 1. It is connected route. Action: COW
1518 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1519 */
d5315b50 1520 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1521 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1522 else
1523 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1524
d5315b50 1525 if (nrt) {
a1e78363
YH
1526 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1527 if (allfrag)
1528 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1529
1530 /* According to RFC 1981, detecting PMTU increase shouldn't be
1531 * happened within 5 mins, the recommended timer is 10 mins.
1532 * Here this route expiration time is set to ip6_rt_mtu_expires
1533 * which is 10 mins. After 10 mins the decreased pmtu is expired
1534 * and detecting PMTU increase will be automatically happened.
1535 */
1536 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1537 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1538
40e22e8f 1539 ip6_ins_rt(nrt);
1da177e4 1540 }
1da177e4
LT
1541out:
1542 dst_release(&rt->u.dst);
1543}
1544
1545/*
1546 * Misc support functions
1547 */
1548
1549static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1550{
1551 struct rt6_info *rt = ip6_dst_alloc();
1552
1553 if (rt) {
1554 rt->u.dst.input = ort->u.dst.input;
1555 rt->u.dst.output = ort->u.dst.output;
1556
1557 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1558 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1559 rt->u.dst.dev = ort->u.dst.dev;
1560 if (rt->u.dst.dev)
1561 dev_hold(rt->u.dst.dev);
1562 rt->rt6i_idev = ort->rt6i_idev;
1563 if (rt->rt6i_idev)
1564 in6_dev_hold(rt->rt6i_idev);
1565 rt->u.dst.lastuse = jiffies;
1566 rt->rt6i_expires = 0;
1567
1568 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1569 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1570 rt->rt6i_metric = 0;
1571
1572 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1573#ifdef CONFIG_IPV6_SUBTREES
1574 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1575#endif
c71099ac 1576 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1577 }
1578 return rt;
1579}
1580
70ceb4f5
YH
1581#ifdef CONFIG_IPV6_ROUTE_INFO
1582static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1583 struct in6_addr *gwaddr, int ifindex)
1584{
1585 struct fib6_node *fn;
1586 struct rt6_info *rt = NULL;
c71099ac
TG
1587 struct fib6_table *table;
1588
1589 table = fib6_get_table(RT6_TABLE_INFO);
1590 if (table == NULL)
1591 return NULL;
70ceb4f5 1592
c71099ac
TG
1593 write_lock_bh(&table->tb6_lock);
1594 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1595 if (!fn)
1596 goto out;
1597
7cc48263 1598 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1599 if (rt->rt6i_dev->ifindex != ifindex)
1600 continue;
1601 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1602 continue;
1603 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1604 continue;
1605 dst_hold(&rt->u.dst);
1606 break;
1607 }
1608out:
c71099ac 1609 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1610 return rt;
1611}
1612
1613static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1614 struct in6_addr *gwaddr, int ifindex,
1615 unsigned pref)
1616{
86872cb5
TG
1617 struct fib6_config cfg = {
1618 .fc_table = RT6_TABLE_INFO,
1619 .fc_metric = 1024,
1620 .fc_ifindex = ifindex,
1621 .fc_dst_len = prefixlen,
1622 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1623 RTF_UP | RTF_PREF(pref),
1624 };
1625
1626 ipv6_addr_copy(&cfg.fc_dst, prefix);
1627 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1628
e317da96
YH
1629 /* We should treat it as a default route if prefix length is 0. */
1630 if (!prefixlen)
86872cb5 1631 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1632
86872cb5 1633 ip6_route_add(&cfg);
70ceb4f5
YH
1634
1635 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1636}
1637#endif
1638
1da177e4 1639struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1640{
1da177e4 1641 struct rt6_info *rt;
c71099ac 1642 struct fib6_table *table;
1da177e4 1643
c71099ac
TG
1644 table = fib6_get_table(RT6_TABLE_DFLT);
1645 if (table == NULL)
1646 return NULL;
1da177e4 1647
c71099ac 1648 write_lock_bh(&table->tb6_lock);
7cc48263 1649 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1650 if (dev == rt->rt6i_dev &&
045927ff 1651 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1652 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1653 break;
1654 }
1655 if (rt)
1656 dst_hold(&rt->u.dst);
c71099ac 1657 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1658 return rt;
1659}
1660
1661struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1662 struct net_device *dev,
1663 unsigned int pref)
1da177e4 1664{
86872cb5
TG
1665 struct fib6_config cfg = {
1666 .fc_table = RT6_TABLE_DFLT,
1667 .fc_metric = 1024,
1668 .fc_ifindex = dev->ifindex,
1669 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1670 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1671 };
1da177e4 1672
86872cb5 1673 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1674
86872cb5 1675 ip6_route_add(&cfg);
1da177e4 1676
1da177e4
LT
1677 return rt6_get_dflt_router(gwaddr, dev);
1678}
1679
1680void rt6_purge_dflt_routers(void)
1681{
1682 struct rt6_info *rt;
c71099ac
TG
1683 struct fib6_table *table;
1684
1685 /* NOTE: Keep consistent with rt6_get_dflt_router */
1686 table = fib6_get_table(RT6_TABLE_DFLT);
1687 if (table == NULL)
1688 return;
1da177e4
LT
1689
1690restart:
c71099ac 1691 read_lock_bh(&table->tb6_lock);
7cc48263 1692 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1693 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1694 dst_hold(&rt->u.dst);
c71099ac 1695 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1696 ip6_del_rt(rt);
1da177e4
LT
1697 goto restart;
1698 }
1699 }
c71099ac 1700 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1701}
1702
86872cb5
TG
1703static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1704 struct fib6_config *cfg)
1705{
1706 memset(cfg, 0, sizeof(*cfg));
1707
1708 cfg->fc_table = RT6_TABLE_MAIN;
1709 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1710 cfg->fc_metric = rtmsg->rtmsg_metric;
1711 cfg->fc_expires = rtmsg->rtmsg_info;
1712 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1713 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1714 cfg->fc_flags = rtmsg->rtmsg_flags;
1715
1716 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1717 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1718 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1719}
1720
1da177e4
LT
1721int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1722{
86872cb5 1723 struct fib6_config cfg;
1da177e4
LT
1724 struct in6_rtmsg rtmsg;
1725 int err;
1726
1727 switch(cmd) {
1728 case SIOCADDRT: /* Add a route */
1729 case SIOCDELRT: /* Delete a route */
1730 if (!capable(CAP_NET_ADMIN))
1731 return -EPERM;
1732 err = copy_from_user(&rtmsg, arg,
1733 sizeof(struct in6_rtmsg));
1734 if (err)
1735 return -EFAULT;
86872cb5
TG
1736
1737 rtmsg_to_fib6_config(&rtmsg, &cfg);
1738
1da177e4
LT
1739 rtnl_lock();
1740 switch (cmd) {
1741 case SIOCADDRT:
86872cb5 1742 err = ip6_route_add(&cfg);
1da177e4
LT
1743 break;
1744 case SIOCDELRT:
86872cb5 1745 err = ip6_route_del(&cfg);
1da177e4
LT
1746 break;
1747 default:
1748 err = -EINVAL;
1749 }
1750 rtnl_unlock();
1751
1752 return err;
3ff50b79 1753 }
1da177e4
LT
1754
1755 return -EINVAL;
1756}
1757
1758/*
1759 * Drop the packet on the floor
1760 */
1761
612f09e8
YH
1762static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1763 int ipstats_mib_noroutes)
1da177e4 1764{
612f09e8
YH
1765 int type;
1766 switch (ipstats_mib_noroutes) {
1767 case IPSTATS_MIB_INNOROUTES:
0660e03f 1768 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1769 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1770 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1771 break;
1772 }
1773 /* FALLTHROUGH */
1774 case IPSTATS_MIB_OUTNOROUTES:
1775 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1776 break;
1777 }
9ce8ade0 1778 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1779 kfree_skb(skb);
1780 return 0;
1781}
1782
9ce8ade0
TG
1783static int ip6_pkt_discard(struct sk_buff *skb)
1784{
612f09e8 1785 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1786}
1787
20380731 1788static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1789{
1790 skb->dev = skb->dst->dev;
612f09e8 1791 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1792}
1793
6723ab54
DM
1794#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1795
9ce8ade0
TG
1796static int ip6_pkt_prohibit(struct sk_buff *skb)
1797{
612f09e8 1798 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1799}
1800
1801static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1802{
1803 skb->dev = skb->dst->dev;
612f09e8 1804 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1805}
1806
6723ab54
DM
1807#endif
1808
1da177e4
LT
1809/*
1810 * Allocate a dst for local (unicast / anycast) address.
1811 */
1812
1813struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1814 const struct in6_addr *addr,
1815 int anycast)
1816{
1817 struct rt6_info *rt = ip6_dst_alloc();
1818
1819 if (rt == NULL)
1820 return ERR_PTR(-ENOMEM);
1821
2774c7ab 1822 dev_hold(init_net.loopback_dev);
1da177e4
LT
1823 in6_dev_hold(idev);
1824
1825 rt->u.dst.flags = DST_HOST;
1826 rt->u.dst.input = ip6_input;
1827 rt->u.dst.output = ip6_output;
2774c7ab 1828 rt->rt6i_dev = init_net.loopback_dev;
1da177e4
LT
1829 rt->rt6i_idev = idev;
1830 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1831 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1832 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1833 rt->u.dst.obsolete = -1;
1834
1835 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1836 if (anycast)
1837 rt->rt6i_flags |= RTF_ANYCAST;
1838 else
1da177e4
LT
1839 rt->rt6i_flags |= RTF_LOCAL;
1840 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1841 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1842 dst_free(&rt->u.dst);
1da177e4
LT
1843 return ERR_PTR(-ENOMEM);
1844 }
1845
1846 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1847 rt->rt6i_dst.plen = 128;
c71099ac 1848 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1849
1850 atomic_set(&rt->u.dst.__refcnt, 1);
1851
1852 return rt;
1853}
1854
1855static int fib6_ifdown(struct rt6_info *rt, void *arg)
1856{
1857 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1858 rt != &ip6_null_entry) {
1859 RT6_TRACE("deleted by ifdown %p\n", rt);
1860 return -1;
1861 }
1862 return 0;
1863}
1864
1865void rt6_ifdown(struct net_device *dev)
1866{
c71099ac 1867 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1868}
1869
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
1875
1876static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1877{
1878 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1879 struct inet6_dev *idev;
1880
1881 /* In IPv6 pmtu discovery is not optional,
1882 so that RTAX_MTU lock cannot disable it.
1883 We still use this lock to block changes
1884 caused by addrconf/ndisc.
1885 */
1886
1887 idev = __in6_dev_get(arg->dev);
1888 if (idev == NULL)
1889 return 0;
1890
1891 /* For administrative MTU increase, there is no way to discover
1892 IPv6 PMTU increase, so PMTU increase should be updated here.
1893 Since RFC 1981 doesn't include administrative MTU increase
1894 update PMTU increase is a MUST. (i.e. jumbo frame)
1895 */
1896 /*
1897 If new MTU is less than route PMTU, this new MTU will be the
1898 lowest MTU in the path, update the route PMTU to reflect PMTU
1899 decreases; if new MTU is greater than route PMTU, and the
1900 old MTU is the lowest MTU in the path, update the route PMTU
1901 to reflect the increase. In this case if the other nodes' MTU
1902 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1903 PMTU discouvery.
1904 */
1905 if (rt->rt6i_dev == arg->dev &&
1906 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1ab1457c
YH
1907 (dst_mtu(&rt->u.dst) > arg->mtu ||
1908 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1909 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1910 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
566cfd8f
SA
1911 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1912 }
1da177e4
LT
1913 return 0;
1914}
1915
1916void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1917{
c71099ac
TG
1918 struct rt6_mtu_change_arg arg = {
1919 .dev = dev,
1920 .mtu = mtu,
1921 };
1da177e4 1922
c71099ac 1923 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1924}
1925
ef7c79ed 1926static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1927 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1928 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1929 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1930 [RTA_PRIORITY] = { .type = NLA_U32 },
1931 [RTA_METRICS] = { .type = NLA_NESTED },
1932};
1933
1934static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1935 struct fib6_config *cfg)
1da177e4 1936{
86872cb5
TG
1937 struct rtmsg *rtm;
1938 struct nlattr *tb[RTA_MAX+1];
1939 int err;
1da177e4 1940
86872cb5
TG
1941 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1942 if (err < 0)
1943 goto errout;
1da177e4 1944
86872cb5
TG
1945 err = -EINVAL;
1946 rtm = nlmsg_data(nlh);
1947 memset(cfg, 0, sizeof(*cfg));
1948
1949 cfg->fc_table = rtm->rtm_table;
1950 cfg->fc_dst_len = rtm->rtm_dst_len;
1951 cfg->fc_src_len = rtm->rtm_src_len;
1952 cfg->fc_flags = RTF_UP;
1953 cfg->fc_protocol = rtm->rtm_protocol;
1954
1955 if (rtm->rtm_type == RTN_UNREACHABLE)
1956 cfg->fc_flags |= RTF_REJECT;
1957
1958 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1959 cfg->fc_nlinfo.nlh = nlh;
1960
1961 if (tb[RTA_GATEWAY]) {
1962 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1963 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1964 }
86872cb5
TG
1965
1966 if (tb[RTA_DST]) {
1967 int plen = (rtm->rtm_dst_len + 7) >> 3;
1968
1969 if (nla_len(tb[RTA_DST]) < plen)
1970 goto errout;
1971
1972 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1973 }
86872cb5
TG
1974
1975 if (tb[RTA_SRC]) {
1976 int plen = (rtm->rtm_src_len + 7) >> 3;
1977
1978 if (nla_len(tb[RTA_SRC]) < plen)
1979 goto errout;
1980
1981 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1982 }
86872cb5
TG
1983
1984 if (tb[RTA_OIF])
1985 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1986
1987 if (tb[RTA_PRIORITY])
1988 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1989
1990 if (tb[RTA_METRICS]) {
1991 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1992 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1993 }
86872cb5
TG
1994
1995 if (tb[RTA_TABLE])
1996 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1997
1998 err = 0;
1999errout:
2000 return err;
1da177e4
LT
2001}
2002
c127ea2c 2003static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2004{
86872cb5
TG
2005 struct fib6_config cfg;
2006 int err;
1da177e4 2007
86872cb5
TG
2008 err = rtm_to_fib6_config(skb, nlh, &cfg);
2009 if (err < 0)
2010 return err;
2011
2012 return ip6_route_del(&cfg);
1da177e4
LT
2013}
2014
c127ea2c 2015static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2016{
86872cb5
TG
2017 struct fib6_config cfg;
2018 int err;
1da177e4 2019
86872cb5
TG
2020 err = rtm_to_fib6_config(skb, nlh, &cfg);
2021 if (err < 0)
2022 return err;
2023
2024 return ip6_route_add(&cfg);
1da177e4
LT
2025}
2026
339bf98f
TG
2027static inline size_t rt6_nlmsg_size(void)
2028{
2029 return NLMSG_ALIGN(sizeof(struct rtmsg))
2030 + nla_total_size(16) /* RTA_SRC */
2031 + nla_total_size(16) /* RTA_DST */
2032 + nla_total_size(16) /* RTA_GATEWAY */
2033 + nla_total_size(16) /* RTA_PREFSRC */
2034 + nla_total_size(4) /* RTA_TABLE */
2035 + nla_total_size(4) /* RTA_IIF */
2036 + nla_total_size(4) /* RTA_OIF */
2037 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2038 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2039 + nla_total_size(sizeof(struct rta_cacheinfo));
2040}
2041
1da177e4 2042static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2043 struct in6_addr *dst, struct in6_addr *src,
2044 int iif, int type, u32 pid, u32 seq,
2045 int prefix, unsigned int flags)
1da177e4
LT
2046{
2047 struct rtmsg *rtm;
2d7202bf 2048 struct nlmsghdr *nlh;
e3703b3d 2049 long expires;
9e762a4a 2050 u32 table;
1da177e4
LT
2051
2052 if (prefix) { /* user wants prefix routes only */
2053 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2054 /* success since this is not a prefix route */
2055 return 1;
2056 }
2057 }
2058
2d7202bf
TG
2059 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2060 if (nlh == NULL)
26932566 2061 return -EMSGSIZE;
2d7202bf
TG
2062
2063 rtm = nlmsg_data(nlh);
1da177e4
LT
2064 rtm->rtm_family = AF_INET6;
2065 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2066 rtm->rtm_src_len = rt->rt6i_src.plen;
2067 rtm->rtm_tos = 0;
c71099ac 2068 if (rt->rt6i_table)
9e762a4a 2069 table = rt->rt6i_table->tb6_id;
c71099ac 2070 else
9e762a4a
PM
2071 table = RT6_TABLE_UNSPEC;
2072 rtm->rtm_table = table;
2d7202bf 2073 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2074 if (rt->rt6i_flags&RTF_REJECT)
2075 rtm->rtm_type = RTN_UNREACHABLE;
2076 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2077 rtm->rtm_type = RTN_LOCAL;
2078 else
2079 rtm->rtm_type = RTN_UNICAST;
2080 rtm->rtm_flags = 0;
2081 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2082 rtm->rtm_protocol = rt->rt6i_protocol;
2083 if (rt->rt6i_flags&RTF_DYNAMIC)
2084 rtm->rtm_protocol = RTPROT_REDIRECT;
2085 else if (rt->rt6i_flags & RTF_ADDRCONF)
2086 rtm->rtm_protocol = RTPROT_KERNEL;
2087 else if (rt->rt6i_flags&RTF_DEFAULT)
2088 rtm->rtm_protocol = RTPROT_RA;
2089
2090 if (rt->rt6i_flags&RTF_CACHE)
2091 rtm->rtm_flags |= RTM_F_CLONED;
2092
2093 if (dst) {
2d7202bf 2094 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2095 rtm->rtm_dst_len = 128;
1da177e4 2096 } else if (rtm->rtm_dst_len)
2d7202bf 2097 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2098#ifdef CONFIG_IPV6_SUBTREES
2099 if (src) {
2d7202bf 2100 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2101 rtm->rtm_src_len = 128;
1da177e4 2102 } else if (rtm->rtm_src_len)
2d7202bf 2103 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2104#endif
2105 if (iif)
2d7202bf 2106 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2107 else if (dst) {
2108 struct in6_addr saddr_buf;
2109 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2110 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2111 }
2d7202bf 2112
1da177e4 2113 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2114 goto nla_put_failure;
2115
1da177e4 2116 if (rt->u.dst.neighbour)
2d7202bf
TG
2117 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2118
1da177e4 2119 if (rt->u.dst.dev)
2d7202bf
TG
2120 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2121
2122 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2123
2124 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2125 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2126 expires, rt->u.dst.error) < 0)
2127 goto nla_put_failure;
2d7202bf
TG
2128
2129 return nlmsg_end(skb, nlh);
2130
2131nla_put_failure:
26932566
PM
2132 nlmsg_cancel(skb, nlh);
2133 return -EMSGSIZE;
1da177e4
LT
2134}
2135
1b43af54 2136int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2137{
2138 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2139 int prefix;
2140
2d7202bf
TG
2141 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2142 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2143 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2144 } else
2145 prefix = 0;
2146
2147 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2148 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2149 prefix, NLM_F_MULTI);
1da177e4
LT
2150}
2151
c127ea2c 2152static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2153{
ab364a6f
TG
2154 struct nlattr *tb[RTA_MAX+1];
2155 struct rt6_info *rt;
1da177e4 2156 struct sk_buff *skb;
ab364a6f 2157 struct rtmsg *rtm;
1da177e4 2158 struct flowi fl;
ab364a6f 2159 int err, iif = 0;
1da177e4 2160
ab364a6f
TG
2161 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2162 if (err < 0)
2163 goto errout;
1da177e4 2164
ab364a6f 2165 err = -EINVAL;
1da177e4 2166 memset(&fl, 0, sizeof(fl));
1da177e4 2167
ab364a6f
TG
2168 if (tb[RTA_SRC]) {
2169 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2170 goto errout;
2171
2172 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2173 }
2174
2175 if (tb[RTA_DST]) {
2176 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2177 goto errout;
2178
2179 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2180 }
2181
2182 if (tb[RTA_IIF])
2183 iif = nla_get_u32(tb[RTA_IIF]);
2184
2185 if (tb[RTA_OIF])
2186 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2187
2188 if (iif) {
2189 struct net_device *dev;
881d966b 2190 dev = __dev_get_by_index(&init_net, iif);
1da177e4
LT
2191 if (!dev) {
2192 err = -ENODEV;
ab364a6f 2193 goto errout;
1da177e4
LT
2194 }
2195 }
2196
ab364a6f
TG
2197 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2198 if (skb == NULL) {
2199 err = -ENOBUFS;
2200 goto errout;
2201 }
1da177e4 2202
ab364a6f
TG
2203 /* Reserve room for dummy headers, this skb can pass
2204 through good chunk of routing engine.
2205 */
459a98ed 2206 skb_reset_mac_header(skb);
ab364a6f 2207 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2208
ab364a6f 2209 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2210 skb->dst = &rt->u.dst;
2211
ab364a6f 2212 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2213 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2214 nlh->nlmsg_seq, 0, 0);
1da177e4 2215 if (err < 0) {
ab364a6f
TG
2216 kfree_skb(skb);
2217 goto errout;
1da177e4
LT
2218 }
2219
2942e900 2220 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2221errout:
1da177e4 2222 return err;
1da177e4
LT
2223}
2224
86872cb5 2225void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2226{
2227 struct sk_buff *skb;
86872cb5
TG
2228 u32 pid = 0, seq = 0;
2229 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2230 int err = -ENOBUFS;
2231
86872cb5
TG
2232 if (info) {
2233 pid = info->pid;
2234 nlh = info->nlh;
2235 if (nlh)
2236 seq = nlh->nlmsg_seq;
2237 }
2238
339bf98f 2239 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2240 if (skb == NULL)
2241 goto errout;
2242
2243 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
26932566
PM
2244 if (err < 0) {
2245 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2246 WARN_ON(err == -EMSGSIZE);
2247 kfree_skb(skb);
2248 goto errout;
2249 }
21713ebc
TG
2250 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2251errout:
2252 if (err < 0)
2253 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2254}
2255
2256/*
2257 * /proc
2258 */
2259
2260#ifdef CONFIG_PROC_FS
2261
2262#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2263
/*
 * Buffer bookkeeping for a /proc route listing.
 * NOTE(review): nothing in the visible seq_file based /proc code below
 * references this structure - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2272
2273static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2274{
33120b30 2275 struct seq_file *m = p_arg;
1da177e4 2276
33120b30
AD
2277 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2278 rt->rt6i_dst.plen);
1da177e4
LT
2279
2280#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2281 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2282 rt->rt6i_src.plen);
1da177e4 2283#else
33120b30 2284 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2285#endif
2286
2287 if (rt->rt6i_nexthop) {
33120b30
AD
2288 seq_printf(m, NIP6_SEQFMT,
2289 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2290 } else {
33120b30 2291 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2292 }
33120b30
AD
2293 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2294 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2295 rt->u.dst.__use, rt->rt6i_flags,
2296 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2297 return 0;
2298}
2299
33120b30 2300static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2301{
33120b30
AD
2302 fib6_clean_all(rt6_info_route, 0, m);
2303 return 0;
2304}
1da177e4 2305
33120b30
AD
2306static int ipv6_route_open(struct inode *inode, struct file *file)
2307{
2308 return single_open(file, ipv6_route_show, NULL);
1da177e4
LT
2309}
2310
33120b30
AD
2311static const struct file_operations ipv6_route_proc_fops = {
2312 .owner = THIS_MODULE,
2313 .open = ipv6_route_open,
2314 .read = seq_read,
2315 .llseek = seq_lseek,
2316 .release = single_release,
2317};
2318
1da177e4
LT
2319static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2320{
2321 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2322 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2323 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2324 rt6_stats.fib_rt_cache,
2325 atomic_read(&ip6_dst_ops.entries),
2326 rt6_stats.fib_discarded_routes);
2327
2328 return 0;
2329}
2330
2331static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2332{
2333 return single_open(file, rt6_stats_seq_show, NULL);
2334}
2335
9a32144e 2336static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2337 .owner = THIS_MODULE,
2338 .open = rt6_stats_seq_open,
2339 .read = seq_read,
2340 .llseek = seq_lseek,
2341 .release = single_release,
2342};
2343#endif /* CONFIG_PROC_FS */
2344
2345#ifdef CONFIG_SYSCTL
2346
2347static int flush_delay;
2348
2349static
2350int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2351 void __user *buffer, size_t *lenp, loff_t *ppos)
2352{
2353 if (write) {
2354 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2355 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2356 return 0;
2357 } else
2358 return -EINVAL;
2359}
2360
2361ctl_table ipv6_route_table[] = {
1ab1457c 2362 {
1da177e4 2363 .procname = "flush",
1ab1457c 2364 .data = &flush_delay,
1da177e4 2365 .maxlen = sizeof(int),
89c8b3a1 2366 .mode = 0200,
1ab1457c 2367 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2368 },
2369 {
2370 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2371 .procname = "gc_thresh",
1ab1457c 2372 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2373 .maxlen = sizeof(int),
2374 .mode = 0644,
1ab1457c 2375 .proc_handler = &proc_dointvec,
1da177e4
LT
2376 },
2377 {
2378 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2379 .procname = "max_size",
1ab1457c 2380 .data = &ip6_rt_max_size,
1da177e4
LT
2381 .maxlen = sizeof(int),
2382 .mode = 0644,
1ab1457c 2383 .proc_handler = &proc_dointvec,
1da177e4
LT
2384 },
2385 {
2386 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2387 .procname = "gc_min_interval",
1ab1457c 2388 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2389 .maxlen = sizeof(int),
2390 .mode = 0644,
1ab1457c 2391 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2392 .strategy = &sysctl_jiffies,
2393 },
2394 {
2395 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2396 .procname = "gc_timeout",
1ab1457c 2397 .data = &ip6_rt_gc_timeout,
1da177e4
LT
2398 .maxlen = sizeof(int),
2399 .mode = 0644,
1ab1457c 2400 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2401 .strategy = &sysctl_jiffies,
2402 },
2403 {
2404 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2405 .procname = "gc_interval",
1ab1457c 2406 .data = &ip6_rt_gc_interval,
1da177e4
LT
2407 .maxlen = sizeof(int),
2408 .mode = 0644,
1ab1457c 2409 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2410 .strategy = &sysctl_jiffies,
2411 },
2412 {
2413 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2414 .procname = "gc_elasticity",
1ab1457c 2415 .data = &ip6_rt_gc_elasticity,
1da177e4
LT
2416 .maxlen = sizeof(int),
2417 .mode = 0644,
1ab1457c 2418 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2419 .strategy = &sysctl_jiffies,
2420 },
2421 {
2422 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2423 .procname = "mtu_expires",
1ab1457c 2424 .data = &ip6_rt_mtu_expires,
1da177e4
LT
2425 .maxlen = sizeof(int),
2426 .mode = 0644,
1ab1457c 2427 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2428 .strategy = &sysctl_jiffies,
2429 },
2430 {
2431 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2432 .procname = "min_adv_mss",
1ab1457c 2433 .data = &ip6_rt_min_advmss,
1da177e4
LT
2434 .maxlen = sizeof(int),
2435 .mode = 0644,
1ab1457c 2436 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2437 .strategy = &sysctl_jiffies,
2438 },
2439 {
2440 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2441 .procname = "gc_min_interval_ms",
1ab1457c 2442 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2443 .maxlen = sizeof(int),
2444 .mode = 0644,
1ab1457c 2445 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2446 .strategy = &sysctl_ms_jiffies,
2447 },
2448 { .ctl_name = 0 }
2449};
2450
2451#endif
2452
2453void __init ip6_route_init(void)
2454{
e5d679f3
AD
2455 ip6_dst_ops.kmem_cachep =
2456 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
20c2df83 2457 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
14e50e57
DM
2458 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2459
1da177e4 2460 fib6_init();
33120b30 2461 proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops);
457c4cbc 2462 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
1da177e4
LT
2463#ifdef CONFIG_XFRM
2464 xfrm6_init();
2465#endif
101367c2
TG
2466#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2467 fib6_rules_init();
2468#endif
c127ea2c
TG
2469
2470 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2471 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2472 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
1da177e4
LT
2473}
2474
2475void ip6_route_cleanup(void)
2476{
101367c2
TG
2477#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2478 fib6_rules_cleanup();
2479#endif
1da177e4 2480#ifdef CONFIG_PROC_FS
457c4cbc
EB
2481 proc_net_remove(&init_net, "ipv6_route");
2482 proc_net_remove(&init_net, "rt6_stats");
1da177e4
LT
2483#endif
2484#ifdef CONFIG_XFRM
2485 xfrm6_fini();
2486#endif
2487 rt6_ifdown(NULL);
2488 fib6_gc_cleanup();
2489 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2490}